#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"

    cl::desc("Restrict range of branch instructions (DEBUG)"));

    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)

  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))

  return !MI.memoperands_empty() &&

           return MMO->isLoad() && MMO->isInvariant();

  if (!MI.hasImplicitDef() &&
      MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
      !MI.mayRaiseFPException())
bool SIInstrInfo::resultDependsOnExec(const MachineInstr &MI) const {

  if (MI.isCompare()) {

      switch (Use.getOpcode()) {
      case AMDGPU::S_AND_SAVEEXEC_B32:
      case AMDGPU::S_AND_SAVEEXEC_B64:

      case AMDGPU::S_AND_B32:
      case AMDGPU::S_AND_B64:
        if (!Use.readsRegister(AMDGPU::EXEC, nullptr))

  switch (MI.getOpcode()) {

  case AMDGPU::V_READFIRSTLANE_B32:

  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)

  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&
        RI.isSGPRClass(MRI.getRegClass(Op.getReg()))) {

  if (FromCycle == nullptr)

  while (FromCycle && !FromCycle->contains(ToCycle)) {
                                            int64_t &Offset1) const {

  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())

  if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())

    int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
    if (Offset0Idx == -1 || Offset1Idx == -1)

    Offset0Idx -= get(Opc0).NumDefs;
    Offset1Idx -= get(Opc1).NumDefs;

    if (!Load0Offset || !Load1Offset)

    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);

    if (OffIdx0 == -1 || OffIdx1 == -1)

    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;

  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:
  OffsetIsScalable = false;

      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);

        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
      if (Opc == AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)
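      // The DS_READ2/DS_WRITE2 family below encodes two 8-bit offsets in
      // element-size units; a single combined access is only reported when
      // the two offsets are consecutive.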
      unsigned Offset0 = Offset0Op->getImm() & 0xff;
      unsigned Offset1 = Offset1Op->getImm() & 0xff;
      if (Offset0 + 1 != Offset1)

        int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);

      Offset = EltSize * Offset0;

      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (DataOpIdx == -1) {
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);

        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);

    if (BaseOp && !BaseOp->isFI())

    if (SOffset->isReg())

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

        isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RsrcOpName);

    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
    if (VAddr0Idx >= 0) {

      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
  if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))

  if (MO1->getAddrSpace() != MO2->getAddrSpace())

  const auto *Base1 = MO1->getValue();
  const auto *Base2 = MO2->getValue();
  if (!Base1 || !Base2)

  return Base1 == Base2;

                                      int64_t Offset1, bool OffsetIsScalable1,

                                      int64_t Offset2, bool OffsetIsScalable2,
                                      unsigned ClusterSize,
                                      unsigned NumBytes) const {

  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {
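  // The cluster is capped by its total dword footprint: derive the average
  // load size from the accumulated byte count, round each load up to whole
  // dwords, and compare against the subtarget's memory-cluster budget.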
  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWords <= MaxMemoryClusterDWords;

                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");

  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);

                              const char *Msg = "illegal VGPR to SGPR copy") {

  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");
          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");

         "Destination register of the copy should be an AGPR.");

  for (auto Def = MI, E = MBB.begin(); Def != E; ) {

    if (!Def->modifiesRegister(SrcReg, &RI))

    if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
        Def->getOperand(0).getReg() != SrcReg)

    bool SafeToPropagate = true;

    for (auto I = Def; I != MI && SafeToPropagate; ++I)
      if (I->modifiesRegister(DefOp.getReg(), &RI))
        SafeToPropagate = false;

    if (!SafeToPropagate)

    for (auto I = Def; I != MI; ++I)
      I->clearRegisterKills(DefOp.getReg(), &RI);

    if (ImpUseSuperReg) {
      Builder.addReg(ImpUseSuperReg,

  RS.enterBasicBlockEnd(MBB);
  RS.backward(std::next(MI));
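  // GFX908 has no AGPR-to-AGPR move, so the copy is routed through a VGPR
  // temporary; the temporary is picked from a small set of reserved VGPRs,
  // cycled by the destination AGPR index.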
  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;

  assert(MBB.getParent()->getRegInfo().isReserved(Tmp) &&
         "VGPR used for an intermediate copy should have been reserved.");

  Register Tmp2 = RS.scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI,

  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;

  if (ImpUseSuperReg) {
    UseBuilder.addReg(ImpUseSuperReg,

  for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;
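    // Copy adjacent, even-aligned SGPR pairs with a single S_MOV_B64 instead
    // of two S_MOV_B32s whenever both the source and destination
    // sub-registers line up.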
    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {

      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;

  assert(FirstMI && LastMI);

  LastMI->addRegisterKilled(SrcReg, &RI);
                              Register SrcReg, bool KillSrc, bool RenamableDest,
                              bool RenamableSrc) const {
  unsigned Size = RI.getRegSizeInBits(*RC);

  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (((Size == 16) != (SrcSize == 16))) {

    assert(ST.useRealTrue16Insts());

    if (DestReg == SrcReg) {

    RC = RI.getPhysRegBaseClass(DestReg);
    Size = RI.getRegSizeInBits(*RC);
    SrcRC = RI.getPhysRegBaseClass(SrcReg);
    SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (RC == &AMDGPU::VGPR_32RegClass) {

           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));

    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                     AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
      if (DestReg == AMDGPU::VCC_LO) {

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) {
      if (DestReg == AMDGPU::VCC) {

  if (DestReg == AMDGPU::SCC) {

    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {

      assert(ST.hasScalarCompareEq64());

  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {

    if (AMDGPU::AGPR_32RegClass.contains(SrcReg) && ST.hasGFX90AInsts()) {

    const bool Overlap = RI.regsOverlap(SrcReg, DestReg);

           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));

    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);

    MCRegister NewDestReg = RI.get32BitRegister(DestReg);
    MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);

    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {

               "Cannot use hi16 subreg with an AGPR!");

    if (ST.useRealTrue16Insts()) {

    if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
        (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {

    if (IsSGPRSrc && !ST.hasSDWAScalar()) {
      if (!DstLow || !SrcLow) {

               "Cannot use hi16 subreg on VI!");

  if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
    if (ST.hasMovB64()) {

    if (ST.hasPkMovB32()) {

  const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);

  if (RI.isSGPRClass(RC)) {
    if (!RI.isSGPRClass(SrcRC)) {

    const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.isAGPRClass(RC)) {
    if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
      Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
    else if (RI.hasVGPRs(SrcRC) ||
             (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;

      Opcode = AMDGPU::INSTRUCTION_LIST_END;
  } else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
    Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
  } else if ((Size % 64 == 0) && RI.hasVGPRs(RC) &&
             (RI.isProperlyAlignedRC(*RC) &&
              (SrcRC == RC || RI.isSGPRClass(SrcRC)))) {

    if (ST.hasMovB64()) {
      Opcode = AMDGPU::V_MOV_B64_e32;

    } else if (ST.hasPkMovB32()) {
      Opcode = AMDGPU::V_PK_MOV_B32;

  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
    RS = std::make_unique<RegScavenger>();
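  // INSTRUCTION_LIST_END is used here as a sentinel meaning "no direct
  // per-dword copy opcode exists" (AGPR destinations without GFX90A); in that
  // case a RegScavenger is prepared and each 32-bit piece is routed through
  // indirectCopyToAGPR instead.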
  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
  const bool CanKillSuperReg = KillSrc && !Overlap;

  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {

      SubIdx = SubIndices[Idx];

      SubIdx = SubIndices[SubIndices.size() - Idx - 1];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");

    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {

                          *RS, Overlap, ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {

  return &AMDGPU::VGPR_32RegClass;

  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");

  if (Cond.size() == 1) {
    Register SReg = MRI.createVirtualRegister(BoolXExecRC);

  } else if (Cond.size() == 2) {
    assert(Cond[0].isImm() && "Cond[0] is not an immediate");

    case SIInstrInfo::SCC_TRUE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);

    case SIInstrInfo::SCC_FALSE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);

    case SIInstrInfo::VCCNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);

    case SIInstrInfo::VCCZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);

    case SIInstrInfo::EXECNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());

    case SIInstrInfo::EXECZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());
                                            int64_t &ImmVal) const {
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOVK_I32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::V_MOV_B64_PSEUDO: {

    return MI.getOperand(0).getReg() == Reg;

  case AMDGPU::S_BREV_B32:
  case AMDGPU::V_BFREV_B32_e32:
  case AMDGPU::V_BFREV_B32_e64: {

    return MI.getOperand(0).getReg() == Reg;

  case AMDGPU::S_NOT_B32:
  case AMDGPU::V_NOT_B32_e32:
  case AMDGPU::V_NOT_B32_e64: {

    ImmVal = static_cast<int64_t>(~static_cast<int32_t>(Src0.getImm()));
    return MI.getOperand(0).getReg() == Reg;
  if (RI.isAGPRClass(DstRC))
    return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 16) {

    return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;

  if (RI.getRegSizeInBits(*DstRC) == 32)
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
    return AMDGPU::S_MOV_B64;
  if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
    return AMDGPU::V_MOV_B64_PSEUDO;
  return AMDGPU::COPY;

                                              bool IsIndirectSrc) const {
  if (IsIndirectSrc) {

      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
    if (VecSize <= 1024)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);

    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
  if (VecSize <= 1024)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);

    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;

                                              bool IsSGPR) const {

  assert(EltSize == 32 && "invalid reg indexing elt size");
    return AMDGPU::SI_SPILL_S32_SAVE;
    return AMDGPU::SI_SPILL_S64_SAVE;
    return AMDGPU::SI_SPILL_S96_SAVE;
    return AMDGPU::SI_SPILL_S128_SAVE;
    return AMDGPU::SI_SPILL_S160_SAVE;
    return AMDGPU::SI_SPILL_S192_SAVE;
    return AMDGPU::SI_SPILL_S224_SAVE;
    return AMDGPU::SI_SPILL_S256_SAVE;
    return AMDGPU::SI_SPILL_S288_SAVE;
    return AMDGPU::SI_SPILL_S320_SAVE;
    return AMDGPU::SI_SPILL_S352_SAVE;
    return AMDGPU::SI_SPILL_S384_SAVE;
    return AMDGPU::SI_SPILL_S512_SAVE;
    return AMDGPU::SI_SPILL_S1024_SAVE;

    return AMDGPU::SI_SPILL_V16_SAVE;
    return AMDGPU::SI_SPILL_V32_SAVE;
    return AMDGPU::SI_SPILL_V64_SAVE;
    return AMDGPU::SI_SPILL_V96_SAVE;
    return AMDGPU::SI_SPILL_V128_SAVE;
    return AMDGPU::SI_SPILL_V160_SAVE;
    return AMDGPU::SI_SPILL_V192_SAVE;
    return AMDGPU::SI_SPILL_V224_SAVE;
    return AMDGPU::SI_SPILL_V256_SAVE;
    return AMDGPU::SI_SPILL_V288_SAVE;
    return AMDGPU::SI_SPILL_V320_SAVE;
    return AMDGPU::SI_SPILL_V352_SAVE;
    return AMDGPU::SI_SPILL_V384_SAVE;
    return AMDGPU::SI_SPILL_V512_SAVE;
    return AMDGPU::SI_SPILL_V1024_SAVE;

    return AMDGPU::SI_SPILL_AV32_SAVE;
    return AMDGPU::SI_SPILL_AV64_SAVE;
    return AMDGPU::SI_SPILL_AV96_SAVE;
    return AMDGPU::SI_SPILL_AV128_SAVE;
    return AMDGPU::SI_SPILL_AV160_SAVE;
    return AMDGPU::SI_SPILL_AV192_SAVE;
    return AMDGPU::SI_SPILL_AV224_SAVE;
    return AMDGPU::SI_SPILL_AV256_SAVE;
    return AMDGPU::SI_SPILL_AV288_SAVE;
    return AMDGPU::SI_SPILL_AV320_SAVE;
    return AMDGPU::SI_SPILL_AV352_SAVE;
    return AMDGPU::SI_SPILL_AV384_SAVE;
    return AMDGPU::SI_SPILL_AV512_SAVE;
    return AMDGPU::SI_SPILL_AV1024_SAVE;

                                            bool IsVectorSuperClass) {

  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;

  return AMDGPU::SI_SPILL_WWM_V32_SAVE;

  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);

  if (ST.hasMAIInsts())

                          FrameInfo.getObjectAlign(FrameIndex));
  unsigned SpillSize = TRI->getSpillSize(*RC);

  if (RI.isSGPRClass(RC)) {

    assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
    assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
           SrcReg != AMDGPU::EXEC && "exec should not be spilled");

    if (SrcReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

    if (RI.spillSGPRToVGPR())
    return AMDGPU::SI_SPILL_S32_RESTORE;
    return AMDGPU::SI_SPILL_S64_RESTORE;
    return AMDGPU::SI_SPILL_S96_RESTORE;
    return AMDGPU::SI_SPILL_S128_RESTORE;
    return AMDGPU::SI_SPILL_S160_RESTORE;
    return AMDGPU::SI_SPILL_S192_RESTORE;
    return AMDGPU::SI_SPILL_S224_RESTORE;
    return AMDGPU::SI_SPILL_S256_RESTORE;
    return AMDGPU::SI_SPILL_S288_RESTORE;
    return AMDGPU::SI_SPILL_S320_RESTORE;
    return AMDGPU::SI_SPILL_S352_RESTORE;
    return AMDGPU::SI_SPILL_S384_RESTORE;
    return AMDGPU::SI_SPILL_S512_RESTORE;
    return AMDGPU::SI_SPILL_S1024_RESTORE;

    return AMDGPU::SI_SPILL_V16_RESTORE;
    return AMDGPU::SI_SPILL_V32_RESTORE;
    return AMDGPU::SI_SPILL_V64_RESTORE;
    return AMDGPU::SI_SPILL_V96_RESTORE;
    return AMDGPU::SI_SPILL_V128_RESTORE;
    return AMDGPU::SI_SPILL_V160_RESTORE;
    return AMDGPU::SI_SPILL_V192_RESTORE;
    return AMDGPU::SI_SPILL_V224_RESTORE;
    return AMDGPU::SI_SPILL_V256_RESTORE;
    return AMDGPU::SI_SPILL_V288_RESTORE;
    return AMDGPU::SI_SPILL_V320_RESTORE;
    return AMDGPU::SI_SPILL_V352_RESTORE;
    return AMDGPU::SI_SPILL_V384_RESTORE;
    return AMDGPU::SI_SPILL_V512_RESTORE;
    return AMDGPU::SI_SPILL_V1024_RESTORE;

    return AMDGPU::SI_SPILL_AV32_RESTORE;
    return AMDGPU::SI_SPILL_AV64_RESTORE;
    return AMDGPU::SI_SPILL_AV96_RESTORE;
    return AMDGPU::SI_SPILL_AV128_RESTORE;
    return AMDGPU::SI_SPILL_AV160_RESTORE;
    return AMDGPU::SI_SPILL_AV192_RESTORE;
    return AMDGPU::SI_SPILL_AV224_RESTORE;
    return AMDGPU::SI_SPILL_AV256_RESTORE;
    return AMDGPU::SI_SPILL_AV288_RESTORE;
    return AMDGPU::SI_SPILL_AV320_RESTORE;
    return AMDGPU::SI_SPILL_AV352_RESTORE;
    return AMDGPU::SI_SPILL_AV384_RESTORE;
    return AMDGPU::SI_SPILL_AV512_RESTORE;
    return AMDGPU::SI_SPILL_AV1024_RESTORE;

                                               bool IsVectorSuperClass) {

  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;

  return AMDGPU::SI_SPILL_WWM_V32_RESTORE;

  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);

  if (ST.hasMAIInsts())

  assert(!RI.isAGPRClass(RC));

  unsigned SpillSize = TRI->getSpillSize(*RC);

                          FrameInfo.getObjectAlign(FrameIndex));

  if (RI.isSGPRClass(RC)) {

    assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
    assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
           DestReg != AMDGPU::EXEC && "exec should not be spilled");

    if (DestReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

    if (RI.spillSGPRToVGPR())

                              unsigned Quantity) const {

  unsigned MaxSNopCount = 1u << ST.getSNopBits();
  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, MaxSNopCount);
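    // A single S_NOP can only encode a limited number of wait states
    // (bounded by getSNopBits()), so larger requests are emitted as a chain
    // of maximal S_NOPs.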
  auto *MF = MBB.getParent();

  assert(Info->isEntryFunction());

  if (MBB.succ_empty()) {
    bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
    if (HasNoTerminator) {
      if (Info->returnsVoid()) {

  constexpr unsigned DoorbellIDMask = 0x3ff;
  constexpr unsigned ECQueueWaveAbort = 0x400;

  if (!MBB.succ_empty() || std::next(MI.getIterator()) != MBB.end()) {
    ContBB = MBB.splitAt(MI, false);

  MBB.addSuccessor(TrapBB);

  Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)

      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)

      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
      .addUse(DoorbellRegMasked)
      .addImm(ECQueueWaveAbort);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addUse(SetWaveAbortBit);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)

  switch (MI.getOpcode()) {

    if (MI.isMetaInstruction())

    return MI.getOperand(0).getImm() + 1;
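// expandPostRAPseudo: once control flow is final, the *_term copies of the
// mask-manipulation opcodes are rewritten back to their ordinary forms, and
// the remaining pseudos (64-bit moves, indirect register reads/writes,
// SET_INACTIVE, PC-relative offsets, WWM/WQM markers, ...) are expanded into
// real machine instructions.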
  switch (MI.getOpcode()) {

  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));

  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));

  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));

  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));

  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));

  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));

  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));

  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));

  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));

  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));

  case AMDGPU::S_AND_SAVEEXEC_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));

  case AMDGPU::S_AND_SAVEEXEC_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));

  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));

  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    MI.setDesc(get(AMDGPU::V_READLANE_B32));

  case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {

        get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));

  case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {

    int64_t Imm = MI.getOperand(1).getImm();

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    MI.eraseFromParent();
  case AMDGPU::V_MOV_B64_PSEUDO: {

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    if (ST.hasMovB64()) {
      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));

    if (SrcOp.isImm()) {

      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());

      if (ST.hasPkMovB32() &&

    MI.eraseFromParent();

  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {

  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {

    if (ST.has64BitLiterals()) {
      MI.setDesc(get(AMDGPU::S_MOV_B64));

      MI.setDesc(get(AMDGPU::S_MOV_B64));

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());

    MI.eraseFromParent();

  case AMDGPU::V_SET_INACTIVE_B32: {

        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(5));
    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {

    if (RI.hasVGPRs(EltRC)) {
      Opc = AMDGPU::V_MOVRELD_B32_e32;

      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;

    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

        .add(MI.getOperand(2))

    const int ImpDefIdx =

    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();

  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());

    bool IsUndef = MI.getOperand(1).isUndef();

    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);

        .add(MI.getOperand(2))

    const int ImpDefIdx =

    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());

    bool IsUndef = MI.getOperand(1).isUndef();

    MI.eraseFromParent();

  case AMDGPU::SI_PC_ADD_REL_OFFSET: {

    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);

    if (ST.hasGetPCZeroExtension()) {

          BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));

        BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));

    MI.eraseFromParent();

  case AMDGPU::SI_PC_ADD_REL_OFFSET64: {

    Op.setOffset(Op.getOffset() + 4);

        BuildMI(MF, DL, get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(Op));

    MI.eraseFromParent();

  case AMDGPU::ENTER_STRICT_WWM: {

  case AMDGPU::ENTER_STRICT_WQM: {

    MI.eraseFromParent();

  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {

  case AMDGPU::SI_RETURN: {

    MI.eraseFromParent();

  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    MI.setDesc(get(AMDGPU::S_MUL_U64));

  case AMDGPU::S_GETPC_B64_pseudo:
    MI.setDesc(get(AMDGPU::S_GETPC_B64));
    if (ST.hasGetPCZeroExtension()) {

      Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

  case AMDGPU::V_MAX_BF16_PSEUDO_e64:
    assert(ST.hasBF16PackedInsts());
    MI.setDesc(get(AMDGPU::V_PK_MAX_NUM_BF16));

  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX8_IMM: {

    for (auto &CandMO : I->operands()) {
      if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())

    if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister)

    unsigned SubregSize = RI.getSubRegIdxSize(UseMO->getSubReg());

    assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet.");

    unsigned NewOpcode = -1;
    if (SubregSize == 256)
      NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
    else if (SubregSize == 128)
      NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;

    MRI.setRegClass(DestReg, NewRC);

    UseMO->setSubReg(AMDGPU::NoSubRegister);

    MI->getOperand(0).setReg(DestReg);
    MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);

    OffsetMO->setImm(FinalOffset);

    MI->setMemRefs(*MF, NewMMOs);
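// expandMovDPP64 splits a 64-bit DPP move pseudo into two 32-bit DPP moves
// over sub0/sub1, unless the subtarget supports 64-bit DPP ALU moves directly
// (hasMovB64 plus FeatureDPALU_DPP), in which case the pseudo is simply
// retargeted to V_MOV_B64_dpp.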
std::pair<MachineInstr*, MachineInstr*>

  assert(MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

  if (ST.hasMovB64() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&

    MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
    return std::pair(&MI, nullptr);

  for (auto Sub : {AMDGPU::sub0, AMDGPU::sub1}) {

    if (Dst.isPhysical()) {
      MovDPP.addDef(RI.getSubReg(Dst, Sub));

      auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    for (unsigned I = 1; I <= 2; ++I) {

      if (SrcOp.isImm()) {

        Imm.ashrInPlace(Part * 32);
        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());

        if (Src.isPhysical())
          MovDPP.addReg(RI.getSubReg(Src, Sub));

      MovDPP.addImm(MO.getImm());

    Split[Part] = MovDPP;

  if (Dst.isVirtual())

  MI.eraseFromParent();
  return std::pair(Split[0], Split[1]);

std::optional<DestSourcePair>

  if (MI.getOpcode() == AMDGPU::WWM_COPY)

  return std::nullopt;

                                        AMDGPU::OpName Src0OpName,

                                        AMDGPU::OpName Src1OpName) const {

         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);

  bool IsKill = RegOp.isKill();

  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())

  else if (NonRegOp.isFI())

  int64_t NonRegVal = NonRegOp1.getImm();

  NonRegOp2.setImm(NonRegVal);
                                                  unsigned OpIdx1) const {

  unsigned Opc = MI.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  if ((int)OpIdx0 == Src0Idx && !MO0.isReg() &&

  if ((int)OpIdx1 == Src0Idx && !MO1.isReg() &&

  if ((int)OpIdx1 != Src0Idx && MO0.isReg()) {

  if ((int)OpIdx0 != Src0Idx && MO1.isReg()) {

                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");

  unsigned Opc = MI.getOpcode();

  if (CommutedOpcode == -1)

  if (Src0Idx > Src1Idx)

  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
             static_cast<int>(Src0Idx) &&
         AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

                        Src1, AMDGPU::OpName::src1_modifiers);

                        AMDGPU::OpName::src1_sel);

                                           unsigned &SrcOpIdx0,
                                           unsigned &SrcOpIdx1) const {

                                           unsigned &SrcOpIdx0,
                                           unsigned &SrcOpIdx1) const {
  if (!Desc.isCommutable())

  unsigned Opc = Desc.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
                                     int64_t BrOffset) const {

  return MI.getOperand(0).getMBB();

  if (MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE ||
      MI.getOpcode() == AMDGPU::SI_LOOP)

         "new block should be inserted for expanding unconditional branch");

         "restore block should be inserted for restoring clobbered registers");

  if (ST.hasAddPC64Inst()) {

        MCCtx.createTempSymbol("offset", true);

        MCCtx.createTempSymbol("post_addpc", true);
    AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);

    Offset->setVariableValue(OffsetExpr);

  assert(RS && "RegScavenger required for long branching");

  Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

  const bool FlushSGPRWrites = (ST.isWave64() && ST.hasVALUMaskWriteHazard()) ||
                               ST.hasVALUReadSGPRHazard();
  auto ApplyHazardWorkarounds = [this, &MBB, &I, &DL, FlushSGPRWrites]() {
    if (FlushSGPRWrites)

  ApplyHazardWorkarounds();

      MCCtx.createTempSymbol("post_getpc", true);

      MCCtx.createTempSymbol("offset_lo", true);

      MCCtx.createTempSymbol("offset_hi", true);

      .addReg(PCReg, 0, AMDGPU::sub0)

      .addReg(PCReg, 0, AMDGPU::sub1)

  ApplyHazardWorkarounds();

  if (LongBranchReservedReg) {
    RS->enterBasicBlock(MBB);
    Scav = LongBranchReservedReg;

    RS->enterBasicBlockEnd(MBB);
    Scav = RS->scavengeRegisterBackwards(

    RS->setRegUsed(Scav);
    MRI.replaceRegWith(PCReg, Scav);
    MRI.clearVirtRegs();

    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
    MRI.clearVirtRegs();
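// Long-branch expansion above: when the target is out of S_CBRANCH range the
// destination address is materialized via S_GETPC_B64 plus offset_lo /
// offset_hi fixup symbols resolved at MC time. The 64-bit PC register is
// either the reserved long-branch SGPR pair or one scavenged at the branch
// point; if nothing can be scavenged, SGPR0_SGPR1 is spilled as an emergency
// pair and restored in RestoreBB.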
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {

  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;

SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {

  case AMDGPU::S_CBRANCH_SCC0:

  case AMDGPU::S_CBRANCH_SCC1:

  case AMDGPU::S_CBRANCH_VCCNZ:

  case AMDGPU::S_CBRANCH_VCCZ:

  case AMDGPU::S_CBRANCH_EXECNZ:

  case AMDGPU::S_CBRANCH_EXECZ:

                                bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {

    TBB = I->getOperand(0).getMBB();

  BranchPredicate Pred = getBranchPredicate(I->getOpcode());
  if (Pred == INVALID_BR)

  Cond.push_back(I->getOperand(1));

  if (I == MBB.end()) {

  if (I->getOpcode() == AMDGPU::S_BRANCH) {

    FBB = I->getOperand(0).getMBB();

                                bool AllowModify) const {

  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
    case AMDGPU::S_MOV_B64_term:
    case AMDGPU::S_XOR_B64_term:
    case AMDGPU::S_OR_B64_term:
    case AMDGPU::S_ANDN2_B64_term:
    case AMDGPU::S_AND_B64_term:
    case AMDGPU::S_AND_SAVEEXEC_B64_term:
    case AMDGPU::S_MOV_B32_term:
    case AMDGPU::S_XOR_B32_term:
    case AMDGPU::S_OR_B32_term:
    case AMDGPU::S_ANDN2_B32_term:
    case AMDGPU::S_AND_B32_term:
    case AMDGPU::S_AND_SAVEEXEC_B32_term:

    case AMDGPU::SI_ELSE:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:

                                   int *BytesRemoved) const {

  unsigned RemovedSize = 0;

    if (MI.isBranch() || MI.isReturn()) {

      MI.eraseFromParent();

    *BytesRemoved = RemovedSize;
                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {

      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

    *BytesAdded = ST.hasOffset3fBug() ? 16 : 8;

  if (Cond.size() != 2) {

  if (Cond[0].isImm()) {

                                   Register FalseReg, int &CondCycles,
                                   int &TrueCycles, int &FalseCycles) const {

    if (MRI.getRegClass(FalseReg) != RC)

    CondCycles = TrueCycles = FalseCycles = NumInsts;

    return RI.hasVGPRs(RC) && NumInsts <= 6;

  if (MRI.getRegClass(FalseReg) != RC)

  if (NumInsts % 2 == 0)

  CondCycles = TrueCycles = FalseCycles = NumInsts;
  return RI.isSGPRClass(RC);

  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);

  unsigned DstSize = RI.getRegSizeInBits(*DstRC);

  if (DstSize == 32) {

    if (Pred == SCC_TRUE) {

  if (DstSize == 64 && Pred == SCC_TRUE) {

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
  };

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
  };

  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;

  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  if (Pred == SCC_TRUE) {

      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;

      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;

      MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);

    unsigned SubIdx = SubIndices[Idx];

    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {

          .addReg(FalseReg, 0, SubIdx)
          .addReg(TrueReg, 0, SubIdx);

          .addReg(TrueReg, 0, SubIdx)
          .addReg(FalseReg, 0, SubIdx);
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:

  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:

  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:

  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:

  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:

      AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
      AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
      AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};

  unsigned Opc = MI.getOpcode();

    int Idx = AMDGPU::getNamedOperandIdx(Opc, Name);

      MI.removeOperand(Idx);

                                                   unsigned SubRegIndex) {
  switch (SubRegIndex) {
  case AMDGPU::NoSubRegister:

  case AMDGPU::sub1_lo16:

  case AMDGPU::sub1_hi16:

  return std::nullopt;
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADAK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADAK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAAK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAAK_F16_t16
                                        : AMDGPU::V_FMAAK_F16_fake16
                                  : AMDGPU::V_FMAAK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAAK_F64;

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADMK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADMK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAMK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAMK_F16_t16
                                        : AMDGPU::V_FMAMK_F16_fake16
                                  : AMDGPU::V_FMAMK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAMK_F64;
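// The two opcode tables above are used when folding a known constant into a
// MAC/FMA: MADMK/FMAMK forms carry the constant as a multiplicand, while
// MADAK/FMAAK forms carry it as the addend, so the literal can be encoded
// inline with the instruction.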
  const bool HasMultipleUses = !MRI->hasOneNonDBGUse(Reg);

  assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");

  if (Opc == AMDGPU::COPY) {
    assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");

    if (HasMultipleUses) {

      unsigned ImmDefSize = RI.getRegSizeInBits(*MRI->getRegClass(Reg));

      if (UseSubReg != AMDGPU::NoSubRegister && ImmDefSize == 64)

      if (ImmDefSize == 32 &&

    bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
                   RI.getSubRegIdxSize(UseSubReg) == 16;

      if (RI.hasVGPRs(DstRC))

      if (DstReg.isVirtual() && UseSubReg != AMDGPU::lo16)

    unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;

    for (unsigned MovOp :
         {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
          AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {

        MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);

        if (MovDstPhysReg) {

              RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);

      if (MovDstPhysReg) {
        if (!MovDstRC->contains(MovDstPhysReg))

      } else if (!MRI->constrainRegClass(DstReg, MovDstRC)) {

      if (!RI.opCanUseLiteralConstant(OpInfo.OperandType) &&

    if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)

    UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);

      UseMI.getOperand(0).setReg(MovDstPhysReg);

    UseMI.setDesc(NewMCID);
    UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
    UseMI.addImplicitDefUseOperands(*MF);

  if (HasMultipleUses)

  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMA_F64_e64 ||
      Opc == AMDGPU::V_FMAC_F64_e64) {

    int Src0Idx = getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::src0);

        Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
    if (!RegSrc->isReg())

    if (RI.isSGPRClass(MRI->getRegClass(RegSrc->getReg())) &&
        ST.getConstantBusLimit(Opc) < 2)

    if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))

    if (Def && Def->isMoveImmediate() &&

    if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
        NewOpc == AMDGPU::V_FMAMK_F16_fake16)

    unsigned SrcSubReg = RegSrc->getSubReg();

    if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
        Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
        Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
        Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
      UseMI.untieRegOperand(
          AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

    bool DeleteDef = MRI->use_nodbg_empty(Reg);

      DefMI.eraseFromParent();

    if (ST.getConstantBusLimit(Opc) < 2) {

    bool Src0Inlined = false;
    if (Src0->isReg()) {

      if (Def && Def->isMoveImmediate() &&

      } else if (ST.getConstantBusLimit(Opc) <= 1 &&

    if (Src1->isReg() && !Src0Inlined) {

      if (Def && Def->isMoveImmediate() &&

          MRI->hasOneNonDBGUse(Src1->getReg()) && commuteInstruction(UseMI))

      else if (RI.isSGPRReg(*MRI, Src1->getReg()))

    if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
        NewOpc == AMDGPU::V_FMAAK_F16_fake16)

    if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
        Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
        Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
        Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
      UseMI.untieRegOperand(
          AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

    const std::optional<int64_t> SubRegImm =

    bool DeleteDef = MRI->use_nodbg_empty(Reg);

      DefMI.eraseFromParent();
  if (BaseOps1.size() != BaseOps2.size())

  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))

  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;

         LowOffset + (int)LowWidth.getValue() <= HighOffset;
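// The check above: two fixed-offset accesses off the same base are trivially
// disjoint when the lower access ends at or before the higher one begins.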
bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,

  int64_t Offset0, Offset1;

  bool Offset0IsScalable, Offset1IsScalable;

  LocationSize Width0 = MIa.memoperands().front()->getSize();
  LocationSize Width1 = MIb.memoperands().front()->getSize();

         "MIa must load from or modify a memory location");

         "MIb must load from or modify a memory location");

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

    return checkInstOffsetsDoNotOverlap(MIa, MIb);

  if (Reg.isPhysical())

  auto *Def = MRI.getUniqueVRegDef(Reg);

  Imm = Def->getOperand(1).getImm();

  unsigned NumOps = MI.getNumOperands();

    if (Op.isReg() && Op.isKill())

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
    return AMDGPU::V_MAD_F16_e64;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
    return AMDGPU::V_MAD_F32_e64;
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
    return AMDGPU::V_MAD_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
    return AMDGPU::V_FMA_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMA_F16_gfx9_t16_e64
                                        : AMDGPU::V_FMA_F16_gfx9_fake16_e64
                                  : AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
    return AMDGPU::V_FMA_F32_e64;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
    return AMDGPU::V_FMA_F64_e64;
  if (Def.isEarlyClobber() && Def.isReg() &&

  auto UpdateDefIndex = [&](LiveRange &LR) {
    auto *S = LR.find(OldIndex);
    if (S != LR.end() && S->start == OldIndex) {
      assert(S->valno && S->valno->def == OldIndex);
      S->start = NewIndex;
      S->valno->def = NewIndex;

  for (auto &SR : LI.subranges())

  if (U.RemoveMIUse) {

    Register DefReg = U.RemoveMIUse->getOperand(0).getReg();

    if (MRI.hasOneNonDBGUse(DefReg)) {

      U.RemoveMIUse->setDesc(get(AMDGPU::IMPLICIT_DEF));
      U.RemoveMIUse->getOperand(0).setIsDead(true);
      for (unsigned I = U.RemoveMIUse->getNumOperands() - 1; I != 0; --I)
        U.RemoveMIUse->removeOperand(I);

      Register DummyReg = MRI.cloneVirtualRegister(DefReg);

        if (MIOp.isReg() && MIOp.getReg() == DefReg) {
          MIOp.setIsUndef(true);
          MIOp.setReg(DummyReg);

                                           ThreeAddressUpdates &U) const {

  unsigned Opc = MI.getOpcode();

  if (NewMFMAOpc != -1) {

    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
      MIB.add(MI.getOperand(I));

  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
  assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
         Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
         "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "

  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
  bool Src0Literal = false;

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F64_e32: {
    int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::src0);
    const MachineOperand *Src0 = &MI.getOperand(Src0Idx);

  MachineInstrBuilder MIB;

  const MachineOperand *Src0Mods =

  const MachineOperand *Src1Mods =

  const MachineOperand *Src2Mods =

  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
      (!IsF64 || ST.hasFmaakFmamkF64Insts()) &&

      (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||

    MachineInstr *DefMI;

        MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),

  if (Src0Literal && !ST.hasVOP3Literal())
  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:

  if (MI.isTerminator() || MI.isPosition())

  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)

  if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)

  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
         MI.getOpcode() == AMDGPU::S_SETPRIO ||
         MI.getOpcode() == AMDGPU::S_SETPRIO_INC_WG ||

  return Opcode == AMDGPU::DS_ORDERED_COUNT ||
         Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
         Opcode == AMDGPU::DS_SUB_GS_REG_RTN || isGWS(Opcode);

  if (MI.getMF()->getFunction().hasFnAttribute("amdgpu-no-flat-scratch-init"))

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();
    if (AS == AMDGPUAS::FLAT_ADDRESS) {
      const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
      return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
                        *MD, AMDGPUAS::PRIVATE_ADDRESS);

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();

  if (ST.isTgSplitEnabled())

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();

  unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
      Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT)

  if (MI.isCall() || MI.isInlineAsm())

  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
      Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
      Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)

  if (MI.isMetaInstruction())

  if (MI.isCopyLike()) {
    if (!RI.isSGPRReg(MRI, MI.getOperand(0).getReg()))

    return MI.readsRegister(AMDGPU::EXEC, &RI);

  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
  switch (Imm.getBitWidth()) {

                                        ST.hasInv2PiInlineImm());

                                        ST.hasInv2PiInlineImm());

    return ST.has16BitInsts() &&

                                            ST.hasInv2PiInlineImm());

  APInt IntImm = Imm.bitcastToAPInt();

  bool HasInv2Pi = ST.hasInv2PiInlineImm();

    return ST.has16BitInsts() &&

    return ST.has16BitInsts() &&

  switch (OperandType) {

    int32_t Trunc = static_cast<int32_t>(Imm);

    int16_t Trunc = static_cast<int16_t>(Imm);
    return ST.has16BitInsts() &&

    int16_t Trunc = static_cast<int16_t>(Imm);
    return ST.has16BitInsts() &&

  if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))

  return ST.hasVOP3Literal();

                                       int64_t ImmVal) const {

  if (isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
      OpNo == (unsigned)AMDGPU::getNamedOperandIdx(InstDesc.getOpcode(),
                                                   AMDGPU::OpName::src2))

  return RI.opCanUseInlineConstant(OpInfo.OperandType);

         "unexpected imm-like operand kind");

  if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())

                              AMDGPU::OpName OpName) const {

  return Mods && Mods->getImm();
  switch (MI.getOpcode()) {
  default:
    return false;

  case AMDGPU::V_ADDC_U32_e64:
  case AMDGPU::V_SUBB_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e64: {

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:

  case AMDGPU::V_CNDMASK_B32_e64:

  if (Src1 && (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()) ||

        (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {
                                           unsigned Op32) const {

      Inst32.add(MI.getOperand(I));

  int Idx = MI.getNumExplicitDefs();
  int OpTy = MI.getDesc().operands()[Idx++].OperandType;

  if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) {

  // The null register is free and never uses the constant bus.
  if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)
    return false;

  return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;

  return AMDGPU::SReg_32RegClass.contains(Reg) ||
         AMDGPU::SReg_64RegClass.contains(Reg);

  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))

  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
    switch (MO.getReg()) {
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
    case AMDGPU::FLAT_SCR:
      return MO.getReg();

  switch (MI.getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
  case AMDGPU::V_WRITELANE_B32:
  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    return false;

  if (MI.isPreISelOpcode() ||
      SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||

  if (SubReg.getReg().isPhysical())

  return SubReg.getSubReg() != AMDGPU::NoSubRegister &&

  if (RI.isVectorRegister(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
    ErrInfo = "illegal copy from vector register to SGPR";
    return false;
  }

  if (!MRI.isSSA() && MI.isCopy())
    return verifyCopy(MI, MRI, ErrInfo);
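// verifyInstruction: generic (GlobalISel) opcodes are skipped, then the named
// source-operand indices are looked up once and reused by all of the checks
// that follow.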
  if (SIInstrInfo::isGenericOpcode(Opcode))
    return true;

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  if (Src0Idx == -1) {
    // VOPD instructions use different operand names.
    Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
    Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
    Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
    Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);
  }

  if (!Desc.isVariadic() &&
      Desc.getNumOperands() != MI.getNumExplicitOperands()) {
    ErrInfo = "Instruction has wrong number of operands.";
    return false;
  }

  if (MI.isInlineAsm()) {
    if (!Reg.isVirtual() && !RC->contains(Reg)) {
      ErrInfo = "inlineasm operand has incorrect register class.";
      return false;
    }
  }

  if (isImage(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
    ErrInfo = "missing memory operand from image instruction.";
    return false;
  }

  // Make sure the register classes are correct.
  for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
      ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
                "all fp values to integers.";
      return false;

    int16_t RegClass = getOpRegClassID(OpInfo);
    switch (OpInfo.OperandType) {
      if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) {
        ErrInfo = "Illegal immediate value for operand.";
        return false;
      }

        ErrInfo = "Illegal immediate value for operand.";
        return false;

        ErrInfo = "Expected inline constant for operand.";
        return false;

      if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
        ErrInfo = "Expected immediate, but got non-immediate";
        return false;
      }

    if (OpInfo.isGenericType())

    if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO) {
      if (RI.hasVectorRegisters(RC) && MO.getSubReg()) {
        if (const TargetRegisterClass *SubRC =
                RI.getSubRegisterClass(RC, MO.getSubReg())) {
          RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.getSubReg());
        }
      }

      if (!RC || !RI.isProperlyAlignedRC(*RC)) {
        ErrInfo = "Subtarget requires even aligned vector registers";
        return false;
      }
    }

    if (RegClass != -1) {
      if (Reg.isVirtual())
        continue;

        ErrInfo = "Operand has incorrect register class.";
        return false;
    if (!ST.hasSDWA()) {
      ErrInfo = "SDWA is not supported on this target";
      return false;
    }

    for (auto Op : {AMDGPU::OpName::src0_sel, AMDGPU::OpName::src1_sel,
                    AMDGPU::OpName::dst_sel}) {
        int64_t Imm = MO->getImm();
          ErrInfo = "Invalid SDWA selection";
          return false;
    }

    int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);

    for (int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
      if (!ST.hasSDWAScalar()) {
        // Only VGPRs are allowed as SDWA operands on VI.
        if (!MO.isReg() ||
            !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) {
          ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";
          return false;
        }

          ErrInfo = "Only reg allowed as operands in SDWA instructions on GFX9+";
          return false;
      }
    }

    if (!ST.hasSDWAOmod()) {
      if (OMod != nullptr &&
          (!OMod->isImm() || OMod->getImm() != 0)) {
        ErrInfo = "OMod not allowed in SDWA instructions on VI";
        return false;
      }
    }

    if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
        Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
        Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
        Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
      unsigned Mods = Src0ModsMO->getImm();
        ErrInfo = "sext, abs and neg are not allowed on this instruction";
        return false;
    }

    if (isVOPC(BasicOpcode)) {
      if (!ST.hasSDWASdst() && DstIdx != -1) {
        if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
          ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
          return false;
        }
      } else if (!ST.hasSDWAOutModsVOPC()) {
        if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
          ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";
          return false;
        }

        if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
          ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";
          return false;
        }
      }
    }

    if (DstUnused && DstUnused->isImm() &&
      if (!Dst.isReg() || !Dst.isTied()) {
        ErrInfo = "Dst register should have tied register";
        return false;
      }

          MI.getOperand(MI.findTiedOperandIdx(DstIdx));
        ErrInfo =
            "Dst register should be tied to implicit use of preserved register";

        ErrInfo =
            "Dst register should use same physical register as preserved";
  if (isImage(Opcode) && !MI.mayStore()) {
    if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())

        AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
      uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
      if (RegCount > DstSize) {
        ErrInfo = "Image instruction returns too many registers for dst "
                  "register";
        return false;
      }
  }

  if (isVALU(MI) && Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
    unsigned ConstantBusCount = 0;
    bool UsesLiteral = false;

    int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
      LiteralVal = &MI.getOperand(ImmIdx);

    for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
      } else if (!MO.isFI()) {
          ErrInfo = "VOP2/VOP3 instruction uses more than one literal";
          return false;

      if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
            return !RI.regsOverlap(SGPRUsed, SGPR);
          }))
    }

    if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
        Opcode != AMDGPU::V_WRITELANE_B32) {
      ErrInfo = "VOP* instruction violates constant bus restriction";
      return false;
    }

    if (isVOP3(MI) && UsesLiteral && !ST.hasVOP3Literal()) {
      ErrInfo = "VOP3 instruction uses literal";
      return false;
    }
  }
  if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
    unsigned SGPRCount = 0;

    for (int OpIdx : {Src0Idx, Src1Idx}) {
        if (MO.getReg() != SGPRUsed)
    }
    if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
      ErrInfo = "WRITELANE instruction violates constant bus restriction";
      return false;
    }
  }

  if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
      Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
      ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
      return false;
  }

    ErrInfo = "ABS not allowed in VOP3B instructions";
    return false;

    ErrInfo = "SOP2/SOPC instruction requires too many immediate constants";
    return false;

  if (Desc.isBranch()) {
      ErrInfo = "invalid branch target for SOPK instruction";
      return false;

      ErrInfo = "invalid immediate for SOPK instruction";
      return false;

      ErrInfo = "invalid immediate for SOPK instruction";
      return false;
  }

  if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
    const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
                       Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;

    const unsigned StaticNumOps =
        Desc.getNumOperands() + Desc.implicit_uses().size();
    const unsigned NumImplicitOps = IsDst ? 2 : 1;

    if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
      ErrInfo = "missing implicit register operands";
      return false;
    }

    if (!Dst->isUse()) {
      ErrInfo = "v_movreld_b32 vdst should be a use operand";
      return false;
    }

    if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
        UseOpIdx != StaticNumOps + 1) {
      ErrInfo = "movrel implicit operands should be tied";
      return false;
    }

    const MachineOperand &ImpUse
      = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
    if (!isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
      ErrInfo = "src0 should be subreg of implicit vector use";
      return false;
    }
  }

  if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
    ErrInfo = "VALU instruction does not implicitly read exec mask";
    return false;
  }
  if (MI.mayStore() &&
    if (Soff && Soff->getReg() != AMDGPU::M0) {
      ErrInfo = "scalar stores must use m0 as offset register";
      return false;
    }

  if (isFLAT(MI) && !ST.hasFlatInstOffsets()) {
    if (Offset->getImm() != 0) {
      ErrInfo = "subtarget does not support offsets in flat instructions";
      return false;
    }
  }

  if (isDS(MI) && !ST.hasGDS()) {
    if (GDSOp && GDSOp->getImm() != 0) {
      ErrInfo = "GDS is not supported on this subtarget";
      return false;
    }
  }

    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vaddr0);
    AMDGPU::OpName RSrcOpName =
        isMIMG(MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
    int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);

      ErrInfo = "dim is out of range";
      return false;

    if (ST.hasR128A16()) {
      IsA16 = R128A16->getImm() != 0;
    } else if (ST.hasA16()) {
      IsA16 = A16->getImm() != 0;
    }

    bool IsNSA = RsrcIdx - VAddr0Idx > 1;

    unsigned AddrWords =

    unsigned VAddrWords;
      VAddrWords = RsrcIdx - VAddr0Idx;
      if (ST.hasPartialNSAEncoding() &&
        unsigned LastVAddrIdx = RsrcIdx - 1;
        VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1;
      }

    if (VAddrWords != AddrWords) {
      LLVM_DEBUG(dbgs() << "bad vaddr size, expected " << AddrWords
                        << " but got " << VAddrWords << "\n");
      ErrInfo = "bad vaddr size";
      return false;
    }
    unsigned DC = DppCt->getImm();
    if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
        DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
        (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
      ErrInfo = "Invalid dpp_ctrl value";
      return false;
    }

    if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "wavefront shifts are not supported on GFX10+";
      return false;

    if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "broadcasts are not supported on GFX10+";
      return false;

    if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
      if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
          DC <= DppCtrl::ROW_NEWBCAST_LAST &&
          !ST.hasGFX90AInsts()) {
        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_newbroadcast/row_share is not supported before "
                  "GFX90A";
        return false;
      }
      if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_share and row_xmask are not supported before GFX10";
        return false;
      }

    if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "DP ALU dpp only support row_newbcast";
      return false;
    AMDGPU::OpName DataName =
        isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;

    if (ST.hasGFX90AInsts()) {
      if (Dst && Data && !Dst->isTied() && !Data->isTied() &&
          (RI.isAGPR(MRI, Dst->getReg()) != RI.isAGPR(MRI, Data->getReg()))) {
        ErrInfo = "Invalid register class: "
                  "vdata and vdst should be both VGPR or AGPR";
        return false;
      }

      if (Data && Data2 &&
        ErrInfo = "Invalid register class: "
                  "both data operands should be VGPR or AGPR";
        return false;
    }

      if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
          (Data2 && RI.isAGPR(MRI, Data2->getReg()))) {
        ErrInfo = "Invalid register class: "
                  "agpr loads and stores not supported on this GPU";
        return false;
      }
  if (ST.needsAlignedVGPRs()) {
    const auto isAlignedReg = [&MI, &MRI, this](AMDGPU::OpName OpName) -> bool {
      if (Reg.isPhysical())
        return !(RI.getHWRegIndex(Reg) & 1);
      return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
             !(RI.getChannelFromSubReg(Op->getSubReg()) & 1);
    };

    if (Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
        Opcode == AMDGPU::DS_GWS_BARRIER) {
      if (!isAlignedReg(AMDGPU::OpName::data0)) {
        ErrInfo = "Subtarget requires even aligned vector registers "
                  "for DS_GWS instructions";
        return false;
      }
    }

      if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
        ErrInfo = "Subtarget requires even aligned vector registers "
                  "for vaddr operand of image instructions";
        return false;
      }
  }

  if (Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts()) {
    if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
      ErrInfo = "Invalid register class: "
                "v_accvgpr_write with an SGPR is not supported on this GPU";
      return false;
    }
  }

  if (Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
      ErrInfo = "pseudo expects only physical SGPRs";
      return false;
  }

    if (!ST.hasScaleOffset()) {
      ErrInfo = "Subtarget does not support offset scaling";
      return false;
    }

      ErrInfo = "Instruction does not support offset scaling";
      return false;

  for (unsigned I = 0; I < 3; ++I) {
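// getVALUOp: map a scalar (SALU) opcode to the VALU opcode used when the
// instruction has to be moved off the scalar unit. F16 cases pick between the
// t16 and fake16 encodings depending on the subtarget, and opcodes with no
// vector equivalent map to INSTRUCTION_LIST_END.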
5749 switch (
MI.getOpcode()) {
5750 default:
return AMDGPU::INSTRUCTION_LIST_END;
5751 case AMDGPU::REG_SEQUENCE:
return AMDGPU::REG_SEQUENCE;
5752 case AMDGPU::COPY:
return AMDGPU::COPY;
5753 case AMDGPU::PHI:
return AMDGPU::PHI;
5754 case AMDGPU::INSERT_SUBREG:
return AMDGPU::INSERT_SUBREG;
5755 case AMDGPU::WQM:
return AMDGPU::WQM;
5756 case AMDGPU::SOFT_WQM:
return AMDGPU::SOFT_WQM;
5757 case AMDGPU::STRICT_WWM:
return AMDGPU::STRICT_WWM;
5758 case AMDGPU::STRICT_WQM:
return AMDGPU::STRICT_WQM;
  case AMDGPU::S_MOV_B32: {
    return MI.getOperand(1).isReg() ||
                   RI.isAGPR(MRI, MI.getOperand(0).getReg())
               ? AMDGPU::COPY
               : AMDGPU::V_MOV_B32_e32;
  }
  case AMDGPU::S_ADD_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
5767 case AMDGPU::S_ADDC_U32:
5768 return AMDGPU::V_ADDC_U32_e32;
5769 case AMDGPU::S_SUB_I32:
5770 return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
5773 case AMDGPU::S_ADD_U32:
5774 return AMDGPU::V_ADD_CO_U32_e32;
5775 case AMDGPU::S_SUB_U32:
5776 return AMDGPU::V_SUB_CO_U32_e32;
5777 case AMDGPU::S_ADD_U64_PSEUDO:
5778 return AMDGPU::V_ADD_U64_PSEUDO;
5779 case AMDGPU::S_SUB_U64_PSEUDO:
5780 return AMDGPU::V_SUB_U64_PSEUDO;
5781 case AMDGPU::S_SUBB_U32:
return AMDGPU::V_SUBB_U32_e32;
5782 case AMDGPU::S_MUL_I32:
return AMDGPU::V_MUL_LO_U32_e64;
5783 case AMDGPU::S_MUL_HI_U32:
return AMDGPU::V_MUL_HI_U32_e64;
5784 case AMDGPU::S_MUL_HI_I32:
return AMDGPU::V_MUL_HI_I32_e64;
5785 case AMDGPU::S_AND_B32:
return AMDGPU::V_AND_B32_e64;
5786 case AMDGPU::S_OR_B32:
return AMDGPU::V_OR_B32_e64;
5787 case AMDGPU::S_XOR_B32:
return AMDGPU::V_XOR_B32_e64;
5788 case AMDGPU::S_XNOR_B32:
5789 return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
5790 case AMDGPU::S_MIN_I32:
return AMDGPU::V_MIN_I32_e64;
5791 case AMDGPU::S_MIN_U32:
return AMDGPU::V_MIN_U32_e64;
5792 case AMDGPU::S_MAX_I32:
return AMDGPU::V_MAX_I32_e64;
5793 case AMDGPU::S_MAX_U32:
return AMDGPU::V_MAX_U32_e64;
5794 case AMDGPU::S_ASHR_I32:
return AMDGPU::V_ASHR_I32_e32;
5795 case AMDGPU::S_ASHR_I64:
return AMDGPU::V_ASHR_I64_e64;
5796 case AMDGPU::S_LSHL_B32:
return AMDGPU::V_LSHL_B32_e32;
5797 case AMDGPU::S_LSHL_B64:
return AMDGPU::V_LSHL_B64_e64;
5798 case AMDGPU::S_LSHR_B32:
return AMDGPU::V_LSHR_B32_e32;
5799 case AMDGPU::S_LSHR_B64:
return AMDGPU::V_LSHR_B64_e64;
5800 case AMDGPU::S_SEXT_I32_I8:
return AMDGPU::V_BFE_I32_e64;
5801 case AMDGPU::S_SEXT_I32_I16:
return AMDGPU::V_BFE_I32_e64;
5802 case AMDGPU::S_BFE_U32:
return AMDGPU::V_BFE_U32_e64;
5803 case AMDGPU::S_BFE_I32:
return AMDGPU::V_BFE_I32_e64;
5804 case AMDGPU::S_BFM_B32:
return AMDGPU::V_BFM_B32_e64;
5805 case AMDGPU::S_BREV_B32:
return AMDGPU::V_BFREV_B32_e32;
5806 case AMDGPU::S_NOT_B32:
return AMDGPU::V_NOT_B32_e32;
5807 case AMDGPU::S_NOT_B64:
return AMDGPU::V_NOT_B32_e32;
5808 case AMDGPU::S_CMP_EQ_I32:
return AMDGPU::V_CMP_EQ_I32_e64;
5809 case AMDGPU::S_CMP_LG_I32:
return AMDGPU::V_CMP_NE_I32_e64;
5810 case AMDGPU::S_CMP_GT_I32:
return AMDGPU::V_CMP_GT_I32_e64;
5811 case AMDGPU::S_CMP_GE_I32:
return AMDGPU::V_CMP_GE_I32_e64;
5812 case AMDGPU::S_CMP_LT_I32:
return AMDGPU::V_CMP_LT_I32_e64;
5813 case AMDGPU::S_CMP_LE_I32:
return AMDGPU::V_CMP_LE_I32_e64;
5814 case AMDGPU::S_CMP_EQ_U32:
return AMDGPU::V_CMP_EQ_U32_e64;
5815 case AMDGPU::S_CMP_LG_U32:
return AMDGPU::V_CMP_NE_U32_e64;
5816 case AMDGPU::S_CMP_GT_U32:
return AMDGPU::V_CMP_GT_U32_e64;
5817 case AMDGPU::S_CMP_GE_U32:
return AMDGPU::V_CMP_GE_U32_e64;
5818 case AMDGPU::S_CMP_LT_U32:
return AMDGPU::V_CMP_LT_U32_e64;
5819 case AMDGPU::S_CMP_LE_U32:
return AMDGPU::V_CMP_LE_U32_e64;
5820 case AMDGPU::S_CMP_EQ_U64:
return AMDGPU::V_CMP_EQ_U64_e64;
5821 case AMDGPU::S_CMP_LG_U64:
return AMDGPU::V_CMP_NE_U64_e64;
5822 case AMDGPU::S_BCNT1_I32_B32:
return AMDGPU::V_BCNT_U32_B32_e64;
5823 case AMDGPU::S_FF1_I32_B32:
return AMDGPU::V_FFBL_B32_e32;
5824 case AMDGPU::S_FLBIT_I32_B32:
return AMDGPU::V_FFBH_U32_e32;
5825 case AMDGPU::S_FLBIT_I32:
return AMDGPU::V_FFBH_I32_e64;
5826 case AMDGPU::S_CBRANCH_SCC0:
return AMDGPU::S_CBRANCH_VCCZ;
5827 case AMDGPU::S_CBRANCH_SCC1:
return AMDGPU::S_CBRANCH_VCCNZ;
5828 case AMDGPU::S_CVT_F32_I32:
return AMDGPU::V_CVT_F32_I32_e64;
5829 case AMDGPU::S_CVT_F32_U32:
return AMDGPU::V_CVT_F32_U32_e64;
5830 case AMDGPU::S_CVT_I32_F32:
return AMDGPU::V_CVT_I32_F32_e64;
5831 case AMDGPU::S_CVT_U32_F32:
return AMDGPU::V_CVT_U32_F32_e64;
5832 case AMDGPU::S_CVT_F32_F16:
5833 case AMDGPU::S_CVT_HI_F32_F16:
5834 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
5835 : AMDGPU::V_CVT_F32_F16_fake16_e64;
5836 case AMDGPU::S_CVT_F16_F32:
5837 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
5838 : AMDGPU::V_CVT_F16_F32_fake16_e64;
5839 case AMDGPU::S_CEIL_F32:
return AMDGPU::V_CEIL_F32_e64;
5840 case AMDGPU::S_FLOOR_F32:
return AMDGPU::V_FLOOR_F32_e64;
5841 case AMDGPU::S_TRUNC_F32:
return AMDGPU::V_TRUNC_F32_e64;
5842 case AMDGPU::S_RNDNE_F32:
return AMDGPU::V_RNDNE_F32_e64;
5843 case AMDGPU::S_CEIL_F16:
5844 return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
5845 : AMDGPU::V_CEIL_F16_fake16_e64;
5846 case AMDGPU::S_FLOOR_F16:
5847 return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
5848 : AMDGPU::V_FLOOR_F16_fake16_e64;
5849 case AMDGPU::S_TRUNC_F16:
5850 return ST.useRealTrue16Insts() ? AMDGPU::V_TRUNC_F16_t16_e64
5851 : AMDGPU::V_TRUNC_F16_fake16_e64;
5852 case AMDGPU::S_RNDNE_F16:
5853 return ST.useRealTrue16Insts() ? AMDGPU::V_RNDNE_F16_t16_e64
5854 : AMDGPU::V_RNDNE_F16_fake16_e64;
5855 case AMDGPU::S_ADD_F32:
return AMDGPU::V_ADD_F32_e64;
5856 case AMDGPU::S_SUB_F32:
return AMDGPU::V_SUB_F32_e64;
5857 case AMDGPU::S_MIN_F32:
return AMDGPU::V_MIN_F32_e64;
5858 case AMDGPU::S_MAX_F32:
return AMDGPU::V_MAX_F32_e64;
5859 case AMDGPU::S_MINIMUM_F32:
return AMDGPU::V_MINIMUM_F32_e64;
5860 case AMDGPU::S_MAXIMUM_F32:
return AMDGPU::V_MAXIMUM_F32_e64;
5861 case AMDGPU::S_MUL_F32:
return AMDGPU::V_MUL_F32_e64;
5862 case AMDGPU::S_ADD_F16:
5863 return ST.useRealTrue16Insts() ? AMDGPU::V_ADD_F16_t16_e64
5864 : AMDGPU::V_ADD_F16_fake16_e64;
5865 case AMDGPU::S_SUB_F16:
5866 return ST.useRealTrue16Insts() ? AMDGPU::V_SUB_F16_t16_e64
5867 : AMDGPU::V_SUB_F16_fake16_e64;
5868 case AMDGPU::S_MIN_F16:
5869 return ST.useRealTrue16Insts() ? AMDGPU::V_MIN_F16_t16_e64
5870 : AMDGPU::V_MIN_F16_fake16_e64;
5871 case AMDGPU::S_MAX_F16:
5872 return ST.useRealTrue16Insts() ? AMDGPU::V_MAX_F16_t16_e64
5873 : AMDGPU::V_MAX_F16_fake16_e64;
5874 case AMDGPU::S_MINIMUM_F16:
5875 return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
5876 : AMDGPU::V_MINIMUM_F16_fake16_e64;
5877 case AMDGPU::S_MAXIMUM_F16:
5878 return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
5879 : AMDGPU::V_MAXIMUM_F16_fake16_e64;
5880 case AMDGPU::S_MUL_F16:
5881 return ST.useRealTrue16Insts() ? AMDGPU::V_MUL_F16_t16_e64
5882 : AMDGPU::V_MUL_F16_fake16_e64;
5883 case AMDGPU::S_CVT_PK_RTZ_F16_F32:
return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
5884 case AMDGPU::S_FMAC_F32:
return AMDGPU::V_FMAC_F32_e64;
5885 case AMDGPU::S_FMAC_F16:
5886 return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
5887 : AMDGPU::V_FMAC_F16_fake16_e64;
5888 case AMDGPU::S_FMAMK_F32:
return AMDGPU::V_FMAMK_F32;
5889 case AMDGPU::S_FMAAK_F32:
return AMDGPU::V_FMAAK_F32;
5890 case AMDGPU::S_CMP_LT_F32:
return AMDGPU::V_CMP_LT_F32_e64;
5891 case AMDGPU::S_CMP_EQ_F32:
return AMDGPU::V_CMP_EQ_F32_e64;
5892 case AMDGPU::S_CMP_LE_F32:
return AMDGPU::V_CMP_LE_F32_e64;
5893 case AMDGPU::S_CMP_GT_F32:
return AMDGPU::V_CMP_GT_F32_e64;
5894 case AMDGPU::S_CMP_LG_F32:
return AMDGPU::V_CMP_LG_F32_e64;
5895 case AMDGPU::S_CMP_GE_F32:
return AMDGPU::V_CMP_GE_F32_e64;
5896 case AMDGPU::S_CMP_O_F32:
return AMDGPU::V_CMP_O_F32_e64;
5897 case AMDGPU::S_CMP_U_F32:
return AMDGPU::V_CMP_U_F32_e64;
5898 case AMDGPU::S_CMP_NGE_F32:
return AMDGPU::V_CMP_NGE_F32_e64;
5899 case AMDGPU::S_CMP_NLG_F32:
return AMDGPU::V_CMP_NLG_F32_e64;
5900 case AMDGPU::S_CMP_NGT_F32:
return AMDGPU::V_CMP_NGT_F32_e64;
5901 case AMDGPU::S_CMP_NLE_F32:
return AMDGPU::V_CMP_NLE_F32_e64;
5902 case AMDGPU::S_CMP_NEQ_F32:
return AMDGPU::V_CMP_NEQ_F32_e64;
5903 case AMDGPU::S_CMP_NLT_F32:
return AMDGPU::V_CMP_NLT_F32_e64;
5904 case AMDGPU::S_CMP_LT_F16:
5905 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
5906 : AMDGPU::V_CMP_LT_F16_fake16_e64;
5907 case AMDGPU::S_CMP_EQ_F16:
5908 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
5909 : AMDGPU::V_CMP_EQ_F16_fake16_e64;
5910 case AMDGPU::S_CMP_LE_F16:
5911 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
5912 : AMDGPU::V_CMP_LE_F16_fake16_e64;
5913 case AMDGPU::S_CMP_GT_F16:
5914 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
5915 : AMDGPU::V_CMP_GT_F16_fake16_e64;
5916 case AMDGPU::S_CMP_LG_F16:
5917 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
5918 : AMDGPU::V_CMP_LG_F16_fake16_e64;
5919 case AMDGPU::S_CMP_GE_F16:
5920 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
5921 : AMDGPU::V_CMP_GE_F16_fake16_e64;
5922 case AMDGPU::S_CMP_O_F16:
5923 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
5924 : AMDGPU::V_CMP_O_F16_fake16_e64;
5925 case AMDGPU::S_CMP_U_F16:
5926 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
5927 : AMDGPU::V_CMP_U_F16_fake16_e64;
5928 case AMDGPU::S_CMP_NGE_F16:
5929 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
5930 : AMDGPU::V_CMP_NGE_F16_fake16_e64;
5931 case AMDGPU::S_CMP_NLG_F16:
5932 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
5933 : AMDGPU::V_CMP_NLG_F16_fake16_e64;
5934 case AMDGPU::S_CMP_NGT_F16:
5935 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
5936 : AMDGPU::V_CMP_NGT_F16_fake16_e64;
5937 case AMDGPU::S_CMP_NLE_F16:
5938 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
5939 : AMDGPU::V_CMP_NLE_F16_fake16_e64;
5940 case AMDGPU::S_CMP_NEQ_F16:
5941 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
5942 : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
5943 case AMDGPU::S_CMP_NLT_F16:
5944 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
5945 : AMDGPU::V_CMP_NLT_F16_fake16_e64;
5946 case AMDGPU::V_S_EXP_F32_e64:
return AMDGPU::V_EXP_F32_e64;
5947 case AMDGPU::V_S_EXP_F16_e64:
5948 return ST.useRealTrue16Insts() ? AMDGPU::V_EXP_F16_t16_e64
5949 : AMDGPU::V_EXP_F16_fake16_e64;
5950 case AMDGPU::V_S_LOG_F32_e64:
return AMDGPU::V_LOG_F32_e64;
5951 case AMDGPU::V_S_LOG_F16_e64:
5952 return ST.useRealTrue16Insts() ? AMDGPU::V_LOG_F16_t16_e64
5953 : AMDGPU::V_LOG_F16_fake16_e64;
5954 case AMDGPU::V_S_RCP_F32_e64:
return AMDGPU::V_RCP_F32_e64;
5955 case AMDGPU::V_S_RCP_F16_e64:
5956 return ST.useRealTrue16Insts() ? AMDGPU::V_RCP_F16_t16_e64
5957 : AMDGPU::V_RCP_F16_fake16_e64;
5958 case AMDGPU::V_S_RSQ_F32_e64:
return AMDGPU::V_RSQ_F32_e64;
5959 case AMDGPU::V_S_RSQ_F16_e64:
5960 return ST.useRealTrue16Insts() ? AMDGPU::V_RSQ_F16_t16_e64
5961 : AMDGPU::V_RSQ_F16_fake16_e64;
5962 case AMDGPU::V_S_SQRT_F32_e64:
return AMDGPU::V_SQRT_F32_e64;
5963 case AMDGPU::V_S_SQRT_F16_e64:
5964 return ST.useRealTrue16Insts() ? AMDGPU::V_SQRT_F16_t16_e64
5965 : AMDGPU::V_SQRT_F16_fake16_e64;
5968 "Unexpected scalar opcode without corresponding vector one!");
6017 "Not a whole wave func");
  if (MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_SETUP ||
      MI.getOpcode() == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
6036 int16_t RegClass = getOpRegClassID(OpInfo);
6037 return RI.getRegClass(RegClass);
6041 unsigned OpNo)
const {
6043 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
6044 Desc.operands()[OpNo].RegClass == -1) {
6047 if (Reg.isVirtual()) {
6049 MI.getParent()->getParent()->getRegInfo();
6050 return MRI.getRegClass(Reg);
6052 return RI.getPhysRegBaseClass(Reg);
6055 return RI.getRegClass(getOpRegClassID(
Desc.operands()[OpNo]));
  unsigned RCID = getOpRegClassID(get(MI.getOpcode()).operands()[OpIdx]);

  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned Opcode = (Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
                    : Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
                                 : AMDGPU::V_MOV_B32_e32;
    Opcode = AMDGPU::COPY;
  else if (RI.isSGPRClass(RC))
    Opcode = (Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
6086 return RI.getSubReg(SuperReg.
getReg(), SubIdx);
6092 unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.
getSubReg(), SubIdx);
6103 if (SubIdx == AMDGPU::sub0)
6105 if (SubIdx == AMDGPU::sub1)
6117void SIInstrInfo::swapOperands(
MachineInstr &Inst)
const {
6133 if (Reg.isPhysical())
6143 return RI.getMatchingSuperRegClass(SuperRC, DRC, MO.
getSubReg()) !=
nullptr;
6146 return RI.getCommonSubClass(DRC, RC) !=
nullptr;
6153 unsigned Opc =
MI.getOpcode();
6159 constexpr AMDGPU::OpName OpNames[] = {
6160 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
6163 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
I]);
6164 if (
static_cast<unsigned>(SrcIdx) ==
OpIdx &&
6174 bool IsAGPR = RI.isAGPR(
MRI, MO.
getReg());
6175 if (IsAGPR && !ST.hasMAIInsts())
6177 if (IsAGPR && (!ST.hasGFX90AInsts() || !
MRI.reservedRegsFrozen()) &&
6181 const int VDstIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
6182 const int DataIdx = AMDGPU::getNamedOperandIdx(
6183 Opc,
isDS(
Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
6184 if ((
int)
OpIdx == VDstIdx && DataIdx != -1 &&
6185 MI.getOperand(DataIdx).isReg() &&
6186 RI.isAGPR(
MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
6188 if ((
int)
OpIdx == DataIdx) {
6189 if (VDstIdx != -1 &&
6190 RI.isAGPR(
MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
6193 const int Data1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data1);
6194 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
6195 RI.isAGPR(
MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
6200 if (
Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
6201 (
int)
OpIdx == AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) &&
  constexpr unsigned NumOps = 3;
  constexpr AMDGPU::OpName OpNames[NumOps * 2] = {
      AMDGPU::OpName::src0,           AMDGPU::OpName::src1,
      AMDGPU::OpName::src2,           AMDGPU::OpName::src0_modifiers,
      AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
6230 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[SrcN]);
6233 MO = &
MI.getOperand(SrcIdx);
6240 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
NumOps + SrcN]);
6244 unsigned Mods =
MI.getOperand(ModsIdx).getImm();
6248 return !OpSel && !OpSelHi;
6257 int64_t RegClass = getOpRegClassID(OpInfo);
6259 RegClass != -1 ? RI.getRegClass(RegClass) :
nullptr;
6268 int ConstantBusLimit = ST.getConstantBusLimit(
MI.getOpcode());
6269 int LiteralLimit = !
isVOP3(
MI) || ST.hasVOP3Literal() ? 1 : 0;
6273 if (!LiteralLimit--)
6283 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6291 if (--ConstantBusLimit <= 0)
6303 if (!LiteralLimit--)
6305 if (--ConstantBusLimit <= 0)
6311 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6315 if (!
Op.isReg() && !
Op.isFI() && !
Op.isRegMask() &&
6317 !
Op.isIdenticalTo(*MO))
6327 }
else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
6341 bool Is64BitOp = Is64BitFPOp ||
6348 (!ST.has64BitLiterals() || InstDesc.
getSize() != 4))
6357 if (!Is64BitFPOp && (int32_t)Imm < 0 &&
6375 bool IsGFX950Only = ST.hasGFX950Insts();
6376 bool IsGFX940Only = ST.hasGFX940Insts();
6378 if (!IsGFX950Only && !IsGFX940Only)
6396 unsigned Opcode =
MI.getOpcode();
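// The switch below lists the packed-math (V_PK_*), QSAD/MQSAD and FP8/BF8
// conversion opcodes that receive the GFX940/GFX950-specific treatment
// selected by the subtarget checks above.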
6398 case AMDGPU::V_CVT_PK_BF8_F32_e64:
6399 case AMDGPU::V_CVT_PK_FP8_F32_e64:
6400 case AMDGPU::V_MQSAD_PK_U16_U8_e64:
6401 case AMDGPU::V_MQSAD_U32_U8_e64:
6402 case AMDGPU::V_PK_ADD_F16:
6403 case AMDGPU::V_PK_ADD_F32:
6404 case AMDGPU::V_PK_ADD_I16:
6405 case AMDGPU::V_PK_ADD_U16:
6406 case AMDGPU::V_PK_ASHRREV_I16:
6407 case AMDGPU::V_PK_FMA_F16:
6408 case AMDGPU::V_PK_FMA_F32:
6409 case AMDGPU::V_PK_FMAC_F16_e32:
6410 case AMDGPU::V_PK_FMAC_F16_e64:
6411 case AMDGPU::V_PK_LSHLREV_B16:
6412 case AMDGPU::V_PK_LSHRREV_B16:
6413 case AMDGPU::V_PK_MAD_I16:
6414 case AMDGPU::V_PK_MAD_U16:
6415 case AMDGPU::V_PK_MAX_F16:
6416 case AMDGPU::V_PK_MAX_I16:
6417 case AMDGPU::V_PK_MAX_U16:
6418 case AMDGPU::V_PK_MIN_F16:
6419 case AMDGPU::V_PK_MIN_I16:
6420 case AMDGPU::V_PK_MIN_U16:
6421 case AMDGPU::V_PK_MOV_B32:
6422 case AMDGPU::V_PK_MUL_F16:
6423 case AMDGPU::V_PK_MUL_F32:
6424 case AMDGPU::V_PK_MUL_LO_U16:
6425 case AMDGPU::V_PK_SUB_I16:
6426 case AMDGPU::V_PK_SUB_U16:
6427 case AMDGPU::V_QSAD_PK_U16_U8_e64:
6436 unsigned Opc =
MI.getOpcode();
6439 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
6442 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
6448 if (HasImplicitSGPR && ST.getConstantBusLimit(
Opc) <= 1 && Src0.
isReg() &&
6455 if (
Opc == AMDGPU::V_WRITELANE_B32) {
6458 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6464 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6474 if (
Opc == AMDGPU::V_FMAC_F32_e32 ||
Opc == AMDGPU::V_FMAC_F16_e32) {
6475 int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
6476 if (!RI.isVGPR(
MRI,
MI.getOperand(Src2Idx).getReg()))
6488 if (
Opc == AMDGPU::V_READLANE_B32 && Src1.
isReg() &&
6490 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6502 if (HasImplicitSGPR || !
MI.isCommutable()) {
6519 if (CommutedOpc == -1) {
6524 MI.setDesc(
get(CommutedOpc));
6528 bool Src0Kill = Src0.
isKill();
6532 else if (Src1.
isReg()) {
6547 unsigned Opc =
MI.getOpcode();
6550 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0),
6551 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1),
6552 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2)
6555 if (
Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
6556 Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
6557 Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
6558 Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
6559 Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
6560 Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
6561 Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
6565 if (Src1.
isReg() && !RI.isSGPRClass(
MRI.getRegClass(Src1.
getReg()))) {
6566 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6571 if (VOP3Idx[2] != -1) {
6573 if (Src2.
isReg() && !RI.isSGPRClass(
MRI.getRegClass(Src2.
getReg()))) {
6574 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6583 int ConstantBusLimit = ST.getConstantBusLimit(
Opc);
6584 int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
6586 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
6588 SGPRsUsed.
insert(SGPRReg);
6592 for (
int Idx : VOP3Idx) {
6601 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6613 if (!RI.isSGPRClass(RI.getRegClassForReg(
MRI, MO.
getReg())))
6620 if (ConstantBusLimit > 0) {
6632 if ((
Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64) &&
6633 !RI.isVGPR(
MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6639 for (
unsigned I = 0;
I < 3; ++
I) {
6652 SRC = RI.getCommonSubClass(SRC, DstRC);
6655 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6657 if (RI.hasAGPRs(VRC)) {
6658 VRC = RI.getEquivalentVGPRClass(VRC);
6659 Register NewSrcReg =
MRI.createVirtualRegister(VRC);
6661 get(TargetOpcode::COPY), NewSrcReg)
6668 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6674 for (
unsigned i = 0; i < SubRegs; ++i) {
6675 Register SGPR =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6677 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6678 .
addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
6684 get(AMDGPU::REG_SEQUENCE), DstReg);
6685 for (
unsigned i = 0; i < SubRegs; ++i) {
6687 MIB.
addImm(RI.getSubRegFromChannel(i));
6700 if (SBase && !RI.isSGPRClass(
MRI.getRegClass(SBase->getReg()))) {
6702 SBase->setReg(SGPR);
6705 if (SOff && !RI.isSGPRReg(
MRI, SOff->
getReg())) {
6713 int OldSAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::saddr);
6714 if (OldSAddrIdx < 0)
6730 int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
6731 if (NewVAddrIdx < 0)
6734 int OldVAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
6738 if (OldVAddrIdx >= 0) {
6740 VAddrDef =
MRI.getUniqueVRegDef(VAddr.
getReg());
6752 if (OldVAddrIdx == NewVAddrIdx) {
6755 MRI.removeRegOperandFromUseList(&NewVAddr);
6756 MRI.moveOperands(&NewVAddr, &SAddr, 1);
6760 MRI.removeRegOperandFromUseList(&NewVAddr);
6761 MRI.addRegOperandToUseList(&NewVAddr);
6763 assert(OldSAddrIdx == NewVAddrIdx);
6765 if (OldVAddrIdx >= 0) {
6766 int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
6767 AMDGPU::OpName::vdst_in);
6771 if (NewVDstIn != -1) {
6772 int OldVDstIn = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
6778 if (NewVDstIn != -1) {
6779 int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
6800 if (!SAddr || RI.isSGPRClass(
MRI.getRegClass(SAddr->
getReg())))
6820 unsigned OpSubReg =
Op.getSubReg();
6823 RI.getRegClassForReg(
MRI, OpReg), OpSubReg);
6829 Register DstReg =
MRI.createVirtualRegister(DstRC);
6839 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
6842 bool ImpDef = Def->isImplicitDef();
6843 while (!ImpDef && Def && Def->isCopy()) {
6844 if (Def->getOperand(1).getReg().isPhysical())
6846 Def =
MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
6847 ImpDef = Def && Def->isImplicitDef();
6849 if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
6868 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
6874 unsigned RegSize =
TRI->getRegSizeInBits(ScalarOp->getReg(),
MRI);
6875 unsigned NumSubRegs =
RegSize / 32;
6876 Register VScalarOp = ScalarOp->getReg();
6878 if (NumSubRegs == 1) {
6879 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6881 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
6884 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6886 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
6892 CondReg = NewCondReg;
6894 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6902 ScalarOp->setReg(CurReg);
6903 ScalarOp->setIsKill();
6907 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
6908 "Unhandled register size");
6910 for (
unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
6912 MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6914 MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6917 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
6918 .
addReg(VScalarOp, VScalarOpUndef,
TRI->getSubRegFromChannel(Idx));
6921 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
6922 .
addReg(VScalarOp, VScalarOpUndef,
6923 TRI->getSubRegFromChannel(Idx + 1));
6929 Register CurReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
6930 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), CurReg)
6936 Register NewCondReg =
MRI.createVirtualRegister(BoolXExecRC);
6937 auto Cmp =
BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U64_e64),
6940 if (NumSubRegs <= 2)
6941 Cmp.addReg(VScalarOp);
6943 Cmp.addReg(VScalarOp, VScalarOpUndef,
6944 TRI->getSubRegFromChannel(Idx, 2));
6948 CondReg = NewCondReg;
6950 Register AndReg =
MRI.createVirtualRegister(BoolXExecRC);
6958 const auto *SScalarOpRC =
6959 TRI->getEquivalentSGPRClass(
MRI.getRegClass(VScalarOp));
6960 Register SScalarOp =
MRI.createVirtualRegister(SScalarOpRC);
6964 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
6965 unsigned Channel = 0;
6966 for (
Register Piece : ReadlanePieces) {
6967 Merge.addReg(Piece).addImm(
TRI->getSubRegFromChannel(Channel++));
6971 ScalarOp->setReg(SScalarOp);
6972 ScalarOp->setIsKill();
6976 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
6977 MRI.setSimpleHint(SaveExec, CondReg);
7008 if (!Begin.isValid())
7010 if (!End.isValid()) {
7016 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
7024 MBB.computeRegisterLiveness(
TRI, AMDGPU::SCC,
MI,
7025 std::numeric_limits<unsigned>::max()) !=
7028 SaveSCCReg =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7034 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
7043 for (
auto I = Begin;
I != AfterMI;
I++) {
7044 for (
auto &MO :
I->all_uses())
7045 MRI.clearKillFlags(MO.getReg());
7070 MBB.addSuccessor(LoopBB);
7080 for (
auto &Succ : RemainderBB->
successors()) {
7104static std::tuple<unsigned, unsigned>
7112 TII.buildExtractSubReg(
MI,
MRI, Rsrc, &AMDGPU::VReg_128RegClass,
7113 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
7116 Register Zero64 =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
7117 Register SRsrcFormatLo =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7118 Register SRsrcFormatHi =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7119 Register NewSRsrc =
MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
7120 uint64_t RsrcDataFormat =
TII.getDefaultRsrcDataFormat();
7137 .
addImm(AMDGPU::sub0_sub1)
7143 return std::tuple(RsrcPtr, NewSRsrc);
7180 if (
MI.getOpcode() == AMDGPU::PHI) {
7182 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2) {
7183 if (!
MI.getOperand(i).isReg() || !
MI.getOperand(i).getReg().isVirtual())
7186 MRI.getRegClass(
MI.getOperand(i).getReg());
7187 if (RI.hasVectorRegisters(OpRC)) {
7201 VRC = &AMDGPU::VReg_1RegClass;
7204 ? RI.getEquivalentAGPRClass(SRC)
7205 : RI.getEquivalentVGPRClass(SRC);
7208 ? RI.getEquivalentAGPRClass(VRC)
7209 : RI.getEquivalentVGPRClass(VRC);
7217 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7219 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7235 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
7238 if (RI.hasVGPRs(DstRC)) {
7242 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7244 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7262 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
7267 if (DstRC != Src0RC) {
7276 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
7278 if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
7284 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
7285 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
7286 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
7287 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
7288 MI.getOpcode() == AMDGPU::S_WQM_B64 ||
7289 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
7290 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
7292 if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
7305 ? AMDGPU::OpName::rsrc
7306 : AMDGPU::OpName::srsrc;
7308 if (SRsrc && !RI.isSGPRClass(
MRI.getRegClass(SRsrc->
getReg())))
7311 AMDGPU::OpName SampOpName =
7312 isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
7314 if (SSamp && !RI.isSGPRClass(
MRI.getRegClass(SSamp->
getReg())))
7321 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
7323 if (!RI.isSGPRClass(
MRI.getRegClass(Dest->
getReg()))) {
7327 unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
7328 unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
7333 while (Start->getOpcode() != FrameSetupOpcode)
7336 while (End->getOpcode() != FrameDestroyOpcode)
7340 while (End !=
MBB.end() && End->isCopy() && End->getOperand(1).isReg() &&
7341 MI.definesRegister(End->getOperand(1).getReg(),
nullptr))
7349 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
7351 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7353 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
7363 if (
MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS ||
7364 MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_D2 ||
7365 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS ||
7366 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_D2) {
7368 if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
7375 bool isSoffsetLegal =
true;
7377 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::soffset);
7378 if (SoffsetIdx != -1) {
7381 !RI.isSGPRClass(
MRI.getRegClass(Soffset->
getReg()))) {
7382 isSoffsetLegal =
false;
7386 bool isRsrcLegal =
true;
7388 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::srsrc);
7389 if (RsrcIdx != -1) {
7392 isRsrcLegal =
false;
7396 if (isRsrcLegal && isSoffsetLegal)
7420 Register NewVAddrLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7421 Register NewVAddrHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7422 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7424 const auto *BoolXExecRC = RI.getWaveMaskRegClass();
7425 Register CondReg0 =
MRI.createVirtualRegister(BoolXExecRC);
7426 Register CondReg1 =
MRI.createVirtualRegister(BoolXExecRC);
7428 unsigned RsrcPtr, NewSRsrc;
7435 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
7442 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
7456 }
else if (!VAddr && ST.hasAddr64()) {
7460 "FIXME: Need to emit flat atomics here");
7462 unsigned RsrcPtr, NewSRsrc;
7465 Register NewVAddr =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7488 MIB.
addImm(CPol->getImm());
7493 MIB.
addImm(TFE->getImm());
7513 MI.removeFromParent();
7518 .
addReg(RsrcPtr, 0, AMDGPU::sub0)
7520 .
addReg(RsrcPtr, 0, AMDGPU::sub1)
7524 if (!isSoffsetLegal) {
7536 if (!isSoffsetLegal) {
7548 AMDGPU::getNamedOperandIdx(
MI->getOpcode(), AMDGPU::OpName::srsrc);
7549 if (RsrcIdx != -1) {
7550 DeferredList.insert(
MI);
7555 return DeferredList.contains(
MI);
7565 if (!ST.useRealTrue16Insts())
7568 unsigned Opcode =
MI.getOpcode();
7572 OpIdx >=
get(Opcode).getNumOperands() ||
7573 get(Opcode).operands()[
OpIdx].RegClass == -1)
7577 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7581 if (!RI.isVGPRClass(CurrRC))
7584 int16_t RCID = getOpRegClassID(
get(Opcode).operands()[
OpIdx]);
7586 if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
7587 Op.setSubReg(AMDGPU::lo16);
7588 }
else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
7590 Register NewDstReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7591 Register Undef =
MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
7598 Op.setReg(NewDstReg);
7610 while (!Worklist.
empty()) {
7624 "Deferred MachineInstr are not supposed to re-populate worklist");
7642 case AMDGPU::S_ADD_I32:
7643 case AMDGPU::S_SUB_I32: {
7647 std::tie(
Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
7655 case AMDGPU::S_MUL_U64:
7656 if (ST.hasVectorMulU64()) {
7657 NewOpcode = AMDGPU::V_MUL_U64_e64;
7661 splitScalarSMulU64(Worklist, Inst, MDT);
7665 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7666 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7669 splitScalarSMulPseudo(Worklist, Inst, MDT);
7673 case AMDGPU::S_AND_B64:
7674 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7678 case AMDGPU::S_OR_B64:
7679 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
7683 case AMDGPU::S_XOR_B64:
7684 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
7688 case AMDGPU::S_NAND_B64:
7689 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
7693 case AMDGPU::S_NOR_B64:
7694 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
7698 case AMDGPU::S_XNOR_B64:
7699 if (ST.hasDLInsts())
7700 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
7702 splitScalar64BitXnor(Worklist, Inst, MDT);
7706 case AMDGPU::S_ANDN2_B64:
7707 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7711 case AMDGPU::S_ORN2_B64:
7712 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7716 case AMDGPU::S_BREV_B64:
7717 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
7721 case AMDGPU::S_NOT_B64:
7722 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7726 case AMDGPU::S_BCNT1_I32_B64:
7727 splitScalar64BitBCNT(Worklist, Inst);
7731 case AMDGPU::S_BFE_I64:
7732 splitScalar64BitBFE(Worklist, Inst);
7736 case AMDGPU::S_FLBIT_I32_B64:
7737 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7740 case AMDGPU::S_FF1_I32_B64:
7741 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7745 case AMDGPU::S_LSHL_B32:
7746 if (ST.hasOnlyRevVALUShifts()) {
7747 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7751 case AMDGPU::S_ASHR_I32:
7752 if (ST.hasOnlyRevVALUShifts()) {
7753 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7757 case AMDGPU::S_LSHR_B32:
7758 if (ST.hasOnlyRevVALUShifts()) {
7759 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7763 case AMDGPU::S_LSHL_B64:
7764 if (ST.hasOnlyRevVALUShifts()) {
7766 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7767 : AMDGPU::V_LSHLREV_B64_e64;
7771 case AMDGPU::S_ASHR_I64:
7772 if (ST.hasOnlyRevVALUShifts()) {
7773 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7777 case AMDGPU::S_LSHR_B64:
7778 if (ST.hasOnlyRevVALUShifts()) {
7779 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7784 case AMDGPU::S_ABS_I32:
7785 lowerScalarAbs(Worklist, Inst);
7789 case AMDGPU::S_CBRANCH_SCC0:
7790 case AMDGPU::S_CBRANCH_SCC1: {
7793 bool IsSCC = CondReg == AMDGPU::SCC;
7801 case AMDGPU::S_BFE_U64:
7802 case AMDGPU::S_BFM_B64:
7805 case AMDGPU::S_PACK_LL_B32_B16:
7806 case AMDGPU::S_PACK_LH_B32_B16:
7807 case AMDGPU::S_PACK_HL_B32_B16:
7808 case AMDGPU::S_PACK_HH_B32_B16:
7809 movePackToVALU(Worklist,
MRI, Inst);
7813 case AMDGPU::S_XNOR_B32:
7814 lowerScalarXnor(Worklist, Inst);
7818 case AMDGPU::S_NAND_B32:
7819 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
7823 case AMDGPU::S_NOR_B32:
7824 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
7828 case AMDGPU::S_ANDN2_B32:
7829 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
7833 case AMDGPU::S_ORN2_B32:
7834 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
7842 case AMDGPU::S_ADD_CO_PSEUDO:
7843 case AMDGPU::S_SUB_CO_PSEUDO: {
7844 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
7845 ? AMDGPU::V_ADDC_U32_e64
7846 : AMDGPU::V_SUBB_U32_e64;
7847 const auto *CarryRC = RI.getWaveMaskRegClass();
7850 if (!
MRI.constrainRegClass(CarryInReg, CarryRC)) {
7851 Register NewCarryReg =
MRI.createVirtualRegister(CarryRC);
7858 Register DestReg =
MRI.createVirtualRegister(RI.getEquivalentVGPRClass(
7869 addUsersToMoveToVALUWorklist(DestReg,
MRI, Worklist);
7873 case AMDGPU::S_UADDO_PSEUDO:
7874 case AMDGPU::S_USUBO_PSEUDO: {
7881 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_UADDO_PSEUDO)
7882 ? AMDGPU::V_ADD_CO_U32_e64
7883 : AMDGPU::V_SUB_CO_U32_e64;
7885 RI.getEquivalentVGPRClass(
MRI.getRegClass(Dest0.
getReg()));
7886 Register DestReg =
MRI.createVirtualRegister(NewRC);
7894 MRI.replaceRegWith(Dest0.
getReg(), DestReg);
7901 case AMDGPU::S_CSELECT_B32:
7902 case AMDGPU::S_CSELECT_B64:
7903 lowerSelect(Worklist, Inst, MDT);
7906 case AMDGPU::S_CMP_EQ_I32:
7907 case AMDGPU::S_CMP_LG_I32:
7908 case AMDGPU::S_CMP_GT_I32:
7909 case AMDGPU::S_CMP_GE_I32:
7910 case AMDGPU::S_CMP_LT_I32:
7911 case AMDGPU::S_CMP_LE_I32:
7912 case AMDGPU::S_CMP_EQ_U32:
7913 case AMDGPU::S_CMP_LG_U32:
7914 case AMDGPU::S_CMP_GT_U32:
7915 case AMDGPU::S_CMP_GE_U32:
7916 case AMDGPU::S_CMP_LT_U32:
7917 case AMDGPU::S_CMP_LE_U32:
7918 case AMDGPU::S_CMP_EQ_U64:
7919 case AMDGPU::S_CMP_LG_U64:
7920 case AMDGPU::S_CMP_LT_F32:
7921 case AMDGPU::S_CMP_EQ_F32:
7922 case AMDGPU::S_CMP_LE_F32:
7923 case AMDGPU::S_CMP_GT_F32:
7924 case AMDGPU::S_CMP_LG_F32:
7925 case AMDGPU::S_CMP_GE_F32:
7926 case AMDGPU::S_CMP_O_F32:
7927 case AMDGPU::S_CMP_U_F32:
7928 case AMDGPU::S_CMP_NGE_F32:
7929 case AMDGPU::S_CMP_NLG_F32:
7930 case AMDGPU::S_CMP_NGT_F32:
7931 case AMDGPU::S_CMP_NLE_F32:
7932 case AMDGPU::S_CMP_NEQ_F32:
7933 case AMDGPU::S_CMP_NLT_F32: {
7934 Register CondReg =
MRI.createVirtualRegister(RI.getWaveMaskRegClass());
7938 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
7952 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
7956 case AMDGPU::S_CMP_LT_F16:
7957 case AMDGPU::S_CMP_EQ_F16:
7958 case AMDGPU::S_CMP_LE_F16:
7959 case AMDGPU::S_CMP_GT_F16:
7960 case AMDGPU::S_CMP_LG_F16:
7961 case AMDGPU::S_CMP_GE_F16:
7962 case AMDGPU::S_CMP_O_F16:
7963 case AMDGPU::S_CMP_U_F16:
7964 case AMDGPU::S_CMP_NGE_F16:
7965 case AMDGPU::S_CMP_NLG_F16:
7966 case AMDGPU::S_CMP_NGT_F16:
7967 case AMDGPU::S_CMP_NLE_F16:
7968 case AMDGPU::S_CMP_NEQ_F16:
7969 case AMDGPU::S_CMP_NLT_F16: {
7970 Register CondReg =
MRI.createVirtualRegister(RI.getWaveMaskRegClass());
7992 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
7996 case AMDGPU::S_CVT_HI_F32_F16: {
7998 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7999 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8000 if (ST.useRealTrue16Insts()) {
8005 .
addReg(TmpReg, 0, AMDGPU::hi16)
8021 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8025 case AMDGPU::S_MINIMUM_F32:
8026 case AMDGPU::S_MAXIMUM_F32: {
8028 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8039 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8043 case AMDGPU::S_MINIMUM_F16:
8044 case AMDGPU::S_MAXIMUM_F16: {
8046 Register NewDst =
MRI.createVirtualRegister(ST.useRealTrue16Insts()
8047 ? &AMDGPU::VGPR_16RegClass
8048 : &AMDGPU::VGPR_32RegClass);
8060 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8064 case AMDGPU::V_S_EXP_F16_e64:
8065 case AMDGPU::V_S_LOG_F16_e64:
8066 case AMDGPU::V_S_RCP_F16_e64:
8067 case AMDGPU::V_S_RSQ_F16_e64:
8068 case AMDGPU::V_S_SQRT_F16_e64: {
8070 Register NewDst =
MRI.createVirtualRegister(ST.useRealTrue16Insts()
8071 ? &AMDGPU::VGPR_16RegClass
8072 : &AMDGPU::VGPR_32RegClass);
8084 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8090 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
8098 if (NewOpcode == Opcode) {
8106 Register NewDst =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8108 get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
8126 addUsersToMoveToVALUWorklist(DstReg,
MRI, Worklist);
8128 MRI.replaceRegWith(DstReg, NewDstReg);
8129 MRI.clearKillFlags(NewDstReg);
8143 if (ST.useRealTrue16Insts() && Inst.
isCopy() &&
8147 if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
8148 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8149 Register Undef =
MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
8151 get(AMDGPU::IMPLICIT_DEF), Undef);
8153 get(AMDGPU::REG_SEQUENCE), NewDstReg)
8159 MRI.replaceRegWith(DstReg, NewDstReg);
8160 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8162 }
else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
8165 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8166 MRI.replaceRegWith(DstReg, NewDstReg);
8167 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8172 Register NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8173 MRI.replaceRegWith(DstReg, NewDstReg);
8175 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8185 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8186 AMDGPU::OpName::src0_modifiers) >= 0)
8190 NewInstr->addOperand(Src);
8193 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
8196 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
8198 NewInstr.addImm(
Size);
8199 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
8203 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
8208 "Scalar BFE is only implemented for constant width and offset");
8216 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8217 AMDGPU::OpName::src1_modifiers) >= 0)
8219 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
8221 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8222 AMDGPU::OpName::src2_modifiers) >= 0)
8224 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
8226 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
8228 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
8230 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)
8236 NewInstr->addOperand(
Op);
8243 if (
Op.getReg() == AMDGPU::SCC) {
8245 if (
Op.isDef() && !
Op.isDead())
8246 addSCCDefUsersToVALUWorklist(
Op, Inst, Worklist);
8248 addSCCDefsToVALUWorklist(NewInstr, Worklist);
8253 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
8254 Register DstReg = NewInstr->getOperand(0).getReg();
8259 NewDstReg =
MRI.createVirtualRegister(NewDstRC);
8260 MRI.replaceRegWith(DstReg, NewDstReg);
8269 addUsersToMoveToVALUWorklist(NewDstReg,
MRI, Worklist);
8273std::pair<bool, MachineBasicBlock *>
8285 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8288 assert(
Opc == AMDGPU::S_ADD_I32 ||
Opc == AMDGPU::S_SUB_I32);
8290 unsigned NewOpc =
Opc == AMDGPU::S_ADD_I32 ?
8291 AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
8299 MRI.replaceRegWith(OldDstReg, ResultReg);
8302 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8303 return std::pair(
true, NewBB);
8306 return std::pair(
false,
nullptr);
8323 bool IsSCC = (CondReg == AMDGPU::SCC);
8331 MRI.replaceRegWith(Dest.
getReg(), CondReg);
8337 const TargetRegisterClass *TC = RI.getWaveMaskRegClass();
8338 NewCondReg =
MRI.createVirtualRegister(TC);
8342 bool CopyFound =
false;
8343 for (MachineInstr &CandI :
8346 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) !=
8348 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
8350 .
addReg(CandI.getOperand(1).getReg());
8362 ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
8370 RI.getEquivalentVGPRClass(
MRI.getRegClass(Dest.
getReg())));
8371 MachineInstr *NewInst;
8372 if (Inst.
getOpcode() == AMDGPU::S_CSELECT_B32) {
8373 NewInst =
BuildMI(
MBB, MII,
DL,
get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
8386 MRI.replaceRegWith(Dest.
getReg(), NewDestReg);
8388 addUsersToMoveToVALUWorklist(NewDestReg,
MRI, Worklist);
8400 Register TmpReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8401 Register ResultReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8403 unsigned SubOp = ST.hasAddNoCarry() ?
8404 AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
8414 MRI.replaceRegWith(Dest.
getReg(), ResultReg);
8415 addUsersToMoveToVALUWorklist(ResultReg,
MRI, Worklist);
8429 if (ST.hasDLInsts()) {
8430 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8438 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8439 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
8445 bool Src0IsSGPR = Src0.
isReg() &&
8446 RI.isSGPRClass(
MRI.getRegClass(Src0.
getReg()));
8447 bool Src1IsSGPR = Src1.
isReg() &&
8448 RI.isSGPRClass(
MRI.getRegClass(Src1.
getReg()));
8450 Register Temp =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8451 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8461 }
else if (Src1IsSGPR) {
8475 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8479 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
8485 unsigned Opcode)
const {
8495 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8496 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8508 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8509 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
8514 unsigned Opcode)
const {
8524 Register NewDest =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8525 Register Interm =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8537 MRI.replaceRegWith(Dest.
getReg(), NewDest);
8538 addUsersToMoveToVALUWorklist(NewDest,
MRI, Worklist);
  const MCInstrDesc &InstDesc = get(Opcode);
  const TargetRegisterClass *Src0RC = Src0.isReg() ?
                                      &AMDGPU::SGPR_32RegClass;
  const TargetRegisterClass *Src0SubRC =
      RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
                                         AMDGPU::sub0, Src0SubRC);
  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
  const TargetRegisterClass *NewDestSubRC =
      RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
  Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
  MachineInstr &LoHalf =
      *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
                                         AMDGPU::sub1, Src0SubRC);
  Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
  MachineInstr &HiHalf =
      *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
  Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
  Worklist.insert(&LoHalf);
  Worklist.insert(&HiHalf);
  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
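// 64-bit scalar multiply expansion: both operands are split into 32-bit
// halves and the product is rebuilt from partial products plus the carry out
// of the low half, all in VGPRs; the pseudo variant below uses
// V_MUL_HI_{U,I}32 for the high half instead.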
  Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
  Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
  const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
  const TargetRegisterClass *Src0SubRC =
      RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
  if (RI.isSGPRClass(Src0SubRC))
    Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
  const TargetRegisterClass *Src1SubRC =
      RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
  if (RI.isSGPRClass(Src1SubRC))
    Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
  MachineOperand Op0L =
  MachineOperand Op1L =
  MachineOperand Op0H =
  MachineOperand Op1H =
  Register Op1L_Op0H_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MachineInstr *Op1L_Op0H =
  Register Op1H_Op0L_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MachineInstr *Op1H_Op0L =
  Register CarryReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MachineInstr *Carry =
  MachineInstr *LoHalf =
  Register AddReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MachineInstr *HiHalf =
  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
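// S_MUL_U64_U32 / S_MUL_I64_I32 pseudo expansion: the high half comes from
// V_MUL_HI_{U,I}32 and the low half from a 32-bit multiply, then both halves
// are combined into a 64-bit VGPR pair.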
  Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
  Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
  const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
  const TargetRegisterClass *Src0SubRC =
      RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
  if (RI.isSGPRClass(Src0SubRC))
    Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
  const TargetRegisterClass *Src1SubRC =
      RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
  if (RI.isSGPRClass(Src1SubRC))
    Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
  MachineOperand Op0L =
  MachineOperand Op1L =
  unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
                        ? AMDGPU::V_MUL_HI_U32_e64
                        : AMDGPU::V_MUL_HI_I32_e64;
  MachineInstr *HiHalf =
  MachineInstr *LoHalf =
  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
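// 64-bit scalar binary op split: both sources are decomposed into sub0/sub1,
// the 32-bit opcode is applied per half, and the halves are merged into an
// equivalent VGPR-class destination; both new instructions go back on the
// worklist.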
  const MCInstrDesc &InstDesc = get(Opcode);
  const TargetRegisterClass *Src0RC = Src0.isReg() ?
                                      &AMDGPU::SGPR_32RegClass;
  const TargetRegisterClass *Src0SubRC =
      RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
  const TargetRegisterClass *Src1RC = Src1.isReg() ?
                                      &AMDGPU::SGPR_32RegClass;
  const TargetRegisterClass *Src1SubRC =
      RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
                                         AMDGPU::sub0, Src0SubRC);
                                         AMDGPU::sub0, Src1SubRC);
                                         AMDGPU::sub1, Src0SubRC);
                                         AMDGPU::sub1, Src1SubRC);
  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
  const TargetRegisterClass *NewDestSubRC =
      RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
  Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
  MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
  Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
  MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
  Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
  Worklist.insert(&LoHalf);
  Worklist.insert(&HiHalf);
  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
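// 64-bit scalar XNOR goes through a NOT of one operand into a 64-bit SGPR
// intermediate; the helper after it lowers a 64-bit scalar bit-count to two
// V_BCNT_U32_B32_e64 operations over sub0 and sub1.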
  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
  Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  MachineOperand *Op0;
  MachineOperand *Op1;
  Register NewDest = MRI.createVirtualRegister(DestRC);
  MRI.replaceRegWith(Dest.getReg(), NewDest);
  const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
  const TargetRegisterClass *SrcRC = Src.isReg() ?
      MRI.getRegClass(Src.getReg()) :
      &AMDGPU::SGPR_32RegClass;
  Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  const TargetRegisterClass *SrcSubRC =
      RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
                                       AMDGPU::sub0, SrcSubRC);
                                       AMDGPU::sub1, SrcSubRC);
  MRI.replaceRegWith(Dest.getReg(), ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
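// 64-bit scalar BFE split: only the zero-offset form is expanded here (hence
// the assertion); the extracted field is built per 32-bit half and the result
// is reassembled into a 64-bit VGPR pair.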
         Offset == 0 && "Not implemented");
  Register MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
  MRI.replaceRegWith(Dest.getReg(), ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
  Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
      .addReg(Src.getReg(), 0, AMDGPU::sub0);
      .addReg(Src.getReg(), 0, AMDGPU::sub0)
  MRI.replaceRegWith(Dest.getReg(), ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
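// 64-bit count-leading/trailing-zero split: each 32-bit half is counted
// separately, the half counts are adjusted with the add opcode selected for
// the subtarget, and the combined result in MidReg4 replaces the destination.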
  const MCInstrDesc &InstDesc = get(Opcode);
  bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
  unsigned OpcodeAdd =
      ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
  const TargetRegisterClass *SrcRC =
      Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
  const TargetRegisterClass *SrcSubRC =
      RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
  MachineOperand SrcRegSub0 =
  MachineOperand SrcRegSub1 =
  Register MidReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register MidReg2 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register MidReg3 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register MidReg4 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      .addReg(IsCtlz ? MidReg1 : MidReg2)
      .addReg(IsCtlz ? MidReg2 : MidReg1);
  MRI.replaceRegWith(Dest.getReg(), MidReg4);
  addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist);
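// Worklist maintenance: each user of the rewritten register is inspected;
// copy-like users (WQM/WWM variants, REG_SEQUENCE, INSERT_SUBREG, ...) are
// only requeued when their operand class still lacks vector registers.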
void SIInstrInfo::addUsersToMoveToVALUWorklist(
    MachineInstr &UseMI = *MO.getParent();
    switch (UseMI.getOpcode()) {
    case AMDGPU::SOFT_WQM:
    case AMDGPU::STRICT_WWM:
    case AMDGPU::STRICT_WQM:
    case AMDGPU::REG_SEQUENCE:
    case AMDGPU::INSERT_SUBREG:
      OpNo = MO.getOperandNo();
    MRI.constrainRegClass(DstReg, OpRC);
    if (!RI.hasVectorRegisters(OpRC))
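// S_PACK_* lowering: with real true16 instructions the pack becomes a
// REG_SEQUENCE over lo16/hi16 sub-registers; otherwise each variant is
// emulated through an immediate-mask register and temporary VGPRs before the
// destination is replaced.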
  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  if (ST.useRealTrue16Insts()) {
      SrcReg0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      SrcReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    bool isSrc0Reg16 =
        MRI.constrainRegClass(SrcReg0, &AMDGPU::VGPR_16RegClass);
    bool isSrc1Reg16 =
        MRI.constrainRegClass(SrcReg1, &AMDGPU::VGPR_16RegClass);
    auto NewMI = BuildMI(*MBB, Inst, DL, get(AMDGPU::REG_SEQUENCE), ResultReg);
    case AMDGPU::S_PACK_LL_B32_B16:
                   isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
          .addImm(AMDGPU::lo16)
                   isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
          .addImm(AMDGPU::hi16);
    case AMDGPU::S_PACK_LH_B32_B16:
                   isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
          .addImm(AMDGPU::lo16)
          .addReg(SrcReg1, 0, AMDGPU::hi16)
          .addImm(AMDGPU::hi16);
    case AMDGPU::S_PACK_HL_B32_B16:
      NewMI.addReg(SrcReg0, 0, AMDGPU::hi16)
          .addImm(AMDGPU::lo16)
                  isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
          .addImm(AMDGPU::hi16);
    case AMDGPU::S_PACK_HH_B32_B16:
      NewMI.addReg(SrcReg0, 0, AMDGPU::hi16)
          .addImm(AMDGPU::lo16)
          .addReg(SrcReg1, 0, AMDGPU::hi16)
          .addImm(AMDGPU::hi16);
    MRI.replaceRegWith(Dest.getReg(), ResultReg);
    addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
  case AMDGPU::S_PACK_LL_B32_B16: {
    Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  case AMDGPU::S_PACK_LH_B32_B16: {
    Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  case AMDGPU::S_PACK_HL_B32_B16: {
    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  case AMDGPU::S_PACK_HH_B32_B16: {
    Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MRI.replaceRegWith(Dest.getReg(), ResultReg);
  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
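// SCC bookkeeping: after a compare has been moved to the VALU, every later
// reader of SCC (up to the next SCC definition) is redirected to the new
// condition register, and copies of SCC that become dead are deleted.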
  assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() &&
         !Op.isDead() && Op.getParent() == &SCCDefInst);
  SmallVector<MachineInstr *, 4> CopyToDelete;
  for (MachineInstr &MI :
    int SCCIdx = MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI, false);
        MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
        Register DestReg = MI.getOperand(0).getReg();
        MRI.replaceRegWith(DestReg, NewCond);
        MI.getOperand(SCCIdx).setReg(NewCond);
    if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) != -1)
  for (auto &Copy : CopyToDelete)
    Copy->eraseFromParent();
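// The reverse direction: when an instruction reading SCC is moved to the
// VALU, the SALU instruction defining that SCC value is queued as well,
// with the backwards scan stopping at any VCC clobber.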
void SIInstrInfo::addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
  for (MachineInstr &MI :
    if (MI.modifiesRegister(AMDGPU::VCC, &RI))
    if (MI.definesRegister(AMDGPU::SCC, &RI)) {
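// Destination class selection for moveToVALU: copy-like opcodes keep an AGPR
// class when their source is already AGPR, everything else is mapped to the
// equivalent VGPR class (VReg_1 is returned unchanged).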
  const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
  case AMDGPU::REG_SEQUENCE:
  case AMDGPU::INSERT_SUBREG:
  case AMDGPU::SOFT_WQM:
  case AMDGPU::STRICT_WWM:
  case AMDGPU::STRICT_WQM: {
    if (RI.isAGPRClass(SrcRC)) {
      if (RI.isAGPRClass(NewDstRC))
      case AMDGPU::REG_SEQUENCE:
      case AMDGPU::INSERT_SUBREG:
        NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
      NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
    if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
    NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
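// Scan up to three source operands and report an SGPR that can legally stay
// scalar: an operand that is required to be an SGPR wins, otherwise a
// register used by more than one operand is preferred so only one copy to a
// VGPR is needed.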
                                   int OpIndices[3]) const {
  const MCInstrDesc &Desc = MI.getDesc();
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  for (unsigned i = 0; i < 3; ++i) {
    int Idx = OpIndices[i];
    const MachineOperand &MO = MI.getOperand(Idx);
    const TargetRegisterClass *OpRC =
        RI.getRegClass(getOpRegClassID(Desc.operands()[Idx]));
    bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
    const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
    if (RI.isSGPRClass(RegRC))
  if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
    SGPRReg = UsedSGPRs[0];
  if (!SGPRReg && UsedSGPRs[1]) {
    if (UsedSGPRs[1] == UsedSGPRs[2])
      SGPRReg = UsedSGPRs[1];
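// getNamedOperand resolves an operand by name via the generated operand-index
// tables; the rsrc helpers below build the default buffer resource descriptor
// words, adjusting the data-format bits for HSA and the element size / index
// stride for scratch access.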
                                                 AMDGPU::OpName OperandName) const {
  if (OperandName == AMDGPU::OpName::NUM_OPERAND_NAMES)
  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
  return &MI.getOperand(Idx);
  if (ST.isAmdHsaOS()) {
    RsrcDataFormat |= (1ULL << 56);
      RsrcDataFormat |= (2ULL << 59);
  return RsrcDataFormat;
  uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize(true)) - 1;
  uint64_t IndexStride = ST.isWave64() ? 3 : 2;
    Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
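// Frame-index helpers: the various stack-slot / stack-access queries all
// reduce to checking whether the instruction's address operand is a frame
// index; getInstBundleSize walks the bundled instructions and sums their
// sizes.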
  unsigned Opc = MI.getOpcode();
  return get(Opc).mayLoad() &&
                                          int &FrameIndex) const {
  if (!Addr || !Addr->isFI())
                                           int &FrameIndex) const {
                                          int &FrameIndex) const {
                                            int &FrameIndex) const {
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
  unsigned Opc = MI.getOpcode();
  unsigned DescSize = Desc.getSize();
    unsigned Size = DescSize;
    if (MI.isBranch() && ST.hasOffset3fBug())
    bool HasLiteral = false;
    unsigned LiteralSize = 4;
    for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
        if (ST.has64BitLiterals()) {
          switch (OpInfo.OperandType) {
    return HasLiteral ? DescSize + LiteralSize : DescSize;
    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
    int RSrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
    return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
  case TargetOpcode::BUNDLE:
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR: {
    const char *AsmStr = MI.getOperand(0).getSymbolName();
    if (MI.isMetaInstruction())
    const auto *D16Info = AMDGPU::getT16D16Helper(Opc);
      unsigned LoInstOpcode = D16Info->LoOp;
      DescSize = Desc.getSize();
    if (Opc == AMDGPU::V_FMA_MIX_F16_t16 || Opc == AMDGPU::V_FMA_MIX_BF16_t16) {
      DescSize = Desc.getSize();
  if (MI.memoperands_empty())
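// Serializable MIR metadata (target indices, operand target flags and memory
// operand flags) is exposed through static tables; getLiveRangeSplitOpcode
// selects WWM_COPY where needed, and isBasicBlockPrologue treats
// non-terminator EXEC-writing setup (and IMPLICIT_DEF) as prologue code.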
  static const std::pair<int, const char *> TargetIndices[] = {
std::pair<unsigned, unsigned>
  static const std::pair<unsigned, const char *> TargetFlags[] = {
  static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
    return AMDGPU::WWM_COPY;
  return AMDGPU::COPY;
  bool IsNullOrVectorRegister = true;
    IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
  return IsNullOrVectorRegister &&
         (Opcode == AMDGPU::IMPLICIT_DEF &&
          (!MI.isTerminator() && Opcode != AMDGPU::COPY &&
           MI.modifiesRegister(AMDGPU::EXEC, &RI)));
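// getAddNoCarry: on subtargets with carry-less adds this is a plain
// V_ADD_U32; otherwise a V_ADD_CO_U32 is built with an allocation-hinted or
// scavenged register to hold the unused carry output.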
  if (ST.hasAddNoCarry())
  Register UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC());
  MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC());
  if (ST.hasAddNoCarry())
  Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC)
                             : RS.scavengeRegisterBackwards(
                                   *RI.getBoolRC(), I, false,
  case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
  case AMDGPU::SI_KILL_I1_TERMINATOR:
  case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
    return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
  case AMDGPU::SI_KILL_I1_PSEUDO:
    return get(AMDGPU::SI_KILL_I1_TERMINATOR);
  const unsigned OffsetBits =
  return (1 << OffsetBits) - 1;
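// On wave32, implicit VCC operands left behind by selection are narrowed to
// VCC_LO; isBufferSMRD checks that an SMRD's sbase operand uses a 128-bit
// SGPR resource class.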
  if (MI.isInlineAsm())
  for (auto &Op : MI.implicit_operands()) {
    if (Op.isReg() && Op.getReg() == AMDGPU::VCC)
      Op.setReg(AMDGPU::VCC_LO);
  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sbase);
  const int16_t RCID = getOpRegClassID(MI.getDesc().operands()[Idx]);
  return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
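// Offset legalization: splitMUBUFOffset folds what fits into the MUBUF
// immediate field and moves the overflow into SOffset, while splitFlatOffset
// divides a flat/scratch offset into the encodable immediate field plus a
// remainder, working around the negative-unaligned-scratch-offset bug.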
    if (Imm <= MaxImm + 64) {
      Overflow = Imm - MaxImm;
  if (ST.hasRestrictedSOffset())
  if (!ST.hasFlatInstOffsets())
  if (ST.hasNegativeUnalignedScratchOffsetBug() &&
std::pair<int64_t, int64_t>
  int64_t RemainderOffset = COffsetVal;
  int64_t ImmField = 0;
  if (AllowNegative) {
    int64_t D = 1LL << NumBits;
    RemainderOffset = (COffsetVal / D) * D;
    ImmField = COffsetVal - RemainderOffset;
    if (ST.hasNegativeUnalignedScratchOffsetBug() &&
        (ImmField % 4) != 0) {
      RemainderOffset += ImmField % 4;
      ImmField -= ImmField % 4;
  } else if (COffsetVal >= 0) {
    RemainderOffset = COffsetVal - ImmField;
  assert(RemainderOffset + ImmField == COffsetVal);
  return {ImmField, RemainderOffset};
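// Mapping pseudo instructions to real MC opcodes: the subtarget generation
// selects an encoding family, a handful of opcodes renamed on GFX9/GFX10 are
// special-cased, and MAI / GFX90A / GFX940 variants get their own lookups
// before the generic table translation.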
  if (ST.hasNegativeScratchOffsetBug() &&
  switch (ST.getGeneration()) {
  case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
  case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)                                    \
  case OPCODE##_dpp:                                                           \
  case OPCODE##_e32:                                                           \
  case OPCODE##_e64:                                                           \
  case OPCODE##_e64_dpp:                                                       \
  case OPCODE##_sdwa:
  case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
  case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
  case AMDGPU::V_FMA_F16_gfx9_e64:
  case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
  case AMDGPU::V_INTERP_P2_F16:
  case AMDGPU::V_MAD_F16_e64:
  case AMDGPU::V_MAD_U16_e64:
  case AMDGPU::V_MAD_I16_e64:
  switch (ST.getGeneration()) {
  if (isMAI(Opcode)) {
    if (MCOp == (uint16_t)-1 && ST.hasGFX1250Insts())
  if (ST.hasGFX90AInsts()) {
    if (ST.hasGFX940Insts())
  for (unsigned I = 0, E = (MI.getNumOperands() - 1) / 2; I < E; ++I)
    if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) {
      auto &RegOp = MI.getOperand(1 + 2 * I);
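// Sub-register tracking: REG_SEQUENCE and INSERT_SUBREG operands are matched
// against the requested sub-register index, and the def-chain of a virtual
// register is followed through V_MOV_B32 and copy-like instructions to find
// the value actually feeding it.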
  switch (MI.getOpcode()) {
  case AMDGPU::REG_SEQUENCE:
  case AMDGPU::INSERT_SUBREG:
    if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm())
  if (!P.Reg.isVirtual())
  auto *DefInst = MRI.getVRegDef(RSR.Reg);
  while (auto *MI = DefInst) {
    switch (MI->getOpcode()) {
    case AMDGPU::V_MOV_B32_e32: {
      auto &Op1 = MI->getOperand(1);
      DefInst = MRI.getVRegDef(RSR.Reg);
      DefInst = MRI.getVRegDef(RSR.Reg);
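// EXEC safety checks: a bounded scan (at most 20 instructions, 10 uses)
// verifies that EXEC is not rewritten between a VALU definition and its
// use(s) in the same block, which is required before folding such values.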
  assert(MRI.isSSA() && "Must be run on SSA");
  auto *TRI = MRI.getTargetRegisterInfo();
  auto *DefBB = DefMI.getParent();
  if (UseMI.getParent() != DefBB)
  const int MaxInstScan = 20;
  auto E = UseMI.getIterator();
  for (auto I = std::next(DefMI.getIterator()); I != E; ++I) {
    if (I->isDebugInstr())
    if (++NumInst > MaxInstScan)
    if (I->modifiesRegister(AMDGPU::EXEC, TRI))
  assert(MRI.isSSA() && "Must be run on SSA");
  auto *TRI = MRI.getTargetRegisterInfo();
  auto *DefBB = DefMI.getParent();
  const int MaxUseScan = 10;
  for (auto &Use : MRI.use_nodbg_operands(VReg)) {
    auto &UseInst = *Use.getParent();
    if (UseInst.getParent() != DefBB || UseInst.isPHI())
    if (++NumUse > MaxUseScan)
  const int MaxInstScan = 20;
  for (auto I = std::next(DefMI.getIterator()); ; ++I) {
    if (I->isDebugInstr())
    if (++NumInst > MaxInstScan)
      if (Reg == VReg && --NumUse == 0)
    } else if (TRI->regsOverlap(Reg, AMDGPU::EXEC))
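// PHI copy placement: the destination copy is inserted relative to existing
// non-PHI readers at the top of the block, and a source copy whose insertion
// point is an SI_IF / SI_ELSE / SI_IF_BREAK defining the source gets special
// handling; full copies constrain virtual registers to EXEC-excluding scalar
// classes.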
  auto Cur = MBB.begin();
  if (Cur != MBB.end())
      if (!Cur->isPHI() && Cur->readsRegister(Dst, nullptr))
    } while (Cur != MBB.end() && Cur != LastPHIIt);
  if (InsPt != MBB.end() &&
      (InsPt->getOpcode() == AMDGPU::SI_IF ||
       InsPt->getOpcode() == AMDGPU::SI_ELSE ||
       InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
      InsPt->definesRegister(Src, nullptr)) {
        .addReg(Src, 0, SrcSubReg)
  if (isFullCopyInstr(MI)) {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
      MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
      MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
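// Bundle latency is the maximum latency of the bundled instructions plus the
// bundle length minus one; everything else defers to the scheduling model.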
                                          unsigned *PredCost) const {
  if (MI.isBundle()) {
    unsigned Lat = 0, Count = 0;
    for (++I; I != E && I->isBundledWithPred(); ++I) {
      Lat = std::max(Lat, SchedModel.computeInstrLatency(&*I));
    return Lat + Count - 1;
  return SchedModel.computeInstrLatency(&MI);
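// Uniformity classification for generic (pre-isel) opcodes: address-space
// casts, AMDGCN control-flow intrinsics, flat/private loads and atomic
// read-modify-write operations are handled specially, and accesses to the
// private or flat address space are not treated as uniform.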
  unsigned Opcode = MI.getOpcode();
                       : MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(Dst);
    LLT SrcTy = MRI.getType(Src);
    unsigned SrcAS = SrcTy.getAddressSpace();
           ST.hasGloballyAddressableScratch()
  if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
    return HandleAddrSpaceCast(MI);
    auto IID = GI->getIntrinsicID();
    case Intrinsic::amdgcn_addrspacecast_nonnull:
      return HandleAddrSpaceCast(MI);
    case Intrinsic::amdgcn_if:
    case Intrinsic::amdgcn_else:
  if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
      Opcode == AMDGPU::G_SEXTLOAD) {
    if (MI.memoperands_empty())
      return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
             mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
  if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
      Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
      Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
  unsigned opcode = MI.getOpcode();
  if (opcode == AMDGPU::V_READLANE_B32 ||
      opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
  if (isCopyInstr(MI)) {
        RI.getPhysRegBaseClass(srcOp.getReg());
  if (MI.isPreISelOpcode())
  if (MI.memoperands_empty())
    return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
           mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
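// For the remaining cases the source operands decide: if any register operand
// read by the instruction lives outside the SGPR register bank, the
// instruction is not uniform.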
  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
    if (!SrcOp.isReg())
    if (!Reg || !SrcOp.readsReg())
    if (RegBank && RegBank->getID() != AMDGPU::SGPRRegBankID)
        F, "ds_ordered_count unsupported for this calling conv"));
                                 Register &SrcReg2, int64_t &CmpMask,
                                 int64_t &CmpValue) const {
  if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg())
  switch (MI.getOpcode()) {
  case AMDGPU::S_CMP_EQ_U32:
  case AMDGPU::S_CMP_EQ_I32:
  case AMDGPU::S_CMP_LG_U32:
  case AMDGPU::S_CMP_LG_I32:
  case AMDGPU::S_CMP_LT_U32:
  case AMDGPU::S_CMP_LT_I32:
  case AMDGPU::S_CMP_GT_U32:
  case AMDGPU::S_CMP_GT_I32:
  case AMDGPU::S_CMP_LE_U32:
  case AMDGPU::S_CMP_LE_I32:
  case AMDGPU::S_CMP_GE_U32:
  case AMDGPU::S_CMP_GE_I32:
  case AMDGPU::S_CMP_EQ_U64:
  case AMDGPU::S_CMP_LG_U64:
    SrcReg = MI.getOperand(0).getReg();
    if (MI.getOperand(1).isReg()) {
      if (MI.getOperand(1).getSubReg())
      SrcReg2 = MI.getOperand(1).getReg();
    } else if (MI.getOperand(1).isImm()) {
      CmpValue = MI.getOperand(1).getImm();
  case AMDGPU::S_CMPK_EQ_U32:
  case AMDGPU::S_CMPK_EQ_I32:
  case AMDGPU::S_CMPK_LG_U32:
  case AMDGPU::S_CMPK_LG_I32:
  case AMDGPU::S_CMPK_LT_U32:
  case AMDGPU::S_CMPK_LT_I32:
  case AMDGPU::S_CMPK_GT_U32:
  case AMDGPU::S_CMPK_GT_I32:
  case AMDGPU::S_CMPK_LE_U32:
  case AMDGPU::S_CMPK_LE_I32:
  case AMDGPU::S_CMPK_GE_U32:
  case AMDGPU::S_CMPK_GE_I32:
    SrcReg = MI.getOperand(0).getReg();
    CmpValue = MI.getOperand(1).getImm();
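// Helpers for compare optimization: an SCC-producing definition can replace
// the compare only if nothing in between clobbers or kills SCC, and a
// foldable S_CSELECT must select between a non-zero immediate and zero.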
    if (MI.modifiesRegister(AMDGPU::SCC, &RI))
    if (MI.killsRegister(AMDGPU::SCC, &RI))
  SccDef->setIsDead(false);
  if (Def.getOpcode() != AMDGPU::S_CSELECT_B32 &&
      Def.getOpcode() != AMDGPU::S_CSELECT_B64)
  bool Op1IsNonZeroImm =
      Def.getOperand(1).isImm() && Def.getOperand(1).getImm() != 0;
  bool Op2IsZeroImm =
      Def.getOperand(2).isImm() && Def.getOperand(2).getImm() == 0;
  if (!Op1IsNonZeroImm || !Op2IsZeroImm)
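// optimizeCompareInstr folds compare-with-masked-bit sequences
// (s_cmp against an s_and with a single-bit mask) into S_BITCMP0/S_BITCMP1,
// deleting the S_AND when its result has no other uses, and also tries to
// optimize compares fed by foldable selects.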
                                       Register SrcReg2, int64_t CmpMask,
  const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
    if (Def->getOpcode() == AMDGPU::S_OR_B32 &&
        MRI->use_nodbg_empty(Def->getOperand(0).getReg())) {
      if (Def1 && Def1->getOpcode() == AMDGPU::COPY && Def2 &&
  const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
                               this](int64_t ExpectedValue, unsigned SrcSize,
                                     bool IsReversible, bool IsSigned) -> bool {
    if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
        Def->getOpcode() != AMDGPU::S_AND_B64)
    const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
      SrcOp = &Def->getOperand(2);
    else if (isMask(&Def->getOperand(2)))
      SrcOp = &Def->getOperand(1);
    if (IsSigned && BitNo == SrcSize - 1)
    ExpectedValue <<= BitNo;
    bool IsReversedCC = false;
    if (CmpValue != ExpectedValue) {
      IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
    Register DefReg = Def->getOperand(0).getReg();
    if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
    if (!MRI->use_nodbg_empty(DefReg)) {
    unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
                                                     : AMDGPU::S_BITCMP1_B32
                                      : IsReversedCC ? AMDGPU::S_BITCMP0_B64
                                                     : AMDGPU::S_BITCMP1_B64;
    Def->eraseFromParent();
  case AMDGPU::S_CMP_EQ_U32:
  case AMDGPU::S_CMP_EQ_I32:
  case AMDGPU::S_CMPK_EQ_U32:
  case AMDGPU::S_CMPK_EQ_I32:
    return optimizeCmpAnd(1, 32, true, false);
  case AMDGPU::S_CMP_GE_U32:
  case AMDGPU::S_CMPK_GE_U32:
    return optimizeCmpAnd(1, 32, false, false);
  case AMDGPU::S_CMP_GE_I32:
  case AMDGPU::S_CMPK_GE_I32:
    return optimizeCmpAnd(1, 32, false, true);
  case AMDGPU::S_CMP_EQ_U64:
    return optimizeCmpAnd(1, 64, true, false);
  case AMDGPU::S_CMP_LG_U32:
  case AMDGPU::S_CMP_LG_I32:
  case AMDGPU::S_CMPK_LG_U32:
  case AMDGPU::S_CMPK_LG_I32:
    return optimizeCmpAnd(0, 32, true, false) || optimizeCmpSelect();
  case AMDGPU::S_CMP_GT_U32:
  case AMDGPU::S_CMPK_GT_U32:
    return optimizeCmpAnd(0, 32, false, false);
  case AMDGPU::S_CMP_GT_I32:
  case AMDGPU::S_CMPK_GT_I32:
    return optimizeCmpAnd(0, 32, false, true);
  case AMDGPU::S_CMP_LG_U64:
    return optimizeCmpAnd(0, 64, true, false) || optimizeCmpSelect();
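// enforceOperandRCAlignment: on subtargets that require even-aligned VGPR
// pairs, a 64-bit operand is rebuilt through an aligned AReg_64 / VReg_64
// register pair and the operand is rewritten to use its sub0.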
                                            AMDGPU::OpName OpName) const {
  if (!ST.needsAlignedVGPRs())
  int OpNo = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
  bool IsAGPR = RI.isAGPR(MRI, DataReg);
      IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
      MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
                                       : &AMDGPU::VReg_64_Align2RegClass);
      .addReg(DataReg, 0, Op.getSubReg())
  Op.setSubReg(AMDGPU::sub0);
  unsigned Opcode = MI.getOpcode();
      Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
      Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
  if (!ST.hasGFX940Insts())
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isUndef(const MachineInstr &MI)
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine, const SIRegisterInfo &RI)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static constexpr AMDGPU::OpName ModifierOpNames[]
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc)
static bool isRenamedInGFX9(int Opcode)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static bool foldableSelect(const MachineInstr &Def)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, AMDGPU::OpName OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static MachineBasicBlock * loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
const unsigned CSelectOpc
static const LaneMaskConstants & get(const GCNSubtarget &ST)
const unsigned XorTermOpc
const unsigned OrSaveExecOpc
const unsigned AndSaveExecOpc
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool hasAddNoCarry() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
constexpr unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
unsigned getOpcode() const
Return the opcode number for this descriptor.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mop_range explicit_operands()
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
LLVM_ABI void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo)
Clear all kill flags affecting Reg.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given.
bool isXDLWMMA(const MachineInstr &MI) const
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
static bool isSOPP(const MachineInstr &MI)
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instructions opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static unsigned getFoldableCopySrcIdx(const MachineInstr &MI)
bool mayAccessScratchThroughFlat(const MachineInstr &MI) const
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
Register isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isDGEMM(unsigned Opcode)
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const final
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
MachineInstr * getWholeWaveFunctionSetup(MachineFunction &MF) const
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
static bool isDOT(const MachineInstr &MI)
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
static bool isSWMMAC(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
bool isXDL(const MachineInstr &MI) const
static bool isVIMAGE(const MachineInstr &MI)
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
bool isLegalAV64PseudoImm(uint64_t Imm) const
Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool isNeverCoissue(MachineInstr &MI) const
bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, unsigned toIdx) const
static bool isFLATGlobal(const MachineInstr &MI)
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isTRANS(const MachineInstr &MI)
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of an s_trap 2 instruction for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
const TargetRegisterClass * getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI) const override
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
bool isReMaterializableImpl(const MachineInstr &MI) const override
static bool isVOP3(const MCInstrDesc &Desc)
bool physRegUsesConstantBus(const MachineOperand &Reg) const
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
static bool isMFMA(const MachineInstr &MI)
bool isLowLatencyInstruction(const MachineInstr &MI) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specified machine instruction is an instruction that moves/copies a value from one register to ano...
bool isAlwaysGDS(uint16_t Opcode) const
static bool isMAI(const MCInstrDesc &Desc)
static bool usesLGKM_CNT(const MachineInstr &MI)
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsVALUt16(MachineInstr &Inst, MachineRegisterInfo &MRI) const
Fix operands in Inst for 16-bit SALU to VALU lowering.
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo, const MachineOperand &MO) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by the assembler.
static bool setsSCCifResultIsNonZero(const MachineInstr &MI)
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns whether Offset is legal for the subtarget as the offset to a FLAT encoded instruction with the giv...
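A minimal usage sketch; the scratch/global dispatch and the surrounding context are illustrative assumptions:
// Sketch: only fold a constant offset if the chosen FLAT variant can encode it.
static bool canFoldOffset(const SIInstrInfo &TII, int64_t Offset, bool IsScratch) {
  unsigned AS = IsScratch ? AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
  uint64_t Variant = IsScratch ? SIInstrFlags::FlatScratch : SIInstrFlags::FlatGlobal;
  return TII.isLegalFLATOffset(Offset, AS, Variant);
}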
static bool isWWMRegSpillOpcode(uint16_t Opcode)
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
int64_t getNamedImmOperand(const MachineInstr &MI, AMDGPU::OpName OperandName) const
Get required immediate operand.
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool regUsesConstantBus(const MachineOperand &Reg, const MachineRegisterInfo &MRI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to an SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change the SADDR form of a FLAT Inst to its VADDR form if the saddr operand was moved to a VGPR.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, AMDGPU::OpName Src0OpName, MachineOperand &Src1, AMDGPU::OpName Src1OpName) const
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
bool isLiteralOperandLegal(const MCInstrDesc &InstDesc, const MCOperandInfo &OpInfo) const
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
bool isLegalGFX12PlusPackedMathFP32Operand(const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand for gfx12+ packed math FP32 instructions.
static bool usesVM_CNT(const MachineInstr &MI)
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, AMDGPU::OpName OperandName) const
Returns the operand named Op.
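The usual access pattern is null-checked, since not every opcode carries every named operand; a sketch, with AMDGPU::OpName::offset chosen only as an example operand:
// Sketch: read a named immediate operand if the opcode has one.
static int64_t getOffsetOrZero(const SIInstrInfo &TII, MachineInstr &MI) {
  if (MachineOperand *Off = TII.getNamedOperand(MI, AMDGPU::OpName::offset))
    if (Off->isImm())
      return Off->getImm();
  return 0;
}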
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand if it were the OpIdx operand of MI.
static bool isLDSDMA(const MachineInstr &MI)
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
unsigned getHWRegIndex(MCRegister Reg) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getChannelFromSubReg(unsigned SubReg) const
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
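When a helper such as insertScratchExecCopy above takes an optional SlotIndexes pointer, the expected bookkeeping looks roughly like this sketch (names illustrative):
// Sketch: keep SlotIndexes consistent after inserting a new instruction
// post-RA; the pointer is null when no such analysis is preserved.
static void noteInsertion(SlotIndexes *Indexes, MachineInstr &NewMI) {
  if (Indexes)
    Indexes->insertMachineInstrInMaps(NewMI);
}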
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
bool getWMMAIsXDL(unsigned Opc)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool getMAIIsGFX940XDL(unsigned Opc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU-specific source operand?
bool isGenericAtomic(unsigned Opc)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating-point types.
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_INLINE_C_AV64_PSEUDO
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
int getMCOpcode(uint16_t Opcode, unsigned Gen)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
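A hedged sketch of selecting the width-appropriate check; the helper name is illustrative, and HasInv2Pi stands in for the subtarget flag that gates the 1/(2*pi) inline value:
// Sketch: width-dispatched inline-constant check.
static bool isInlineImm(int64_t Imm, unsigned OpSizeInBits, bool HasInv2Pi) {
  if (OpSizeInBits == 64)
    return AMDGPU::isInlinableLiteral64(Imm, HasInv2Pi);
  return AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Imm), HasInv2Pi);
}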
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
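For reference, a minimal sketch of the builder pattern; the opcode, destination register, insertion point, and other names are illustrative:
// Sketch: emit 'v_mov_b32 DstReg, 0' before iterator I in MBB.
BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), DstReg)
    .addImm(0);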
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and is Skew mod Align.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant, stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
constexpr bool has_single_bit(T Value) noexcept
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
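A brief sketch of how these range wrappers read in practice; the predicate and helper name are illustrative, not from this file:
// Sketch: range-based any_of over a machine instruction's use operands;
// all_of and is_contained follow the same pattern.
static bool readsExec(const MachineInstr &MI) {
  return llvm::any_of(MI.uses(), [](const MachineOperand &MO) {
    return MO.isReg() && MO.getReg() == AMDGPU::EXEC;
  });
}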
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, const MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair, skipping copy-like instructions and subre...
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getUndefRegState(bool B)
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
unsigned getKillRegState(bool B)
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
constexpr T reverseBits(T Val)
Reverse the bits in Val.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
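Several of the integer helpers listed above (isInt, isUInt, maxUIntN, SignExtend64, maskTrailingOnes) tend to appear together in offset-field arithmetic; a small self-contained sketch, with the 13-bit and 12-bit widths chosen purely for illustration:
#include "llvm/Support/MathExtras.h"

// Sketch: typical encode/decode checks for an immediate offset field.
static bool fitsSigned13(int64_t Off) { return llvm::isInt<13>(Off); }
static bool fitsUnsigned12(uint64_t Off) {
  return llvm::isUInt<12>(Off);               // i.e. Off <= llvm::maxUIntN(12)
}
static int64_t decodeSigned13(uint64_t RawField) {
  return llvm::SignExtend64<13>(RawField);    // sign-extend the raw field
}
static uint32_t low8Mask() {
  return llvm::maskTrailingOnes<uint32_t>(8); // 0xFF
}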
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
GenericCycleInfo< MachineSSAContext > MachineCycleInfo
MachineCycleInfo::CycleT MachineCycle
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Helper struct for the implementation of 3-address conversion to communicate updates made to instructi...
MachineInstr * RemoveMIUse
Other instruction whose def is no longer used by the converted instruction.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.