29#include "llvm/IR/IntrinsicsAMDGPU.h"
32#define DEBUG_TYPE "amdgpu-isel"
37#define GET_GLOBALISEL_IMPL
38#define AMDGPUSubtarget GCNSubtarget
39#include "AMDGPUGenGlobalISel.inc"
40#undef GET_GLOBALISEL_IMPL
46 : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
49#include
"AMDGPUGenGlobalISel.inc"
52#include
"AMDGPUGenGlobalISel.inc"
64 MRI = &
MF.getRegInfo();
72 return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
73 ? Def->getOperand(1).getReg()
83 auto &RegClassOrBank = MRI.getRegClassOrRegBank(
Reg);
84 const TargetRegisterClass *RC =
87 const LLT Ty = MRI.getType(
Reg);
91 return MRI.getVRegDef(
Reg)->getOpcode() != AMDGPU::G_TRUNC &&
96 return RB->
getID() == AMDGPU::VCCRegBankID;
99bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(
MachineInstr &
MI,
100 unsigned NewOpc)
const {
101 MI.setDesc(TII.get(NewOpc));
105 MachineOperand &Dst =
MI.getOperand(0);
106 MachineOperand &Src =
MI.getOperand(1);
112 const TargetRegisterClass *DstRC
113 = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
114 const TargetRegisterClass *SrcRC
115 = TRI.getConstrainedRegClassForOperand(Src, *MRI);
116 if (!DstRC || DstRC != SrcRC)
119 if (!RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI) ||
120 !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))
122 const MCInstrDesc &MCID =
MI.getDesc();
124 MI.getOperand(0).setIsEarlyClobber(
true);
129bool AMDGPUInstructionSelector::selectCOPY(
MachineInstr &
I)
const {
132 I.setDesc(TII.get(TargetOpcode::COPY));
134 const MachineOperand &Src =
I.getOperand(1);
135 MachineOperand &Dst =
I.getOperand(0);
139 if (isVCC(DstReg, *MRI)) {
140 if (SrcReg == AMDGPU::SCC) {
141 const TargetRegisterClass *RC
142 = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
145 return RBI.constrainGenericRegister(DstReg, *RC, *MRI);
148 if (!isVCC(SrcReg, *MRI)) {
150 if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI))
153 const TargetRegisterClass *SrcRC
154 = TRI.getConstrainedRegClassForOperand(Src, *MRI);
156 std::optional<ValueAndVReg> ConstVal =
160 STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
162 .
addImm(ConstVal->Value.getBoolValue() ? -1 : 0);
164 Register MaskedReg = MRI->createVirtualRegister(SrcRC);
171 assert(Subtarget->useRealTrue16Insts());
172 const int64_t NoMods = 0;
173 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)
179 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)
186 bool IsSGPR = TRI.isSGPRClass(SrcRC);
187 unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
194 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
200 if (!MRI->getRegClassOrNull(SrcReg))
201 MRI->setRegClass(SrcReg, SrcRC);
206 const TargetRegisterClass *RC =
207 TRI.getConstrainedRegClassForOperand(Dst, *MRI);
208 if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
214 for (
const MachineOperand &MO :
I.operands()) {
215 if (MO.getReg().isPhysical())
218 const TargetRegisterClass *RC =
219 TRI.getConstrainedRegClassForOperand(MO, *MRI);
222 RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
227bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(
MachineInstr &
I)
const {
230 Register VCCReg =
I.getOperand(1).getReg();
234 if (STI.hasScalarCompareEq64()) {
236 STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;
239 Register DeadDst = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
240 Cmp =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_OR_B64), DeadDst)
247 Register DstReg =
I.getOperand(0).getReg();
251 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
254bool AMDGPUInstructionSelector::selectCOPY_VCC_SCC(
MachineInstr &
I)
const {
258 Register DstReg =
I.getOperand(0).getReg();
259 Register SrcReg =
I.getOperand(1).getReg();
260 std::optional<ValueAndVReg> Arg =
264 const int64_t
Value = Arg->Value.getZExtValue();
266 unsigned Opcode = STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
273 return RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI);
279 unsigned SelectOpcode =
280 STI.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
290bool AMDGPUInstructionSelector::selectReadAnyLane(
MachineInstr &
I)
const {
291 Register DstReg =
I.getOperand(0).getReg();
292 Register SrcReg =
I.getOperand(1).getReg();
297 auto RFL =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
305bool AMDGPUInstructionSelector::selectPHI(
MachineInstr &
I)
const {
306 const Register DefReg =
I.getOperand(0).getReg();
307 const LLT DefTy = MRI->getType(DefReg);
319 MRI->getRegClassOrRegBank(DefReg);
321 const TargetRegisterClass *DefRC =
330 DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB);
339 for (
unsigned i = 1; i !=
I.getNumOperands(); i += 2) {
340 const Register SrcReg =
I.getOperand(i).getReg();
342 const RegisterBank *RB = MRI->getRegBankOrNull(SrcReg);
344 const LLT SrcTy = MRI->getType(SrcReg);
345 const TargetRegisterClass *SrcRC =
346 TRI.getRegClassForTypeOnBank(SrcTy, *RB);
347 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
352 I.setDesc(TII.get(TargetOpcode::PHI));
353 return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI);
359 unsigned SubIdx)
const {
363 Register DstReg = MRI->createVirtualRegister(&SubRC);
366 unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.
getSubReg(), SubIdx);
368 BuildMI(*BB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
394 return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
396 return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
398 return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
404bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(
MachineInstr &
I)
const {
405 Register DstReg =
I.getOperand(0).getReg();
406 unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
408 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
409 if (DstRB->
getID() != AMDGPU::SGPRRegBankID &&
410 DstRB->
getID() != AMDGPU::VCCRegBankID)
413 bool Is64 =
Size > 32 || (DstRB->
getID() == AMDGPU::VCCRegBankID &&
426bool AMDGPUInstructionSelector::selectG_ADD_SUB(
MachineInstr &
I)
const {
429 Register DstReg =
I.getOperand(0).getReg();
431 LLT Ty = MRI->getType(DstReg);
436 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
437 const bool IsSALU = DstRB->
getID() == AMDGPU::SGPRRegBankID;
438 const bool Sub =
I.getOpcode() == TargetOpcode::G_SUB;
442 const unsigned Opc =
Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
445 .
add(
I.getOperand(1))
446 .
add(
I.getOperand(2))
453 if (STI.hasAddNoCarryInsts()) {
454 const unsigned Opc =
Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
455 I.setDesc(TII.get(
Opc));
462 const unsigned Opc =
Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;
464 Register UnusedCarry = MRI->createVirtualRegister(TRI.getWaveMaskRegClass());
468 .
add(
I.getOperand(1))
469 .
add(
I.getOperand(2))
476 assert(!
Sub &&
"illegal sub should not reach here");
478 const TargetRegisterClass &RC
479 = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
480 const TargetRegisterClass &HalfRC
481 = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;
483 MachineOperand Lo1(getSubOperand64(
I.getOperand(1), HalfRC, AMDGPU::sub0));
484 MachineOperand Lo2(getSubOperand64(
I.getOperand(2), HalfRC, AMDGPU::sub0));
485 MachineOperand Hi1(getSubOperand64(
I.getOperand(1), HalfRC, AMDGPU::sub1));
486 MachineOperand Hi2(getSubOperand64(
I.getOperand(2), HalfRC, AMDGPU::sub1));
488 Register DstLo = MRI->createVirtualRegister(&HalfRC);
489 Register DstHi = MRI->createVirtualRegister(&HalfRC);
492 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
495 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
500 const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
501 Register CarryReg = MRI->createVirtualRegister(CarryRC);
502 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)
507 MachineInstr *Addc =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
517 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
524 if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
531bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
536 Register Dst0Reg =
I.getOperand(0).getReg();
537 Register Dst1Reg =
I.getOperand(1).getReg();
538 const bool IsAdd =
I.getOpcode() == AMDGPU::G_UADDO ||
539 I.getOpcode() == AMDGPU::G_UADDE;
540 const bool HasCarryIn =
I.getOpcode() == AMDGPU::G_UADDE ||
541 I.getOpcode() == AMDGPU::G_USUBE;
543 if (isVCC(Dst1Reg, *MRI)) {
544 unsigned NoCarryOpc =
545 IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
546 unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
547 I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
554 Register Src0Reg =
I.getOperand(2).getReg();
555 Register Src1Reg =
I.getOperand(3).getReg();
558 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
559 .
addReg(
I.getOperand(4).getReg());
562 unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
563 unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
565 auto CarryInst =
BuildMI(*BB, &
I,
DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
566 .
add(
I.getOperand(2))
567 .
add(
I.getOperand(3));
569 if (MRI->use_nodbg_empty(Dst1Reg)) {
570 CarryInst.setOperandDead(3);
572 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), Dst1Reg)
574 if (!MRI->getRegClassOrNull(Dst1Reg))
575 MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
578 if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
579 !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
580 !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, *MRI))
584 !RBI.constrainGenericRegister(
I.getOperand(4).getReg(),
585 AMDGPU::SReg_32RegClass, *MRI))
592bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
596 const bool IsUnsigned =
I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
597 bool UseNoCarry = Subtarget->hasMadNC64_32Insts() &&
598 MRI->use_nodbg_empty(
I.getOperand(1).getReg());
601 if (Subtarget->hasMADIntraFwdBug())
602 Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
603 : AMDGPU::V_MAD_I64_I32_gfx11_e64;
605 Opc = IsUnsigned ? AMDGPU::V_MAD_NC_U64_U32_e64
606 : AMDGPU::V_MAD_NC_I64_I32_e64;
608 Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;
613 I.setDesc(TII.get(
Opc));
615 I.addImplicitDefUseOperands(*
MF);
616 I.getOperand(0).setIsEarlyClobber(
true);
622bool AMDGPUInstructionSelector::selectG_EXTRACT(
MachineInstr &
I)
const {
624 Register DstReg =
I.getOperand(0).getReg();
625 Register SrcReg =
I.getOperand(1).getReg();
626 LLT DstTy = MRI->getType(DstReg);
627 LLT SrcTy = MRI->getType(SrcReg);
632 unsigned Offset =
I.getOperand(2).getImm();
633 if (
Offset % 32 != 0 || DstSize > 128)
641 const TargetRegisterClass *DstRC =
642 TRI.getConstrainedRegClassForOperand(
I.getOperand(0), *MRI);
643 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
646 const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
647 const TargetRegisterClass *SrcRC =
648 TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
653 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
658 *SrcRC,
I.getOperand(1));
660 BuildMI(*BB, &
I,
DL, TII.get(TargetOpcode::COPY), DstReg)
661 .
addReg(SrcReg, {}, SubReg);
667bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(
MachineInstr &
MI)
const {
668 MachineBasicBlock *BB =
MI.getParent();
670 LLT DstTy = MRI->getType(DstReg);
671 LLT SrcTy = MRI->getType(
MI.getOperand(1).getReg());
678 const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
680 const TargetRegisterClass *DstRC =
681 TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
685 ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
686 MachineInstrBuilder MIB =
687 BuildMI(*BB, &
MI,
DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
688 for (
int I = 0,
E =
MI.getNumOperands() - 1;
I !=
E; ++
I) {
689 MachineOperand &Src =
MI.getOperand(
I + 1);
693 const TargetRegisterClass *SrcRC
694 = TRI.getConstrainedRegClassForOperand(Src, *MRI);
695 if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))
699 if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
702 MI.eraseFromParent();
706bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(
MachineInstr &
MI)
const {
707 MachineBasicBlock *BB =
MI.getParent();
708 const int NumDst =
MI.getNumOperands() - 1;
710 MachineOperand &Src =
MI.getOperand(NumDst);
714 LLT DstTy = MRI->getType(DstReg0);
715 LLT SrcTy = MRI->getType(SrcReg);
720 const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
722 const TargetRegisterClass *SrcRC =
723 TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
724 if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
730 ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
731 for (
int I = 0,
E = NumDst;
I !=
E; ++
I) {
732 MachineOperand &Dst =
MI.getOperand(
I);
734 if (SrcBank->
getID() == AMDGPU::SGPRRegBankID &&
735 SubRegs[
I] == AMDGPU::hi16) {
736 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_LSHR_B32), Dst.getReg())
740 BuildMI(*BB, &
MI,
DL, TII.get(TargetOpcode::COPY), Dst.getReg())
741 .
addReg(SrcReg, {}, SubRegs[
I]);
745 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[
I]);
746 if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
749 const TargetRegisterClass *DstRC =
750 TRI.getConstrainedRegClassForOperand(Dst, *MRI);
751 if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI))
755 MI.eraseFromParent();
759bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(
MachineInstr &
MI)
const {
760 assert(
MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
761 MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);
765 LLT SrcTy = MRI->getType(Src0);
769 if (
MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
770 return selectG_MERGE_VALUES(
MI);
777 (
MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&
781 const RegisterBank *DstBank = RBI.getRegBank(Dst, *MRI, TRI);
782 if (DstBank->
getID() == AMDGPU::AGPRRegBankID)
785 assert(DstBank->
getID() == AMDGPU::SGPRRegBankID ||
786 DstBank->
getID() == AMDGPU::VGPRRegBankID);
787 const bool IsVector = DstBank->
getID() == AMDGPU::VGPRRegBankID;
790 MachineBasicBlock *BB =
MI.getParent();
800 const int64_t K0 = ConstSrc0->Value.getSExtValue();
801 const int64_t K1 = ConstSrc1->Value.getSExtValue();
802 uint32_t Lo16 =
static_cast<uint32_t
>(K0) & 0xffff;
803 uint32_t Hi16 =
static_cast<uint32_t
>(K1) & 0xffff;
804 uint32_t
Imm = Lo16 | (Hi16 << 16);
809 MI.eraseFromParent();
810 return RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI);
815 MI.eraseFromParent();
816 return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
827 if (Src1Def->
getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
828 MI.setDesc(TII.get(AMDGPU::COPY));
831 IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
832 return RBI.constrainGenericRegister(Dst, RC, *MRI) &&
833 RBI.constrainGenericRegister(Src0, RC, *MRI);
838 Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
839 auto MIB =
BuildMI(*BB,
MI,
DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
844 MIB =
BuildMI(*BB,
MI,
DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
850 MI.eraseFromParent();
875 unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
876 if (Shift0 && Shift1) {
877 Opc = AMDGPU::S_PACK_HH_B32_B16;
878 MI.getOperand(1).setReg(ShiftSrc0);
879 MI.getOperand(2).setReg(ShiftSrc1);
881 Opc = AMDGPU::S_PACK_LH_B32_B16;
882 MI.getOperand(2).setReg(ShiftSrc1);
886 if (ConstSrc1 && ConstSrc1->Value == 0) {
888 auto MIB =
BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
893 MI.eraseFromParent();
897 if (STI.hasSPackHL()) {
898 Opc = AMDGPU::S_PACK_HL_B32_B16;
899 MI.getOperand(1).setReg(ShiftSrc0);
903 MI.setDesc(TII.get(
Opc));
908bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(
MachineInstr &
I)
const {
909 const MachineOperand &MO =
I.getOperand(0);
913 const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, *MRI);
914 if ((!RC && !MRI->getRegBankOrNull(MO.
getReg())) ||
915 (RC && RBI.constrainGenericRegister(MO.
getReg(), *RC, *MRI))) {
916 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
923bool AMDGPUInstructionSelector::selectG_INSERT(
MachineInstr &
I)
const {
926 Register DstReg =
I.getOperand(0).getReg();
927 Register Src0Reg =
I.getOperand(1).getReg();
928 Register Src1Reg =
I.getOperand(2).getReg();
929 LLT Src1Ty = MRI->getType(Src1Reg);
931 unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
934 int64_t
Offset =
I.getOperand(3).getImm();
937 if (
Offset % 32 != 0 || InsSize % 32 != 0)
944 unsigned SubReg = TRI.getSubRegFromChannel(
Offset / 32, InsSize / 32);
945 if (SubReg == AMDGPU::NoSubRegister)
948 const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
949 const TargetRegisterClass *DstRC =
950 TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
954 const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI);
955 const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI);
956 const TargetRegisterClass *Src0RC =
957 TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank);
958 const TargetRegisterClass *Src1RC =
959 TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank);
963 Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
964 if (!Src0RC || !Src1RC)
967 if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
968 !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) ||
969 !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI))
973 BuildMI(*BB, &
I,
DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
982bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(
MachineInstr &
MI)
const {
985 Register OffsetReg =
MI.getOperand(2).getReg();
986 Register WidthReg =
MI.getOperand(3).getReg();
988 assert(RBI.getRegBank(DstReg, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID &&
989 "scalar BFX instructions are expanded in regbankselect");
990 assert(MRI->getType(
MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
991 "64-bit vector BFX instructions are expanded in regbankselect");
994 MachineBasicBlock *
MBB =
MI.getParent();
996 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SBFX;
997 unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
1002 MI.eraseFromParent();
1007bool AMDGPUInstructionSelector::selectInterpP1F16(
MachineInstr &
MI)
const {
1008 if (STI.getLDSBankCount() != 16)
1014 if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI) ||
1015 !RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI) ||
1016 !RBI.constrainGenericRegister(Src0, AMDGPU::VGPR_32RegClass, *MRI))
1026 Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1028 MachineBasicBlock *
MBB =
MI.getParent();
1032 BuildMI(*
MBB, &
MI,
DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
1035 .
addImm(
MI.getOperand(3).getImm());
1048 MI.eraseFromParent();
1057bool AMDGPUInstructionSelector::selectWritelane(
MachineInstr &
MI)
const {
1059 if (STI.getConstantBusLimit(AMDGPU::V_WRITELANE_B32) > 1)
1062 MachineBasicBlock *
MBB =
MI.getParent();
1066 Register LaneSelect =
MI.getOperand(3).getReg();
1069 auto MIB =
BuildMI(*
MBB, &
MI,
DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);
1071 std::optional<ValueAndVReg> ConstSelect =
1077 MIB.
addImm(ConstSelect->Value.getSExtValue() &
1080 std::optional<ValueAndVReg> ConstVal =
1086 STI.hasInv2PiInlineImm())) {
1087 MIB.
addImm(ConstVal->Value.getSExtValue());
1095 RBI.constrainGenericRegister(LaneSelect, AMDGPU::SReg_32_XM0RegClass, *MRI);
1097 BuildMI(*
MBB, *MIB,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
1105 MI.eraseFromParent();
1112bool AMDGPUInstructionSelector::selectDivScale(
MachineInstr &
MI)
const {
1116 LLT Ty = MRI->getType(Dst0);
1119 Opc = AMDGPU::V_DIV_SCALE_F32_e64;
1121 Opc = AMDGPU::V_DIV_SCALE_F64_e64;
1128 MachineBasicBlock *
MBB =
MI.getParent();
1132 unsigned ChooseDenom =
MI.getOperand(5).getImm();
1134 Register Src0 = ChooseDenom != 0 ? Numer : Denom;
1147 MI.eraseFromParent();
1152bool AMDGPUInstructionSelector::selectG_INTRINSIC(
MachineInstr &
I)
const {
1154 switch (IntrinsicID) {
1155 case Intrinsic::amdgcn_if_break: {
1160 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
1161 .
add(
I.getOperand(0))
1162 .
add(
I.getOperand(2))
1163 .
add(
I.getOperand(3));
1165 Register DstReg =
I.getOperand(0).getReg();
1166 Register Src0Reg =
I.getOperand(2).getReg();
1167 Register Src1Reg =
I.getOperand(3).getReg();
1169 I.eraseFromParent();
1172 MRI->setRegClass(
Reg, TRI.getWaveMaskRegClass());
1176 case Intrinsic::amdgcn_interp_p1_f16:
1177 return selectInterpP1F16(
I);
1178 case Intrinsic::amdgcn_wqm:
1179 return constrainCopyLikeIntrin(
I, AMDGPU::WQM);
1180 case Intrinsic::amdgcn_softwqm:
1181 return constrainCopyLikeIntrin(
I, AMDGPU::SOFT_WQM);
1182 case Intrinsic::amdgcn_strict_wwm:
1183 case Intrinsic::amdgcn_wwm:
1184 return constrainCopyLikeIntrin(
I, AMDGPU::STRICT_WWM);
1185 case Intrinsic::amdgcn_strict_wqm:
1186 return constrainCopyLikeIntrin(
I, AMDGPU::STRICT_WQM);
1187 case Intrinsic::amdgcn_writelane:
1188 return selectWritelane(
I);
1189 case Intrinsic::amdgcn_div_scale:
1190 return selectDivScale(
I);
1191 case Intrinsic::amdgcn_icmp:
1192 case Intrinsic::amdgcn_fcmp:
1195 return selectIntrinsicCmp(
I);
1196 case Intrinsic::amdgcn_ballot:
1197 return selectBallot(
I);
1198 case Intrinsic::amdgcn_reloc_constant:
1199 return selectRelocConstant(
I);
1200 case Intrinsic::amdgcn_groupstaticsize:
1201 return selectGroupStaticSize(
I);
1202 case Intrinsic::returnaddress:
1203 return selectReturnAddress(
I);
1204 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
1205 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
1206 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
1207 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
1208 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
1209 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
1210 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
1211 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
1212 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
1213 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
1214 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
1215 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
1216 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
1217 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
1218 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
1219 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
1220 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
1221 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
1222 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
1223 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
1224 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
1225 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
1226 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
1227 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
1228 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
1229 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
1230 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
1231 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
1232 return selectSMFMACIntrin(
I);
1233 case Intrinsic::amdgcn_permlane16_swap:
1234 case Intrinsic::amdgcn_permlane32_swap:
1235 return selectPermlaneSwapIntrin(
I, IntrinsicID);
1236 case Intrinsic::amdgcn_wave_shuffle:
1237 return selectWaveShuffleIntrin(
I);
1248 if (
Size == 16 && !ST.has16BitInsts())
1251 const auto Select = [&](
unsigned S16Opc,
unsigned TrueS16Opc,
1252 unsigned FakeS16Opc,
unsigned S32Opc,
1255 return ST.hasTrue16BitInsts()
1256 ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc
1267 return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
1268 AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,
1269 AMDGPU::V_CMP_NE_U64_e64);
1271 return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
1272 AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,
1273 AMDGPU::V_CMP_EQ_U64_e64);
1275 return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
1276 AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,
1277 AMDGPU::V_CMP_GT_I64_e64);
1279 return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
1280 AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,
1281 AMDGPU::V_CMP_GE_I64_e64);
1283 return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
1284 AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,
1285 AMDGPU::V_CMP_LT_I64_e64);
1287 return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
1288 AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,
1289 AMDGPU::V_CMP_LE_I64_e64);
1291 return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
1292 AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,
1293 AMDGPU::V_CMP_GT_U64_e64);
1295 return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
1296 AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,
1297 AMDGPU::V_CMP_GE_U64_e64);
1299 return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
1300 AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,
1301 AMDGPU::V_CMP_LT_U64_e64);
1303 return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
1304 AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,
1305 AMDGPU::V_CMP_LE_U64_e64);
1308 return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
1309 AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,
1310 AMDGPU::V_CMP_EQ_F64_e64);
1312 return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
1313 AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,
1314 AMDGPU::V_CMP_GT_F64_e64);
1316 return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
1317 AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,
1318 AMDGPU::V_CMP_GE_F64_e64);
1320 return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
1321 AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,
1322 AMDGPU::V_CMP_LT_F64_e64);
1324 return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
1325 AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,
1326 AMDGPU::V_CMP_LE_F64_e64);
1328 return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
1329 AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
1330 AMDGPU::V_CMP_NEQ_F64_e64);
1332 return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
1333 AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,
1334 AMDGPU::V_CMP_O_F64_e64);
1336 return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
1337 AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,
1338 AMDGPU::V_CMP_U_F64_e64);
1340 return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
1341 AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,
1342 AMDGPU::V_CMP_NLG_F64_e64);
1344 return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
1345 AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,
1346 AMDGPU::V_CMP_NLE_F64_e64);
1348 return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
1349 AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,
1350 AMDGPU::V_CMP_NLT_F64_e64);
1352 return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
1353 AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,
1354 AMDGPU::V_CMP_NGE_F64_e64);
1356 return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
1357 AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,
1358 AMDGPU::V_CMP_NGT_F64_e64);
1360 return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
1361 AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
1362 AMDGPU::V_CMP_NEQ_F64_e64);
1364 return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
1365 AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,
1366 AMDGPU::V_CMP_TRU_F64_e64);
1368 return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
1369 AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,
1370 AMDGPU::V_CMP_F_F64_e64);
1375 unsigned Size)
const {
1377 if (!STI.hasScalarCompareEq64())
1382 return AMDGPU::S_CMP_LG_U64;
1384 return AMDGPU::S_CMP_EQ_U64;
1393 return AMDGPU::S_CMP_LG_U32;
1395 return AMDGPU::S_CMP_EQ_U32;
1397 return AMDGPU::S_CMP_GT_I32;
1399 return AMDGPU::S_CMP_GE_I32;
1401 return AMDGPU::S_CMP_LT_I32;
1403 return AMDGPU::S_CMP_LE_I32;
1405 return AMDGPU::S_CMP_GT_U32;
1407 return AMDGPU::S_CMP_GE_U32;
1409 return AMDGPU::S_CMP_LT_U32;
1411 return AMDGPU::S_CMP_LE_U32;
1413 return AMDGPU::S_CMP_EQ_F32;
1415 return AMDGPU::S_CMP_GT_F32;
1417 return AMDGPU::S_CMP_GE_F32;
1419 return AMDGPU::S_CMP_LT_F32;
1421 return AMDGPU::S_CMP_LE_F32;
1423 return AMDGPU::S_CMP_LG_F32;
1425 return AMDGPU::S_CMP_O_F32;
1427 return AMDGPU::S_CMP_U_F32;
1429 return AMDGPU::S_CMP_NLG_F32;
1431 return AMDGPU::S_CMP_NLE_F32;
1433 return AMDGPU::S_CMP_NLT_F32;
1435 return AMDGPU::S_CMP_NGE_F32;
1437 return AMDGPU::S_CMP_NGT_F32;
1439 return AMDGPU::S_CMP_NEQ_F32;
1446 if (!STI.hasSALUFloatInsts())
1451 return AMDGPU::S_CMP_EQ_F16;
1453 return AMDGPU::S_CMP_GT_F16;
1455 return AMDGPU::S_CMP_GE_F16;
1457 return AMDGPU::S_CMP_LT_F16;
1459 return AMDGPU::S_CMP_LE_F16;
1461 return AMDGPU::S_CMP_LG_F16;
1463 return AMDGPU::S_CMP_O_F16;
1465 return AMDGPU::S_CMP_U_F16;
1467 return AMDGPU::S_CMP_NLG_F16;
1469 return AMDGPU::S_CMP_NLE_F16;
1471 return AMDGPU::S_CMP_NLT_F16;
1473 return AMDGPU::S_CMP_NGE_F16;
1475 return AMDGPU::S_CMP_NGT_F16;
1477 return AMDGPU::S_CMP_NEQ_F16;
1486bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(
MachineInstr &
I)
const {
1491 Register SrcReg =
I.getOperand(2).getReg();
1492 unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);
1496 Register CCReg =
I.getOperand(0).getReg();
1497 if (!isVCC(CCReg, *MRI)) {
1498 int Opcode = getS_CMPOpcode(Pred,
Size);
1501 MachineInstr *ICmp =
BuildMI(*BB, &
I,
DL, TII.get(Opcode))
1502 .
add(
I.getOperand(2))
1503 .
add(
I.getOperand(3));
1504 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), CCReg)
1508 RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, *MRI);
1509 I.eraseFromParent();
1513 if (
I.getOpcode() == AMDGPU::G_FCMP)
1520 MachineInstrBuilder ICmp;
1523 ICmp =
BuildMI(*BB, &
I,
DL, TII.get(Opcode),
I.getOperand(0).getReg())
1525 .
add(
I.getOperand(2))
1527 .
add(
I.getOperand(3))
1530 ICmp =
BuildMI(*BB, &
I,
DL, TII.get(Opcode),
I.getOperand(0).getReg())
1531 .
add(
I.getOperand(2))
1532 .
add(
I.getOperand(3));
1536 *TRI.getBoolRC(), *MRI);
1538 I.eraseFromParent();
1542bool AMDGPUInstructionSelector::selectIntrinsicCmp(
MachineInstr &
I)
const {
1543 Register Dst =
I.getOperand(0).getReg();
1544 if (isVCC(Dst, *MRI))
1547 LLT DstTy = MRI->getType(Dst);
1553 Register SrcReg =
I.getOperand(2).getReg();
1554 unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);
1562 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
1563 I.eraseFromParent();
1564 return RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);
1571 MachineInstrBuilder SelectedMI;
1572 MachineOperand &
LHS =
I.getOperand(2);
1573 MachineOperand &
RHS =
I.getOperand(3);
1574 auto [Src0, Src0Mods] = selectVOP3ModsImpl(
LHS.getReg());
1575 auto [Src1, Src1Mods] = selectVOP3ModsImpl(
RHS.getReg());
1577 copyToVGPRIfSrcFolded(Src0, Src0Mods,
LHS, &
I,
true);
1579 copyToVGPRIfSrcFolded(Src1, Src1Mods,
RHS, &
I,
true);
1580 SelectedMI =
BuildMI(*BB, &
I,
DL, TII.get(Opcode), Dst);
1582 SelectedMI.
addImm(Src0Mods);
1583 SelectedMI.
addReg(Src0Reg);
1585 SelectedMI.
addImm(Src1Mods);
1586 SelectedMI.
addReg(Src1Reg);
1592 RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);
1595 I.eraseFromParent();
1606 if (
MI->getParent() !=
MBB)
1610 if (
MI->getOpcode() == AMDGPU::COPY) {
1613 if (DstRB && SrcRB && DstRB->
getID() == AMDGPU::VCCRegBankID &&
1614 SrcRB->getID() == AMDGPU::SGPRRegBankID)
1631bool AMDGPUInstructionSelector::selectBallot(
MachineInstr &
I)
const {
1634 Register DstReg =
I.getOperand(0).getReg();
1635 Register SrcReg =
I.getOperand(2).getReg();
1636 const unsigned BallotSize = MRI->getType(DstReg).getSizeInBits();
1637 const unsigned WaveSize = STI.getWavefrontSize();
1641 if (BallotSize != WaveSize && (BallotSize != 64 || WaveSize != 32))
1644 std::optional<ValueAndVReg> Arg =
1649 if (BallotSize != WaveSize) {
1650 Dst = MRI->createVirtualRegister(TRI.getBoolRC());
1654 const int64_t
Value = Arg->Value.getZExtValue();
1657 unsigned Opcode = WaveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
1664 if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))
1670 if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))
1674 unsigned AndOpc = WaveSize == 64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
1684 if (BallotSize != WaveSize) {
1685 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
1687 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
1694 I.eraseFromParent();
1698bool AMDGPUInstructionSelector::selectRelocConstant(
MachineInstr &
I)
const {
1699 Register DstReg =
I.getOperand(0).getReg();
1700 const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
1701 const TargetRegisterClass *DstRC = TRI.getRegClassForSizeOnBank(32, *DstBank);
1702 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
1705 const bool IsVALU = DstBank->
getID() == AMDGPU::VGPRRegBankID;
1707 Module *
M =
MF->getFunction().getParent();
1708 const MDNode *
Metadata =
I.getOperand(2).getMetadata();
1715 TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)
1718 I.eraseFromParent();
1722bool AMDGPUInstructionSelector::selectGroupStaticSize(
MachineInstr &
I)
const {
1725 Register DstReg =
I.getOperand(0).getReg();
1726 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
1727 unsigned Mov = DstRB->
getID() == AMDGPU::SGPRRegBankID ?
1728 AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1736 const SIMachineFunctionInfo *MFI =
MF->getInfo<SIMachineFunctionInfo>();
1739 Module *
M =
MF->getFunction().getParent();
1740 const GlobalValue *GV =
1745 I.eraseFromParent();
1750bool AMDGPUInstructionSelector::selectReturnAddress(
MachineInstr &
I)
const {
1755 MachineOperand &Dst =
I.getOperand(0);
1757 unsigned Depth =
I.getOperand(2).getImm();
1759 const TargetRegisterClass *RC
1760 = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
1762 !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
1767 MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
1770 I.eraseFromParent();
1774 MachineFrameInfo &MFI =
MF.getFrameInfo();
1779 Register ReturnAddrReg = TRI.getReturnAddressReg(
MF);
1781 AMDGPU::SReg_64RegClass,
DL);
1784 I.eraseFromParent();
1788bool AMDGPUInstructionSelector::selectEndCfIntrinsic(
MachineInstr &
MI)
const {
1791 MachineBasicBlock *BB =
MI.getParent();
1792 BuildMI(*BB, &
MI,
MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
1793 .
add(
MI.getOperand(1));
1796 MI.eraseFromParent();
1798 if (!MRI->getRegClassOrNull(
Reg))
1799 MRI->setRegClass(
Reg, TRI.getWaveMaskRegClass());
1803bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
1805 MachineBasicBlock *
MBB =
MI.getParent();
1809 unsigned IndexOperand =
MI.getOperand(7).getImm();
1810 bool WaveRelease =
MI.getOperand(8).getImm() != 0;
1811 bool WaveDone =
MI.getOperand(9).getImm() != 0;
1813 if (WaveDone && !WaveRelease) {
1817 Fn,
"ds_ordered_count: wave_done requires wave_release",
DL));
1820 unsigned OrderedCountIndex = IndexOperand & 0x3f;
1821 IndexOperand &= ~0x3f;
1822 unsigned CountDw = 0;
1825 CountDw = (IndexOperand >> 24) & 0xf;
1826 IndexOperand &= ~(0xf << 24);
1828 if (CountDw < 1 || CountDw > 4) {
1831 Fn,
"ds_ordered_count: dword count must be between 1 and 4",
DL));
1839 Fn,
"ds_ordered_count: bad index operand",
DL));
1842 unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
1845 unsigned Offset0 = OrderedCountIndex << 2;
1846 unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
1849 Offset1 |= (CountDw - 1) << 6;
1852 Offset1 |= ShaderType << 2;
1854 unsigned Offset = Offset0 | (Offset1 << 8);
1862 MachineInstrBuilder
DS =
1863 BuildMI(*
MBB, &
MI,
DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
1868 if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI))
1872 MI.eraseFromParent();
1878 case Intrinsic::amdgcn_ds_gws_init:
1879 return AMDGPU::DS_GWS_INIT;
1880 case Intrinsic::amdgcn_ds_gws_barrier:
1881 return AMDGPU::DS_GWS_BARRIER;
1882 case Intrinsic::amdgcn_ds_gws_sema_v:
1883 return AMDGPU::DS_GWS_SEMA_V;
1884 case Intrinsic::amdgcn_ds_gws_sema_br:
1885 return AMDGPU::DS_GWS_SEMA_BR;
1886 case Intrinsic::amdgcn_ds_gws_sema_p:
1887 return AMDGPU::DS_GWS_SEMA_P;
1888 case Intrinsic::amdgcn_ds_gws_sema_release_all:
1889 return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
1895bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(
MachineInstr &
MI,
1897 if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
1898 !STI.hasGWSSemaReleaseAll()))
1902 const bool HasVSrc =
MI.getNumOperands() == 3;
1903 assert(HasVSrc ||
MI.getNumOperands() == 2);
1905 Register BaseOffset =
MI.getOperand(HasVSrc ? 2 : 1).getReg();
1906 const RegisterBank *OffsetRB = RBI.getRegBank(BaseOffset, *MRI, TRI);
1907 if (OffsetRB->
getID() != AMDGPU::SGPRRegBankID)
1913 MachineBasicBlock *
MBB =
MI.getParent();
1916 MachineInstr *Readfirstlane =
nullptr;
1921 if (OffsetDef->
getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
1922 Readfirstlane = OffsetDef;
1927 if (OffsetDef->
getOpcode() == AMDGPU::G_CONSTANT) {
1937 std::tie(BaseOffset, ImmOffset) =
1940 if (Readfirstlane) {
1943 if (!RBI.constrainGenericRegister(BaseOffset, AMDGPU::VGPR_32RegClass, *MRI))
1949 if (!RBI.constrainGenericRegister(BaseOffset,
1950 AMDGPU::SReg_32RegClass, *MRI))
1954 Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
1969 const MCInstrDesc &InstrDesc = TII.get(
Opc);
1974 int Data0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
1975 const TargetRegisterClass *DataRC = TII.getRegClass(InstrDesc, Data0Idx);
1976 const TargetRegisterClass *SubRC =
1977 TRI.getSubRegisterClass(DataRC, AMDGPU::sub0);
1981 if (!RBI.constrainGenericRegister(VSrc, *DataRC, *MRI))
1991 Register DataReg = MRI->createVirtualRegister(DataRC);
1992 if (!RBI.constrainGenericRegister(VSrc, *SubRC, *MRI))
1995 Register UndefReg = MRI->createVirtualRegister(SubRC);
2014 MI.eraseFromParent();
2018bool AMDGPUInstructionSelector::selectDSAppendConsume(
MachineInstr &
MI,
2019 bool IsAppend)
const {
2020 Register PtrBase =
MI.getOperand(2).getReg();
2021 LLT PtrTy = MRI->getType(PtrBase);
2025 std::tie(PtrBase,
Offset) = selectDS1Addr1OffsetImpl(
MI.getOperand(2));
2028 if (!isDSOffsetLegal(PtrBase,
Offset)) {
2029 PtrBase =
MI.getOperand(2).getReg();
2033 MachineBasicBlock *
MBB =
MI.getParent();
2035 const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2039 if (!RBI.constrainGenericRegister(PtrBase, AMDGPU::SReg_32RegClass, *MRI))
2046 MI.eraseFromParent();
2051bool AMDGPUInstructionSelector::selectInitWholeWave(
MachineInstr &
MI)
const {
2052 MachineFunction *
MF =
MI.getMF();
2053 SIMachineFunctionInfo *MFInfo =
MF->getInfo<SIMachineFunctionInfo>();
2064 TFE = TexFailCtrl & 0x1;
2066 LWE = TexFailCtrl & 0x2;
2069 return TexFailCtrl == 0;
2072bool AMDGPUInstructionSelector::selectImageIntrinsic(
2074 MachineBasicBlock *
MBB =
MI.getParent();
2080 Register ResultDef =
MI.getOperand(0).getReg();
2081 if (MRI->use_nodbg_empty(ResultDef))
2085 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2094 const unsigned ArgOffset =
MI.getNumExplicitDefs() + 1;
2096 Register VDataIn = AMDGPU::NoRegister;
2097 Register VDataOut = AMDGPU::NoRegister;
2099 int NumVDataDwords = -1;
2100 bool IsD16 =
MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
2101 MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;
2107 Unorm =
MI.getOperand(ArgOffset + Intr->
UnormIndex).getImm() != 0;
2111 bool IsTexFail =
false;
2113 TFE, LWE, IsTexFail))
2116 const int Flags =
MI.getOperand(ArgOffset + Intr->
NumArgs).getImm();
2117 const bool IsA16 = (
Flags & 1) != 0;
2118 const bool IsG16 = (
Flags & 2) != 0;
2121 if (IsA16 && !STI.hasG16() && !IsG16)
2125 unsigned DMaskLanes = 0;
2127 if (BaseOpcode->
Atomic) {
2129 VDataOut =
MI.getOperand(0).getReg();
2130 VDataIn =
MI.getOperand(2).getReg();
2131 LLT Ty = MRI->getType(VDataIn);
2134 const bool Is64Bit = BaseOpcode->
AtomicX2 ?
2139 assert(
MI.getOperand(3).getReg() == AMDGPU::NoRegister);
2141 DMask = Is64Bit ? 0xf : 0x3;
2142 NumVDataDwords = Is64Bit ? 4 : 2;
2144 DMask = Is64Bit ? 0x3 : 0x1;
2145 NumVDataDwords = Is64Bit ? 2 : 1;
2148 DMask =
MI.getOperand(ArgOffset + Intr->
DMaskIndex).getImm();
2151 if (BaseOpcode->
Store) {
2152 VDataIn =
MI.getOperand(1).getReg();
2153 VDataTy = MRI->getType(VDataIn);
2158 VDataOut =
MI.getOperand(0).getReg();
2159 VDataTy = MRI->getType(VDataOut);
2160 NumVDataDwords = DMaskLanes;
2162 if (IsD16 && !STI.hasUnpackedD16VMem())
2163 NumVDataDwords = (DMaskLanes + 1) / 2;
2168 if (Subtarget->hasG16() && IsG16) {
2169 const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =
2172 IntrOpcode = G16MappingInfo->
G16;
2176 assert((!IsTexFail || DMaskLanes >= 1) &&
"should have legalized this");
2186 int NumVAddrRegs = 0;
2187 int NumVAddrDwords = 0;
2190 MachineOperand &AddrOp =
MI.getOperand(ArgOffset +
I);
2191 if (!AddrOp.
isReg())
2199 NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;
2206 NumVAddrRegs != 1 &&
2207 (STI.hasPartialNSAEncoding() ? NumVAddrDwords >= NumVAddrRegs
2208 : NumVAddrDwords == NumVAddrRegs);
2209 if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
2220 NumVDataDwords, NumVAddrDwords);
2221 }
else if (IsGFX12Plus) {
2223 NumVDataDwords, NumVAddrDwords);
2224 }
else if (IsGFX11Plus) {
2226 UseNSA ? AMDGPU::MIMGEncGfx11NSA
2227 : AMDGPU::MIMGEncGfx11Default,
2228 NumVDataDwords, NumVAddrDwords);
2229 }
else if (IsGFX10Plus) {
2231 UseNSA ? AMDGPU::MIMGEncGfx10NSA
2232 : AMDGPU::MIMGEncGfx10Default,
2233 NumVDataDwords, NumVAddrDwords);
2235 if (Subtarget->hasGFX90AInsts()) {
2237 NumVDataDwords, NumVAddrDwords);
2241 <<
"requested image instruction is not supported on this GPU\n");
2248 NumVDataDwords, NumVAddrDwords);
2251 NumVDataDwords, NumVAddrDwords);
2261 const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;
2263 Register TmpReg = MRI->createVirtualRegister(
2264 Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
2265 unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
2268 if (!MRI->use_empty(VDataOut)) {
2281 for (
int I = 0;
I != NumVAddrRegs; ++
I) {
2282 MachineOperand &SrcOp =
MI.getOperand(ArgOffset + Intr->
VAddrStart +
I);
2283 if (SrcOp.
isReg()) {
2302 STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
2304 MIB.
addImm(IsA16 ? -1 : 0);
2306 if (!Subtarget->hasGFX90AInsts()) {
2318 MIB.
addImm(IsD16 ? -1 : 0);
2320 MI.eraseFromParent();
2322 TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::vaddr);
2328bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
2334 MachineBasicBlock *
MBB =
MI.getParent();
2339 unsigned Offset =
MI.getOperand(6).getImm();
2343 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2344 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
2345 Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
2347 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
2348 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
2350 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
2351 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;
2363 MI.eraseFromParent();
2368bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
2371 switch (IntrinsicID) {
2372 case Intrinsic::amdgcn_end_cf:
2373 return selectEndCfIntrinsic(
I);
2374 case Intrinsic::amdgcn_ds_ordered_add:
2375 case Intrinsic::amdgcn_ds_ordered_swap:
2376 return selectDSOrderedIntrinsic(
I, IntrinsicID);
2377 case Intrinsic::amdgcn_ds_gws_init:
2378 case Intrinsic::amdgcn_ds_gws_barrier:
2379 case Intrinsic::amdgcn_ds_gws_sema_v:
2380 case Intrinsic::amdgcn_ds_gws_sema_br:
2381 case Intrinsic::amdgcn_ds_gws_sema_p:
2382 case Intrinsic::amdgcn_ds_gws_sema_release_all:
2383 return selectDSGWSIntrinsic(
I, IntrinsicID);
2384 case Intrinsic::amdgcn_ds_append:
2385 return selectDSAppendConsume(
I,
true);
2386 case Intrinsic::amdgcn_ds_consume:
2387 return selectDSAppendConsume(
I,
false);
2388 case Intrinsic::amdgcn_init_whole_wave:
2389 return selectInitWholeWave(
I);
2390 case Intrinsic::amdgcn_raw_buffer_load_lds:
2391 case Intrinsic::amdgcn_raw_buffer_load_async_lds:
2392 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
2393 case Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds:
2394 case Intrinsic::amdgcn_struct_buffer_load_lds:
2395 case Intrinsic::amdgcn_struct_buffer_load_async_lds:
2396 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
2397 case Intrinsic::amdgcn_struct_ptr_buffer_load_async_lds:
2398 return selectBufferLoadLds(
I);
2403 case Intrinsic::amdgcn_load_to_lds:
2404 case Intrinsic::amdgcn_load_async_to_lds:
2405 case Intrinsic::amdgcn_global_load_lds:
2406 case Intrinsic::amdgcn_global_load_async_lds:
2407 return selectGlobalLoadLds(
I);
2408 case Intrinsic::amdgcn_tensor_load_to_lds:
2409 case Intrinsic::amdgcn_tensor_store_from_lds:
2410 return selectTensorLoadStore(
I, IntrinsicID);
2411 case Intrinsic::amdgcn_asyncmark:
2412 case Intrinsic::amdgcn_wait_asyncmark:
2413 if (!Subtarget->hasAsyncMark())
2416 case Intrinsic::amdgcn_exp_compr:
2417 if (!STI.hasCompressedExport()) {
2419 F.getContext().diagnose(
2420 DiagnosticInfoUnsupported(
F,
"intrinsic not supported on subtarget",
2425 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2426 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
2427 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
2428 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
2429 return selectDSBvhStackIntrinsic(
I);
2430 case Intrinsic::amdgcn_s_alloc_vgpr: {
2436 Register ResReg =
I.getOperand(0).getReg();
2438 MachineInstr *AllocMI =
BuildMI(*
MBB, &
I,
DL, TII.get(AMDGPU::S_ALLOC_VGPR))
2439 .
add(
I.getOperand(2));
2442 I.eraseFromParent();
2444 return RBI.constrainGenericRegister(ResReg, AMDGPU::SReg_32RegClass, *MRI);
2446 case Intrinsic::amdgcn_s_barrier_init:
2447 case Intrinsic::amdgcn_s_barrier_signal_var:
2448 return selectNamedBarrierInit(
I, IntrinsicID);
2449 case Intrinsic::amdgcn_s_wakeup_barrier: {
2450 if (!STI.hasSWakeupBarrier()) {
2452 F.getContext().diagnose(
2453 DiagnosticInfoUnsupported(
F,
"intrinsic not supported on subtarget",
2457 return selectNamedBarrierInst(
I, IntrinsicID);
2459 case Intrinsic::amdgcn_s_barrier_join:
2460 case Intrinsic::amdgcn_s_get_named_barrier_state:
2461 return selectNamedBarrierInst(
I, IntrinsicID);
2462 case Intrinsic::amdgcn_s_get_barrier_state:
2463 return selectSGetBarrierState(
I, IntrinsicID);
2464 case Intrinsic::amdgcn_s_barrier_signal_isfirst:
2465 return selectSBarrierSignalIsfirst(
I, IntrinsicID);
2470bool AMDGPUInstructionSelector::selectG_SELECT(
MachineInstr &
I)
const {
2477 Register DstReg =
I.getOperand(0).getReg();
2478 unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
2480 const MachineOperand &CCOp =
I.getOperand(1);
2482 if (!isVCC(CCReg, *MRI)) {
2483 unsigned SelectOpcode =
Size == 64 ? AMDGPU::S_CSELECT_B64 :
2484 AMDGPU::S_CSELECT_B32;
2485 MachineInstr *CopySCC =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
2491 if (!MRI->getRegClassOrNull(CCReg))
2492 MRI->setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, *MRI));
2494 .
add(
I.getOperand(2))
2495 .
add(
I.getOperand(3));
2499 I.eraseFromParent();
2508 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
2510 .
add(
I.getOperand(3))
2512 .
add(
I.getOperand(2))
2513 .
add(
I.getOperand(1));
2516 I.eraseFromParent();
2520bool AMDGPUInstructionSelector::selectG_TRUNC(
MachineInstr &
I)
const {
2521 Register DstReg =
I.getOperand(0).getReg();
2522 Register SrcReg =
I.getOperand(1).getReg();
2523 const LLT DstTy = MRI->getType(DstReg);
2524 const LLT SrcTy = MRI->getType(SrcReg);
2527 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
2528 const RegisterBank *DstRB;
2534 DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
2539 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
2544 const TargetRegisterClass *SrcRC =
2545 TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB);
2546 const TargetRegisterClass *DstRC =
2547 TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
2548 if (!SrcRC || !DstRC)
2551 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
2552 !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) {
2557 if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {
2558 assert(STI.useRealTrue16Insts());
2562 .
addReg(SrcReg, {}, AMDGPU::lo16);
2563 I.eraseFromParent();
2571 Register LoReg = MRI->createVirtualRegister(DstRC);
2572 Register HiReg = MRI->createVirtualRegister(DstRC);
2574 .
addReg(SrcReg, {}, AMDGPU::sub0);
2576 .
addReg(SrcReg, {}, AMDGPU::sub1);
2578 if (IsVALU && STI.hasSDWA()) {
2581 MachineInstr *MovSDWA =
2582 BuildMI(*
MBB,
I,
DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
2592 Register TmpReg0 = MRI->createVirtualRegister(DstRC);
2593 Register TmpReg1 = MRI->createVirtualRegister(DstRC);
2594 Register ImmReg = MRI->createVirtualRegister(DstRC);
2596 BuildMI(*
MBB,
I,
DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)
2606 unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
2607 unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
2608 unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;
2620 And.setOperandDead(3);
2621 Or.setOperandDead(3);
2625 I.eraseFromParent();
2633 unsigned SubRegIdx = DstSize < 32
2634 ?
static_cast<unsigned>(AMDGPU::sub0)
2635 : TRI.getSubRegFromChannel(0, DstSize / 32);
2636 if (SubRegIdx == AMDGPU::NoSubRegister)
2641 const TargetRegisterClass *SrcWithSubRC
2642 = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
2646 if (SrcWithSubRC != SrcRC) {
2647 if (!RBI.constrainGenericRegister(SrcReg, *SrcWithSubRC, *MRI))
2651 I.getOperand(1).setSubReg(SubRegIdx);
2654 I.setDesc(TII.get(TargetOpcode::COPY));
2661 int SignedMask =
static_cast<int>(Mask);
2662 return SignedMask >= -16 && SignedMask <= 64;
2666const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
2675 return &RBI.getRegBankFromRegClass(*RC, LLT());
2679bool AMDGPUInstructionSelector::selectG_SZA_EXT(
MachineInstr &
I)
const {
2680 bool InReg =
I.getOpcode() == AMDGPU::G_SEXT_INREG;
2681 bool Signed =
I.getOpcode() == AMDGPU::G_SEXT || InReg;
2684 const Register DstReg =
I.getOperand(0).getReg();
2685 const Register SrcReg =
I.getOperand(1).getReg();
2687 const LLT DstTy = MRI->getType(DstReg);
2688 const LLT SrcTy = MRI->getType(SrcReg);
2689 const unsigned SrcSize =
I.getOpcode() == AMDGPU::G_SEXT_INREG ?
2696 const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);
2699 if (
I.getOpcode() == AMDGPU::G_ANYEXT) {
2701 return selectCOPY(
I);
2703 const TargetRegisterClass *SrcRC =
2704 TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
2705 const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
2706 const TargetRegisterClass *DstRC =
2707 TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
2709 Register UndefReg = MRI->createVirtualRegister(SrcRC);
2710 BuildMI(
MBB,
I,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
2716 I.eraseFromParent();
2718 return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) &&
2719 RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI);
2722 if (SrcBank->
getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
2728 MachineInstr *ExtI =
2732 I.eraseFromParent();
2737 const unsigned BFE =
Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2738 MachineInstr *ExtI =
2743 I.eraseFromParent();
2748 if (SrcBank->
getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
2749 const TargetRegisterClass &SrcRC = InReg && DstSize > 32 ?
2750 AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
2751 if (!RBI.constrainGenericRegister(SrcReg, SrcRC, *MRI))
2754 if (
Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
2755 const unsigned SextOpc = SrcSize == 8 ?
2756 AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
2759 I.eraseFromParent();
2760 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
2765 if (DstSize > 32 && SrcSize == 32) {
2766 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2767 unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
2770 .
addReg(SrcReg, {}, SubReg)
2778 .
addReg(SrcReg, {}, SubReg)
2779 .addImm(AMDGPU::sub0)
2782 I.eraseFromParent();
2783 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass,
2787 const unsigned BFE64 =
Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
2788 const unsigned BFE32 =
Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2791 if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
2793 Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
2794 Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2795 unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
2797 BuildMI(
MBB,
I,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
2799 .
addReg(SrcReg, {}, SubReg)
2800 .addImm(AMDGPU::sub0)
2808 I.eraseFromParent();
2809 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, *MRI);
2824 I.eraseFromParent();
2825 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
2849 if (Unmerge->getNumDefs() == 2 && Unmerge->getOperand(1).getReg() == In &&
2851 Out = Unmerge->getSourceReg();
2871 if (Shuffle->
getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)
2878 assert(Mask.size() == 2);
2880 if (Mask[0] == 1 && Mask[1] <= 1) {
2888bool AMDGPUInstructionSelector::selectG_FPEXT(
MachineInstr &
I)
const {
2889 if (!Subtarget->hasSALUFloatInsts())
2892 Register Dst =
I.getOperand(0).getReg();
2893 const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
2894 if (DstRB->
getID() != AMDGPU::SGPRRegBankID)
2897 Register Src =
I.getOperand(1).getReg();
2903 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
2905 I.eraseFromParent();
2906 return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
2913bool AMDGPUInstructionSelector::selectG_FNEG(
MachineInstr &
MI)
const {
2926 const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
2927 if (DstRB->
getID() != AMDGPU::SGPRRegBankID ||
2932 MachineInstr *Fabs =
getOpcodeDef(TargetOpcode::G_FABS, Src, *MRI);
2936 if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
2937 !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))
2940 MachineBasicBlock *BB =
MI.getParent();
2942 Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2943 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2944 Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2945 Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2947 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), LoReg)
2948 .
addReg(Src, {}, AMDGPU::sub0);
2949 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), HiReg)
2950 .
addReg(Src, {}, AMDGPU::sub1);
2951 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
2955 unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
2960 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
2965 MI.eraseFromParent();
2970bool AMDGPUInstructionSelector::selectG_FABS(
MachineInstr &
MI)
const {
2972 const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
2973 if (DstRB->
getID() != AMDGPU::SGPRRegBankID ||
2978 MachineBasicBlock *BB =
MI.getParent();
2980 Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2981 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2982 Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2983 Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2985 if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
2986 !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))
2989 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), LoReg)
2990 .
addReg(Src, {}, AMDGPU::sub0);
2991 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), HiReg)
2992 .
addReg(Src, {}, AMDGPU::sub1);
2993 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
2998 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_AND_B32), OpReg)
3002 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
3008 MI.eraseFromParent();
3013 return MI.getOpcode() == TargetOpcode::G_CONSTANT;
3016void AMDGPUInstructionSelector::getAddrModeInfo(
const MachineInstr &Load,
3019 unsigned OpNo =
Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
3020 const MachineInstr *PtrMI =
3021 MRI.getUniqueVRegDef(
Load.getOperand(OpNo).getReg());
3025 if (PtrMI->
getOpcode() != TargetOpcode::G_PTR_ADD)
3030 for (
unsigned i = 1; i != 3; ++i) {
3031 const MachineOperand &GEPOp = PtrMI->
getOperand(i);
3032 const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.
getReg());
3037 assert(GEPInfo.Imm == 0);
3041 const RegisterBank *OpBank = RBI.getRegBank(GEPOp.
getReg(), MRI, TRI);
3042 if (OpBank->
getID() == AMDGPU::SGPRRegBankID)
3043 GEPInfo.SgprParts.push_back(GEPOp.
getReg());
3045 GEPInfo.VgprParts.push_back(GEPOp.
getReg());
3049 getAddrModeInfo(*PtrMI, MRI, AddrInfo);
3052bool AMDGPUInstructionSelector::isSGPR(
Register Reg)
const {
3053 return RBI.getRegBank(
Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;
3056bool AMDGPUInstructionSelector::isInstrUniform(
const MachineInstr &
MI)
const {
3057 if (!
MI.hasOneMemOperand())
3060 const MachineMemOperand *MMO = *
MI.memoperands_begin();
3073 if (
MI.getOpcode() == AMDGPU::G_PREFETCH)
3074 return RBI.getRegBank(
MI.getOperand(0).getReg(), *MRI, TRI)->getID() ==
3075 AMDGPU::SGPRRegBankID;
3078 return I &&
I->getMetadata(
"amdgpu.uniform");
3082 for (
const GEPInfo &GEPInfo : AddrInfo) {
3083 if (!GEPInfo.VgprParts.empty())
3089void AMDGPUInstructionSelector::initM0(
MachineInstr &
I)
const {
3090 const LLT PtrTy = MRI->getType(
I.getOperand(1).getReg());
3093 STI.ldsRequiresM0Init()) {
3097 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
3102bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
3109 if (
Reg.isPhysical())
3113 const unsigned Opcode =
MI.getOpcode();
3115 if (Opcode == AMDGPU::COPY)
3118 if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
3119 Opcode == AMDGPU::G_XOR)
3124 return GI->is(Intrinsic::amdgcn_class);
3126 return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
3129bool AMDGPUInstructionSelector::selectG_BRCOND(
MachineInstr &
I)
const {
3131 MachineOperand &CondOp =
I.getOperand(0);
3137 const TargetRegisterClass *ConstrainRC;
3144 if (!isVCC(CondReg, *MRI)) {
3148 CondPhysReg = AMDGPU::SCC;
3149 BrOpcode = AMDGPU::S_CBRANCH_SCC1;
3150 ConstrainRC = &AMDGPU::SReg_32RegClass;
3157 const bool Is64 = STI.isWave64();
3158 const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
3159 const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
3161 Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());
3162 BuildMI(*BB, &
I,
DL, TII.get(Opcode), TmpReg)
3169 CondPhysReg = TRI.getVCC();
3170 BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
3171 ConstrainRC = TRI.getBoolRC();
3174 if (!MRI->getRegClassOrNull(CondReg))
3175 MRI->setRegClass(CondReg, ConstrainRC);
3177 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), CondPhysReg)
3180 .
addMBB(
I.getOperand(1).getMBB());
3182 I.eraseFromParent();
3186bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
3188 Register DstReg =
I.getOperand(0).getReg();
3189 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3190 const bool IsVGPR = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3191 I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
3195 return RBI.constrainGenericRegister(
3196 DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
3199bool AMDGPUInstructionSelector::selectG_PTRMASK(
MachineInstr &
I)
const {
3200 Register DstReg =
I.getOperand(0).getReg();
3201 Register SrcReg =
I.getOperand(1).getReg();
3202 Register MaskReg =
I.getOperand(2).getReg();
3203 LLT Ty = MRI->getType(DstReg);
3204 LLT MaskTy = MRI->getType(MaskReg);
3208 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3209 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
3210 const RegisterBank *MaskRB = RBI.getRegBank(MaskReg, *MRI, TRI);
3211 const bool IsVGPR = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3217 APInt MaskOnes =
VT->getKnownOnes(MaskReg).zext(64);
3221 const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
3222 const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
3225 !CanCopyLow32 && !CanCopyHi32) {
3226 auto MIB =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_AND_B64), DstReg)
3230 I.eraseFromParent();
3235 unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
3236 const TargetRegisterClass &RegRC
3237 = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
3239 const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB);
3240 const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB);
3241 const TargetRegisterClass *MaskRC =
3242 TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
3244 if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
3245 !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
3246 !RBI.constrainGenericRegister(MaskReg, *MaskRC, *MRI))
3251 "ptrmask should have been narrowed during legalize");
3253 auto NewOp =
BuildMI(*BB, &
I,
DL, TII.get(NewOpc), DstReg)
3259 I.eraseFromParent();
3263 Register HiReg = MRI->createVirtualRegister(&RegRC);
3264 Register LoReg = MRI->createVirtualRegister(&RegRC);
3267 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), LoReg)
3268 .
addReg(SrcReg, {}, AMDGPU::sub0);
3269 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), HiReg)
3270 .
addReg(SrcReg, {}, AMDGPU::sub1);
3279 Register MaskLo = MRI->createVirtualRegister(&RegRC);
3280 MaskedLo = MRI->createVirtualRegister(&RegRC);
3282 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), MaskLo)
3283 .
addReg(MaskReg, {}, AMDGPU::sub0);
3284 BuildMI(*BB, &
I,
DL, TII.get(NewOpc), MaskedLo)
3293 Register MaskHi = MRI->createVirtualRegister(&RegRC);
3294 MaskedHi = MRI->createVirtualRegister(&RegRC);
3296 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), MaskHi)
3297 .
addReg(MaskReg, {}, AMDGPU::sub1);
3298 BuildMI(*BB, &
I,
DL, TII.get(NewOpc), MaskedHi)
3303 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
3308 I.eraseFromParent();
3314static std::pair<Register, unsigned>
3321 std::tie(IdxBaseReg,
Offset) =
3323 if (IdxBaseReg == AMDGPU::NoRegister) {
3327 IdxBaseReg = IdxReg;
3334 if (
static_cast<unsigned>(
Offset) >= SubRegs.
size())
3335 return std::pair(IdxReg, SubRegs[0]);
3336 return std::pair(IdxBaseReg, SubRegs[
Offset]);
3339bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
3345 LLT DstTy = MRI->getType(DstReg);
3346 LLT SrcTy = MRI->getType(SrcReg);
3348 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3349 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
3350 const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
3354 if (IdxRB->
getID() != AMDGPU::SGPRRegBankID)
3357 const TargetRegisterClass *SrcRC =
3358 TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
3359 const TargetRegisterClass *DstRC =
3360 TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
3361 if (!SrcRC || !DstRC)
3363 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
3364 !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
3365 !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
3368 MachineBasicBlock *BB =
MI.getParent();
3376 if (SrcRB->
getID() == AMDGPU::SGPRRegBankID) {
3380 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3383 unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
3385 .
addReg(SrcReg, {}, SubReg)
3387 MI.eraseFromParent();
3394 if (!STI.useVGPRIndexMode()) {
3395 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3397 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
3398 .
addReg(SrcReg, {}, SubReg)
3400 MI.eraseFromParent();
3404 const MCInstrDesc &GPRIDXDesc =
3405 TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*SrcRC),
true);
3411 MI.eraseFromParent();
3416bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
3423 LLT VecTy = MRI->getType(DstReg);
3424 LLT ValTy = MRI->getType(ValReg);
3428 const RegisterBank *VecRB = RBI.getRegBank(VecReg, *MRI, TRI);
3429 const RegisterBank *ValRB = RBI.getRegBank(ValReg, *MRI, TRI);
3430 const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
3436 if (IdxRB->
getID() != AMDGPU::SGPRRegBankID)
3439 const TargetRegisterClass *VecRC =
3440 TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
3441 const TargetRegisterClass *ValRC =
3442 TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
3444 if (!RBI.constrainGenericRegister(VecReg, *VecRC, *MRI) ||
3445 !RBI.constrainGenericRegister(DstReg, *VecRC, *MRI) ||
3446 !RBI.constrainGenericRegister(ValReg, *ValRC, *MRI) ||
3447 !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
3450 if (VecRB->
getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
3454 std::tie(IdxReg, SubReg) =
3457 const bool IndexMode = VecRB->
getID() == AMDGPU::VGPRRegBankID &&
3458 STI.useVGPRIndexMode();
3460 MachineBasicBlock *BB =
MI.getParent();
3464 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3467 const MCInstrDesc &RegWriteOp = TII.getIndirectRegWriteMovRelPseudo(
3468 VecSize, ValSize, VecRB->
getID() == AMDGPU::SGPRRegBankID);
3473 MI.eraseFromParent();
3477 const MCInstrDesc &GPRIDXDesc =
3478 TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC),
false);
3485 MI.eraseFromParent();
3491 case Intrinsic::amdgcn_raw_buffer_load_async_lds:
3492 case Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds:
3493 case Intrinsic::amdgcn_struct_buffer_load_async_lds:
3494 case Intrinsic::amdgcn_struct_ptr_buffer_load_async_lds:
3495 case Intrinsic::amdgcn_load_async_to_lds:
3496 case Intrinsic::amdgcn_global_load_async_lds:
3502bool AMDGPUInstructionSelector::selectBufferLoadLds(
MachineInstr &
MI)
const {
3503 if (!Subtarget->hasVMemToLDSLoad())
3506 unsigned Size =
MI.getOperand(3).getImm();
3510 const bool HasVIndex =
MI.getNumOperands() == 9;
3514 VIndex =
MI.getOperand(4).getReg();
3518 Register VOffset =
MI.getOperand(4 + OpOffset).getReg();
3519 std::optional<ValueAndVReg> MaybeVOffset =
3521 const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();
3527 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
3528 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
3529 : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
3530 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
3533 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
3534 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
3535 : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
3536 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
3539 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
3540 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
3541 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
3542 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
3545 if (!Subtarget->hasLDSLoadB96_B128())
3548 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN
3549 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN
3550 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN
3551 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;
3554 if (!Subtarget->hasLDSLoadB96_B128())
3557 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN
3558 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN
3559 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN
3560 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;
3564 MachineBasicBlock *
MBB =
MI.getParent();
3567 .
add(
MI.getOperand(2));
3571 if (HasVIndex && HasVOffset) {
3572 Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
3573 BuildMI(*
MBB, &*MIB,
DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
3580 }
else if (HasVIndex) {
3582 }
else if (HasVOffset) {
3586 MIB.
add(
MI.getOperand(1));
3587 MIB.
add(
MI.getOperand(5 + OpOffset));
3588 MIB.
add(
MI.getOperand(6 + OpOffset));
3590 unsigned Aux =
MI.getOperand(7 + OpOffset).getImm();
3599 MachineMemOperand *LoadMMO = *
MI.memoperands_begin();
3604 MachinePointerInfo StorePtrI = LoadPtrI;
3615 MachineMemOperand *StoreMMO =
3621 MI.eraseFromParent();
3634 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3640 return Def->getOperand(1).getReg();
3654 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3662 return Def->getOperand(1).getReg();
3664 if (
VT->signBitIsZero(
Reg))
3665 return matchZeroExtendFromS32(
Reg);
3673AMDGPUInstructionSelector::matchZeroExtendFromS32OrS32(
Register Reg)
const {
3675 : matchZeroExtendFromS32(
Reg);
3681AMDGPUInstructionSelector::matchSignExtendFromS32OrS32(
Register Reg)
const {
3683 : matchSignExtendFromS32(
Reg);
3687AMDGPUInstructionSelector::matchExtendFromS32OrS32(
Register Reg,
3688 bool IsSigned)
const {
3690 return matchSignExtendFromS32OrS32(
Reg);
3692 return matchZeroExtendFromS32OrS32(
Reg);
3702 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3709 return Def->getOperand(1).getReg();
3714bool AMDGPUInstructionSelector::selectGlobalLoadLds(
MachineInstr &
MI)
const{
3715 if (!Subtarget->hasVMemToLDSLoad())
3719 unsigned Size =
MI.getOperand(3).getImm();
3726 Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
3729 Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
3732 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
3735 if (!Subtarget->hasLDSLoadB96_B128())
3737 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3;
3740 if (!Subtarget->hasLDSLoadB96_B128())
3742 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4;
3746 MachineBasicBlock *
MBB =
MI.getParent();
3749 .
add(
MI.getOperand(2));
3755 if (!isSGPR(Addr)) {
3757 if (isSGPR(AddrDef->Reg)) {
3758 Addr = AddrDef->Reg;
3759 }
else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
3762 if (isSGPR(SAddr)) {
3763 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
3764 if (
Register Off = matchZeroExtendFromS32(PtrBaseOffset)) {
3775 VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3787 MIB.
add(
MI.getOperand(4));
3789 unsigned Aux =
MI.getOperand(5).getImm();
3793 MachineMemOperand *LoadMMO = *
MI.memoperands_begin();
3795 LoadPtrI.
Offset =
MI.getOperand(4).getImm();
3796 MachinePointerInfo StorePtrI = LoadPtrI;
3805 MachineMemOperand *StoreMMO =
3807 sizeof(int32_t),
Align(4));
3811 MI.eraseFromParent();
3816bool AMDGPUInstructionSelector::selectTensorLoadStore(
MachineInstr &
MI,
3818 bool IsLoad = IID == Intrinsic::amdgcn_tensor_load_to_lds;
3820 IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d4 : AMDGPU::TENSOR_STORE_FROM_LDS_d4;
3824 const auto isAllZeros = [&](MachineOperand &Opnd) {
3825 const MachineInstr *
DefMI = MRI->getVRegDef(Opnd.getReg());
3834 Opc = IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d2
3835 : AMDGPU::TENSOR_STORE_FROM_LDS_d2;
3840 MachineBasicBlock *
MBB =
MI.getParent();
3842 .
add(
MI.getOperand(1))
3843 .
add(
MI.getOperand(2));
3845 if (NumGroups >= 4) {
3846 MIB.
add(
MI.getOperand(3))
3847 .
add(
MI.getOperand(4));
3851 .
add(
MI.getOperand(6));
3853 MI.eraseFromParent();
3857bool AMDGPUInstructionSelector::selectBVHIntersectRayIntrinsic(
3859 unsigned OpcodeOpIdx =
3860 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY ? 1 : 3;
3861 MI.setDesc(TII.get(
MI.getOperand(OpcodeOpIdx).getImm()));
3862 MI.removeOperand(OpcodeOpIdx);
3863 MI.addImplicitDefUseOperands(*
MI.getMF());
3870bool AMDGPUInstructionSelector::selectSMFMACIntrin(
MachineInstr &
MI)
const {
3873 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
3874 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
3876 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
3877 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
3879 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
3880 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
3882 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
3883 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
3885 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
3886 Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
3888 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
3889 Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
3891 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
3892 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
3894 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
3895 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
3897 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
3898 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
3900 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
3901 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
3903 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
3904 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
3906 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
3907 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
3909 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
3910 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
3912 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
3913 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
3915 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
3916 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_F16_e64;
3918 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
3919 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_F16_e64;
3921 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
3922 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF16_e64;
3924 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
3925 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF16_e64;
3927 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
3928 Opc = AMDGPU::V_SMFMAC_I32_16X16X128_I8_e64;
3930 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
3931 Opc = AMDGPU::V_SMFMAC_I32_32X32X64_I8_e64;
3933 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
3934 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_BF8_e64;
3936 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
3937 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_FP8_e64;
3939 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
3940 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_BF8_e64;
3942 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
3943 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_FP8_e64;
3945 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
3946 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_BF8_e64;
3948 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
3949 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_FP8_e64;
3951 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
3952 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_BF8_e64;
3954 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
3955 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_FP8_e64;
3961 auto VDst_In =
MI.getOperand(4);
3963 MI.setDesc(TII.get(
Opc));
3964 MI.removeOperand(4);
3965 MI.removeOperand(1);
3966 MI.addOperand(VDst_In);
3967 MI.addImplicitDefUseOperands(*
MI.getMF());
3968 const MCInstrDesc &MCID =
MI.getDesc();
3970 MI.getOperand(0).setIsEarlyClobber(
true);
3975bool AMDGPUInstructionSelector::selectPermlaneSwapIntrin(
3977 if (IntrID == Intrinsic::amdgcn_permlane16_swap &&
3978 !Subtarget->hasPermlane16Swap())
3980 if (IntrID == Intrinsic::amdgcn_permlane32_swap &&
3981 !Subtarget->hasPermlane32Swap())
3984 unsigned Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
3985 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
3986 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
3988 MI.removeOperand(2);
3989 MI.setDesc(TII.get(Opcode));
3992 MachineOperand &FI =
MI.getOperand(4);
3999bool AMDGPUInstructionSelector::selectWaveAddress(
MachineInstr &
MI)
const {
4002 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
4003 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
4004 MachineBasicBlock *
MBB =
MI.getParent();
4008 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
4009 .
addImm(Subtarget->getWavefrontSizeLog2())
4014 .
addImm(Subtarget->getWavefrontSizeLog2())
4018 const TargetRegisterClass &RC =
4019 IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
4020 if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
4023 MI.eraseFromParent();
4027bool AMDGPUInstructionSelector::selectWaveShuffleIntrin(
4030 MachineBasicBlock *
MBB =
MI.getParent();
4037 const LLT DstTy = MRI->getType(DstReg);
4039 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
4040 const TargetRegisterClass *DstRC =
4041 TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
4046 if (!Subtarget->supportsBPermute())
4050 if (Subtarget->supportsWaveWideBPermute()) {
4051 Register ShiftIdxReg = MRI->createVirtualRegister(DstRC);
4052 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), ShiftIdxReg)
4062 assert(Subtarget->isWave64());
4066 MRI->createVirtualRegister(TRI.getRegClass(AMDGPU::SReg_32RegClassID));
4067 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefValReg);
4069 Register UndefExecReg = MRI->createVirtualRegister(
4070 TRI.getRegClass(AMDGPU::SReg_64_XEXECRegClassID));
4071 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefExecReg);
4073 Register PoisonValReg = MRI->createVirtualRegister(DstRC);
4074 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_SET_INACTIVE_B32), PoisonValReg)
4082 Register ShiftIdxReg = MRI->createVirtualRegister(DstRC);
4083 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), ShiftIdxReg)
4087 Register PoisonIdxReg = MRI->createVirtualRegister(DstRC);
4088 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_SET_INACTIVE_B32), PoisonIdxReg)
4096 Register SameSidePermReg = MRI->createVirtualRegister(DstRC);
4097 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::DS_BPERMUTE_B32), SameSidePermReg)
4102 Register SwappedValReg = MRI->createVirtualRegister(DstRC);
4103 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_PERMLANE64_B32), SwappedValReg)
4106 Register OppSidePermReg = MRI->createVirtualRegister(DstRC);
4107 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::DS_BPERMUTE_B32), OppSidePermReg)
4112 Register WWMSwapPermReg = MRI->createVirtualRegister(DstRC);
4113 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::STRICT_WWM), WWMSwapPermReg)
4120 Register ThreadIDReg = MRI->createVirtualRegister(DstRC);
4121 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_MBCNT_LO_U32_B32_e64), ThreadIDReg)
4125 Register XORReg = MRI->createVirtualRegister(DstRC);
4130 Register ANDReg = MRI->createVirtualRegister(DstRC);
4135 Register CompareReg = MRI->createVirtualRegister(
4136 TRI.getRegClass(AMDGPU::SReg_64_XEXECRegClassID));
4137 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_CMP_EQ_U32_e64), CompareReg)
4142 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
4150 MI.eraseFromParent();
4159 unsigned NumOpcodes = 0;
4172 const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
4183 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4197 if (Src.size() == 3) {
4204 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4205 if (Src[
I] ==
LHS) {
4215 Bits = SrcBits[Src.size()];
4221 switch (
MI->getOpcode()) {
4222 case TargetOpcode::G_AND:
4223 case TargetOpcode::G_OR:
4224 case TargetOpcode::G_XOR: {
4229 if (!getOperandBits(
LHS, LHSBits) ||
4230 !getOperandBits(
RHS, RHSBits)) {
4231 Src = std::move(Backup);
4232 return std::make_pair(0, 0);
4238 NumOpcodes +=
Op.first;
4239 LHSBits =
Op.second;
4244 NumOpcodes +=
Op.first;
4245 RHSBits =
Op.second;
4250 return std::make_pair(0, 0);
4254 switch (
MI->getOpcode()) {
4255 case TargetOpcode::G_AND:
4256 TTbl = LHSBits & RHSBits;
4258 case TargetOpcode::G_OR:
4259 TTbl = LHSBits | RHSBits;
4261 case TargetOpcode::G_XOR:
4262 TTbl = LHSBits ^ RHSBits;
4268 return std::make_pair(NumOpcodes + 1, TTbl);
4271bool AMDGPUInstructionSelector::selectBITOP3(
MachineInstr &
MI)
const {
4272 if (!Subtarget->hasBitOp3Insts())
4276 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
4277 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
4283 unsigned NumOpcodes;
4285 std::tie(NumOpcodes, TTbl) =
BitOp3_Op(DstReg, Src, *MRI);
4289 if (NumOpcodes < 2 || Src.empty())
4292 const bool IsB32 = MRI->getType(DstReg) ==
LLT::scalar(32);
4293 if (NumOpcodes == 2 && IsB32) {
4301 }
else if (NumOpcodes < 4) {
4308 unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;
4309 if (!IsB32 && STI.hasTrue16BitInsts())
4310 Opc = STI.useRealTrue16Insts() ? AMDGPU::V_BITOP3_B16_gfx1250_t16_e64
4311 : AMDGPU::V_BITOP3_B16_gfx1250_fake16_e64;
4312 unsigned CBL = STI.getConstantBusLimit(
Opc);
4313 MachineBasicBlock *
MBB =
MI.getParent();
4316 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4317 const RegisterBank *RB = RBI.getRegBank(Src[
I], *MRI, TRI);
4318 if (RB->
getID() != AMDGPU::SGPRRegBankID)
4324 Register NewReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4335 while (Src.size() < 3)
4336 Src.push_back(Src[0]);
4353 MI.eraseFromParent();
4358bool AMDGPUInstructionSelector::selectStackRestore(
MachineInstr &
MI)
const {
4360 if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI))
4363 MachineInstr *
DefMI = MRI->getVRegDef(SrcReg);
4365 Subtarget->getTargetLowering()->getStackPointerRegisterToSaveRestore();
4367 MachineBasicBlock *
MBB =
MI.getParent();
4371 WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
4374 .
addImm(Subtarget->getWavefrontSizeLog2())
4381 MI.eraseFromParent();
4387 if (!
I.isPreISelOpcode()) {
4389 return selectCOPY(
I);
4393 switch (
I.getOpcode()) {
4394 case TargetOpcode::G_AND:
4395 case TargetOpcode::G_OR:
4396 case TargetOpcode::G_XOR:
4397 if (selectBITOP3(
I))
4401 return selectG_AND_OR_XOR(
I);
4402 case TargetOpcode::G_ADD:
4403 case TargetOpcode::G_SUB:
4404 case TargetOpcode::G_PTR_ADD:
4407 return selectG_ADD_SUB(
I);
4408 case TargetOpcode::G_UADDO:
4409 case TargetOpcode::G_USUBO:
4410 case TargetOpcode::G_UADDE:
4411 case TargetOpcode::G_USUBE:
4412 return selectG_UADDO_USUBO_UADDE_USUBE(
I);
4413 case AMDGPU::G_AMDGPU_MAD_U64_U32:
4414 case AMDGPU::G_AMDGPU_MAD_I64_I32:
4415 return selectG_AMDGPU_MAD_64_32(
I);
4416 case TargetOpcode::G_INTTOPTR:
4417 case TargetOpcode::G_BITCAST:
4418 case TargetOpcode::G_PTRTOINT:
4419 case TargetOpcode::G_FREEZE:
4420 return selectCOPY(
I);
4421 case TargetOpcode::G_FNEG:
4424 return selectG_FNEG(
I);
4425 case TargetOpcode::G_FABS:
4428 return selectG_FABS(
I);
4429 case TargetOpcode::G_EXTRACT:
4430 return selectG_EXTRACT(
I);
4431 case TargetOpcode::G_MERGE_VALUES:
4432 case TargetOpcode::G_CONCAT_VECTORS:
4433 return selectG_MERGE_VALUES(
I);
4434 case TargetOpcode::G_UNMERGE_VALUES:
4435 return selectG_UNMERGE_VALUES(
I);
4436 case TargetOpcode::G_BUILD_VECTOR:
4437 case TargetOpcode::G_BUILD_VECTOR_TRUNC:
4438 return selectG_BUILD_VECTOR(
I);
4439 case TargetOpcode::G_IMPLICIT_DEF:
4440 return selectG_IMPLICIT_DEF(
I);
4441 case TargetOpcode::G_INSERT:
4442 return selectG_INSERT(
I);
4443 case TargetOpcode::G_INTRINSIC:
4444 case TargetOpcode::G_INTRINSIC_CONVERGENT:
4445 return selectG_INTRINSIC(
I);
4446 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
4447 case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
4448 return selectG_INTRINSIC_W_SIDE_EFFECTS(
I);
4449 case TargetOpcode::G_ICMP:
4450 case TargetOpcode::G_FCMP:
4451 if (selectG_ICMP_or_FCMP(
I))
4454 case TargetOpcode::G_LOAD:
4455 case TargetOpcode::G_ZEXTLOAD:
4456 case TargetOpcode::G_SEXTLOAD:
4457 case TargetOpcode::G_STORE:
4458 case TargetOpcode::G_ATOMIC_CMPXCHG:
4459 case TargetOpcode::G_ATOMICRMW_XCHG:
4460 case TargetOpcode::G_ATOMICRMW_ADD:
4461 case TargetOpcode::G_ATOMICRMW_SUB:
4462 case TargetOpcode::G_ATOMICRMW_AND:
4463 case TargetOpcode::G_ATOMICRMW_OR:
4464 case TargetOpcode::G_ATOMICRMW_XOR:
4465 case TargetOpcode::G_ATOMICRMW_MIN:
4466 case TargetOpcode::G_ATOMICRMW_MAX:
4467 case TargetOpcode::G_ATOMICRMW_UMIN:
4468 case TargetOpcode::G_ATOMICRMW_UMAX:
4469 case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
4470 case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
4471 case TargetOpcode::G_ATOMICRMW_USUB_COND:
4472 case TargetOpcode::G_ATOMICRMW_USUB_SAT:
4473 case TargetOpcode::G_ATOMICRMW_FADD:
4474 case TargetOpcode::G_ATOMICRMW_FMIN:
4475 case TargetOpcode::G_ATOMICRMW_FMAX:
4476 return selectG_LOAD_STORE_ATOMICRMW(
I);
4477 case TargetOpcode::G_SELECT:
4478 return selectG_SELECT(
I);
4479 case TargetOpcode::G_TRUNC:
4480 return selectG_TRUNC(
I);
4481 case TargetOpcode::G_SEXT:
4482 case TargetOpcode::G_ZEXT:
4483 case TargetOpcode::G_ANYEXT:
4484 case TargetOpcode::G_SEXT_INREG:
4488 if (MRI->getType(
I.getOperand(1).getReg()) !=
LLT::scalar(1) &&
4491 return selectG_SZA_EXT(
I);
4492 case TargetOpcode::G_FPEXT:
4493 if (selectG_FPEXT(
I))
4496 case TargetOpcode::G_BRCOND:
4497 return selectG_BRCOND(
I);
4498 case TargetOpcode::G_GLOBAL_VALUE:
4499 return selectG_GLOBAL_VALUE(
I);
4500 case TargetOpcode::G_PTRMASK:
4501 return selectG_PTRMASK(
I);
4502 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4503 return selectG_EXTRACT_VECTOR_ELT(
I);
4504 case TargetOpcode::G_INSERT_VECTOR_ELT:
4505 return selectG_INSERT_VECTOR_ELT(
I);
4506 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
4507 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
4508 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
4509 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
4510 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
4513 assert(Intr &&
"not an image intrinsic with image pseudo");
4514 return selectImageIntrinsic(
I, Intr);
4516 case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
4517 case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
4518 case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
4519 return selectBVHIntersectRayIntrinsic(
I);
4520 case AMDGPU::G_SBFX:
4521 case AMDGPU::G_UBFX:
4522 return selectG_SBFX_UBFX(
I);
4523 case AMDGPU::G_SI_CALL:
4524 I.setDesc(TII.get(AMDGPU::SI_CALL));
4526 case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
4527 return selectWaveAddress(
I);
4528 case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN: {
4529 I.setDesc(TII.get(AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN));
4532 case AMDGPU::G_STACKRESTORE:
4533 return selectStackRestore(
I);
4535 return selectPHI(
I);
4536 case AMDGPU::G_AMDGPU_COPY_SCC_VCC:
4537 return selectCOPY_SCC_VCC(
I);
4538 case AMDGPU::G_AMDGPU_COPY_VCC_SCC:
4539 return selectCOPY_VCC_SCC(
I);
4540 case AMDGPU::G_AMDGPU_READANYLANE:
4541 return selectReadAnyLane(
I);
4542 case TargetOpcode::G_CONSTANT:
4543 case TargetOpcode::G_FCONSTANT:
4551AMDGPUInstructionSelector::selectVCSRC(
MachineOperand &Root)
const {
4558std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
4559 Register Src,
bool IsCanonicalizing,
bool AllowAbs,
bool OpSel)
const {
4563 if (
MI->getOpcode() == AMDGPU::G_FNEG) {
4564 Src =
MI->getOperand(1).getReg();
4567 }
else if (
MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
4572 if (
LHS &&
LHS->isZero()) {
4574 Src =
MI->getOperand(2).getReg();
4578 if (AllowAbs &&
MI->getOpcode() == AMDGPU::G_FABS) {
4579 Src =
MI->getOperand(1).getReg();
4586 return std::pair(Src, Mods);
4589std::pair<Register, unsigned>
4590AMDGPUInstructionSelector::selectVOP3PModsF32Impl(
Register Src)
const {
4592 std::tie(Src, Mods) = selectVOP3ModsImpl(Src);
4594 return std::pair(Src, Mods);
4597Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
4599 bool ForceVGPR)
const {
4600 if ((Mods != 0 || ForceVGPR) &&
4601 RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {
4608 TII.
get(AMDGPU::COPY), VGPRSrc)
4620AMDGPUInstructionSelector::selectVSRC0(
MachineOperand &Root)
const {
4622 [=](MachineInstrBuilder &MIB) { MIB.
add(Root); }
4627AMDGPUInstructionSelector::selectVOP3Mods0(
MachineOperand &Root)
const {
4630 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
4633 [=](MachineInstrBuilder &MIB) {
4634 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4636 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
4637 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4638 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4643AMDGPUInstructionSelector::selectVOP3BMods0(
MachineOperand &Root)
const {
4646 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
4651 [=](MachineInstrBuilder &MIB) {
4652 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4654 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
4655 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4656 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4661AMDGPUInstructionSelector::selectVOP3OMods(
MachineOperand &Root)
const {
4663 [=](MachineInstrBuilder &MIB) { MIB.
add(Root); },
4664 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4665 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4670AMDGPUInstructionSelector::selectVOP3Mods(
MachineOperand &Root)
const {
4673 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
4676 [=](MachineInstrBuilder &MIB) {
4677 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4679 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4684AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
4688 std::tie(Src, Mods) =
4689 selectVOP3ModsImpl(Root.
getReg(),
false);
4692 [=](MachineInstrBuilder &MIB) {
4693 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4695 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4700AMDGPUInstructionSelector::selectVOP3BMods(
MachineOperand &Root)
const {
4703 std::tie(Src, Mods) =
4704 selectVOP3ModsImpl(Root.
getReg(),
true,
4708 [=](MachineInstrBuilder &MIB) {
4709 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4711 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4716AMDGPUInstructionSelector::selectVOP3NoMods(
MachineOperand &Root)
const {
4719 if (
Def->getOpcode() == AMDGPU::G_FNEG ||
Def->getOpcode() == AMDGPU::G_FABS)
4722 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
4747 if (
MI->getOpcode() != AMDGPU::G_TRUNC)
4752 return DstSize * 2 == SrcSize;
4758 if (
MI->getOpcode() != AMDGPU::G_LSHR)
4762 std::optional<ValueAndVReg> ShiftAmt;
4763 if (
mi_match(
MI->getOperand(0).getReg(), MRI,
4766 unsigned Shift = ShiftAmt->Value.getZExtValue();
4767 return Shift * 2 == SrcSize;
4775 if (
MI->getOpcode() != AMDGPU::G_SHL)
4779 std::optional<ValueAndVReg> ShiftAmt;
4780 if (
mi_match(
MI->getOperand(0).getReg(), MRI,
4783 unsigned Shift = ShiftAmt->Value.getZExtValue();
4784 return Shift * 2 == SrcSize;
4792 if (
MI->getOpcode() != AMDGPU::G_UNMERGE_VALUES)
4794 return MI->getNumOperands() == 3 &&
MI->getOperand(0).isDef() &&
4795 MI->getOperand(1).isDef() && !
MI->getOperand(2).isDef();
4965static std::optional<std::pair<Register, SrcStatus>>
4970 unsigned Opc =
MI->getOpcode();
4974 case AMDGPU::G_BITCAST:
4975 return std::optional<std::pair<Register, SrcStatus>>(
4976 {
MI->getOperand(1).getReg(), Curr.second});
4978 if (
MI->getOperand(1).getReg().isPhysical())
4979 return std::nullopt;
4980 return std::optional<std::pair<Register, SrcStatus>>(
4981 {
MI->getOperand(1).getReg(), Curr.second});
4982 case AMDGPU::G_FNEG: {
4985 return std::nullopt;
4986 return std::optional<std::pair<Register, SrcStatus>>(
4987 {
MI->getOperand(1).getReg(), Stat});
4994 switch (Curr.second) {
4997 return std::optional<std::pair<Register, SrcStatus>>(
5000 if (Curr.first ==
MI->getOperand(0).getReg())
5001 return std::optional<std::pair<Register, SrcStatus>>(
5003 return std::optional<std::pair<Register, SrcStatus>>(
5015 return std::optional<std::pair<Register, SrcStatus>>(
5019 if (Curr.first ==
MI->getOperand(0).getReg())
5020 return std::optional<std::pair<Register, SrcStatus>>(
5022 return std::optional<std::pair<Register, SrcStatus>>(
5028 return std::optional<std::pair<Register, SrcStatus>>(
5033 return std::optional<std::pair<Register, SrcStatus>>(
5038 return std::optional<std::pair<Register, SrcStatus>>(
5043 return std::optional<std::pair<Register, SrcStatus>>(
5049 return std::nullopt;
5059 bool HasNeg =
false;
5061 bool HasOpsel =
true;
5066 unsigned Opc =
MI->getOpcode();
5068 if (
Opc == TargetOpcode::G_INTRINSIC) {
5071 if (IntrinsicID == Intrinsic::amdgcn_fdot2)
5098 while (
Depth <= MaxDepth && Curr.has_value()) {
5101 Statlist.push_back(Curr.value());
5108static std::pair<Register, SrcStatus>
5115 while (
Depth <= MaxDepth && Curr.has_value()) {
5121 LastSameOrNeg = Curr.value();
5126 return LastSameOrNeg;
5133 return Width1 == Width2;
5168 return isSameBitWidth(NewReg, RootReg, MRI) && IsHalfState(LoStat) &&
5169 IsHalfState(HiStat);
5172std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3PModsImpl(
5178 return {RootReg, Mods};
5181 SearchOptions SO(RootReg, MRI);
5192 MachineInstr *
MI = MRI.getVRegDef(Stat.first);
5194 if (
MI->getOpcode() != AMDGPU::G_BUILD_VECTOR ||
MI->getNumOperands() != 3 ||
5195 (IsDOT && Subtarget->hasDOTOpSelHazard())) {
5197 return {Stat.first, Mods};
5203 if (StatlistHi.
empty()) {
5205 return {Stat.first, Mods};
5211 if (StatlistLo.
empty()) {
5213 return {Stat.first, Mods};
5216 for (
int I = StatlistHi.
size() - 1;
I >= 0;
I--) {
5217 for (
int J = StatlistLo.
size() - 1; J >= 0; J--) {
5218 if (StatlistHi[
I].first == StatlistLo[J].first &&
5220 StatlistHi[
I].first, RootReg, TII, MRI))
5221 return {StatlistHi[
I].first,
5222 updateMods(StatlistHi[
I].second, StatlistLo[J].second, Mods)};
5228 return {Stat.first, Mods};
5238 return RB->
getID() == RBNo;
5255 if (
checkRB(RootReg, AMDGPU::SGPRRegBankID, RBI, MRI,
TRI) ||
5256 checkRB(NewReg, AMDGPU::VGPRRegBankID, RBI, MRI,
TRI))
5260 if (
MI->getOpcode() == AMDGPU::COPY && NewReg ==
MI->getOperand(1).getReg()) {
5269 BuildMI(*BB,
MI,
MI->getDebugLoc(),
TII.get(AMDGPU::COPY), DstReg)
5277AMDGPUInstructionSelector::selectVOP3PRetHelper(
MachineOperand &Root,
5282 std::tie(
Reg, Mods) = selectVOP3PModsImpl(Root.
getReg(), MRI, IsDOT);
5286 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
5287 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5292AMDGPUInstructionSelector::selectVOP3PMods(
MachineOperand &Root)
const {
5294 return selectVOP3PRetHelper(Root);
5298AMDGPUInstructionSelector::selectVOP3PModsDOT(
MachineOperand &Root)
const {
5300 return selectVOP3PRetHelper(Root,
true);
5304AMDGPUInstructionSelector::selectVOP3PNoModsDOT(
MachineOperand &Root)
const {
5308 std::tie(Src, Mods) = selectVOP3PModsImpl(Root.
getReg(), MRI,
true );
5312 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); }}};
5316AMDGPUInstructionSelector::selectVOP3PModsF32(
MachineOperand &Root)
const {
5319 std::tie(Src, Mods) = selectVOP3PModsF32Impl(Root.
getReg());
5322 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5323 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5328AMDGPUInstructionSelector::selectVOP3PNoModsF32(
MachineOperand &Root)
const {
5331 std::tie(Src, Mods) = selectVOP3PModsF32Impl(Root.
getReg());
5335 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); }}};
5339AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
5342 "expected i1 value");
5348 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5356 switch (Elts.
size()) {
5358 DstRegClass = &AMDGPU::VReg_256RegClass;
5361 DstRegClass = &AMDGPU::VReg_128RegClass;
5364 DstRegClass = &AMDGPU::VReg_64RegClass;
5371 auto MIB =
B.buildInstr(AMDGPU::REG_SEQUENCE)
5373 for (
unsigned i = 0; i < Elts.
size(); ++i) {
5384 if (ModOpcode == TargetOpcode::G_FNEG) {
5388 for (
auto El : Elts) {
5394 if (Elts.size() != NegAbsElts.
size()) {
5403 assert(ModOpcode == TargetOpcode::G_FABS);
5411AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(
MachineOperand &Root)
const {
5417 assert(BV->getNumSources() > 0);
5419 MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(0));
5420 unsigned ModOpcode = (ElF32->
getOpcode() == AMDGPU::G_FNEG)
5423 for (
unsigned i = 0; i < BV->getNumSources(); ++i) {
5424 ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
5431 if (BV->getNumSources() == EltsF32.
size()) {
5437 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5438 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5442AMDGPUInstructionSelector::selectWMMAModsF16Neg(
MachineOperand &Root)
const {
5448 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
5456 if (CV->getNumSources() == EltsV2F16.
size()) {
5463 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5464 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5468AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(
MachineOperand &Root)
const {
5474 assert(CV->getNumSources() > 0);
5475 MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(0));
5477 unsigned ModOpcode = (ElV2F16->
getOpcode() == AMDGPU::G_FNEG)
5481 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
5482 ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
5489 if (CV->getNumSources() == EltsV2F16.
size()) {
5496 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5497 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5501AMDGPUInstructionSelector::selectWMMAVISrc(
MachineOperand &Root)
const {
5502 std::optional<FPValueAndVReg> FPValReg;
5504 if (TII.isInlineConstant(FPValReg->Value)) {
5505 return {{[=](MachineInstrBuilder &MIB) {
5506 MIB.
addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
5516 if (TII.isInlineConstant(ICst)) {
5526AMDGPUInstructionSelector::selectSWMMACIndex8(
MachineOperand &Root)
const {
5532 std::optional<ValueAndVReg> ShiftAmt;
5534 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5535 ShiftAmt->Value.getZExtValue() % 8 == 0) {
5536 Key = ShiftAmt->Value.getZExtValue() / 8;
5541 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5542 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5547AMDGPUInstructionSelector::selectSWMMACIndex16(
MachineOperand &Root)
const {
5554 std::optional<ValueAndVReg> ShiftAmt;
5556 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5557 ShiftAmt->Value.getZExtValue() == 16) {
5563 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5564 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5569AMDGPUInstructionSelector::selectSWMMACIndex32(
MachineOperand &Root)
const {
5576 S32 = matchAnyExtendFromS32(Src);
5580 if (
Def->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
5585 Src =
Def->getOperand(2).getReg();
5592 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5593 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5598AMDGPUInstructionSelector::selectVOP3OpSelMods(
MachineOperand &Root)
const {
5601 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
5605 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5606 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5612AMDGPUInstructionSelector::selectVINTERPMods(
MachineOperand &Root)
const {
5615 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
5621 [=](MachineInstrBuilder &MIB) {
5623 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
5625 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
5630AMDGPUInstructionSelector::selectVINTERPModsHi(
MachineOperand &Root)
const {
5633 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
5639 [=](MachineInstrBuilder &MIB) {
5641 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
5643 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
5650bool AMDGPUInstructionSelector::selectScaleOffset(
MachineOperand &Root,
5652 bool IsSigned)
const {
5653 if (!Subtarget->hasScaleOffset())
5657 MachineMemOperand *MMO = *
MI.memoperands_begin();
5669 OffsetReg =
Def->Reg;
5684 m_BinOp(IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO : AMDGPU::S_MUL_U64,
5688 (
Mul->getOpcode() == (IsSigned ? AMDGPU::G_AMDGPU_MAD_I64_I32
5689 : AMDGPU::G_AMDGPU_MAD_U64_U32) ||
5690 (IsSigned &&
Mul->getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32 &&
5691 VT->signBitIsZero(
Mul->getOperand(2).getReg()))) &&
5704bool AMDGPUInstructionSelector::selectSmrdOffset(
MachineOperand &Root,
5708 bool *ScaleOffset)
const {
5710 MachineBasicBlock *
MBB =
MI->getParent();
5715 getAddrModeInfo(*
MI, *MRI, AddrInfo);
5717 if (AddrInfo.
empty())
5720 const GEPInfo &GEPI = AddrInfo[0];
5721 std::optional<int64_t> EncodedImm;
5724 *ScaleOffset =
false;
5729 if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
5730 AddrInfo.
size() > 1) {
5731 const GEPInfo &GEPI2 = AddrInfo[1];
5732 if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
5733 Register OffsetReg = GEPI2.SgprParts[1];
5736 selectScaleOffset(Root, OffsetReg,
false );
5737 OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5739 Base = GEPI2.SgprParts[0];
5740 *SOffset = OffsetReg;
5749 auto SKnown =
VT->getKnownBits(*SOffset);
5750 if (*
Offset + SKnown.getMinValue().getSExtValue() < 0)
5762 if (
Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
5763 Base = GEPI.SgprParts[0];
5769 if (SOffset && GEPI.SgprParts.size() == 1 &&
isUInt<32>(GEPI.Imm) &&
5775 Base = GEPI.SgprParts[0];
5776 *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5777 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
5782 if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
5783 Register OffsetReg = GEPI.SgprParts[1];
5785 *ScaleOffset = selectScaleOffset(Root, OffsetReg,
false );
5786 OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5788 Base = GEPI.SgprParts[0];
5789 *SOffset = OffsetReg;
5798AMDGPUInstructionSelector::selectSmrdImm(
MachineOperand &Root)
const {
5801 if (!selectSmrdOffset(Root,
Base,
nullptr, &
Offset,
5803 return std::nullopt;
5805 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5806 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Offset); }}};
5810AMDGPUInstructionSelector::selectSmrdImm32(
MachineOperand &Root)
const {
5812 getAddrModeInfo(*Root.
getParent(), *MRI, AddrInfo);
5814 if (AddrInfo.
empty() || AddrInfo[0].SgprParts.size() != 1)
5815 return std::nullopt;
5817 const GEPInfo &GEPInfo = AddrInfo[0];
5818 Register PtrReg = GEPInfo.SgprParts[0];
5819 std::optional<int64_t> EncodedImm =
5822 return std::nullopt;
5825 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrReg); },
5826 [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); }
5831AMDGPUInstructionSelector::selectSmrdSgpr(
MachineOperand &Root)
const {
5834 if (!selectSmrdOffset(Root,
Base, &SOffset,
nullptr,
5836 return std::nullopt;
5839 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5840 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
5841 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }}};
5845AMDGPUInstructionSelector::selectSmrdSgprImm(
MachineOperand &Root)
const {
5849 if (!selectSmrdOffset(Root,
Base, &SOffset, &
Offset, &ScaleOffset))
5850 return std::nullopt;
5853 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5854 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
5856 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }}};
5859std::pair<Register, int>
5860AMDGPUInstructionSelector::selectFlatOffsetImpl(
MachineOperand &Root,
5861 uint64_t FlatVariant)
const {
5866 if (!STI.hasFlatInstOffsets())
5870 int64_t ConstOffset;
5872 std::tie(PtrBase, ConstOffset, IsInBounds) =
5873 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
5879 if (ConstOffset == 0 ||
5881 !isFlatScratchBaseLegal(Root.
getReg())) ||
5885 unsigned AddrSpace = (*
MI->memoperands_begin())->getAddrSpace();
5886 if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, FlatVariant))
5889 return std::pair(PtrBase, ConstOffset);
5893AMDGPUInstructionSelector::selectFlatOffset(
MachineOperand &Root)
const {
5897 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5898 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5903AMDGPUInstructionSelector::selectGlobalOffset(
MachineOperand &Root)
const {
5907 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5908 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5913AMDGPUInstructionSelector::selectScratchOffset(
MachineOperand &Root)
const {
5917 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5918 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5924AMDGPUInstructionSelector::selectGlobalSAddr(
MachineOperand &Root,
5926 bool NeedIOffset)
const {
5929 int64_t ConstOffset;
5930 int64_t ImmOffset = 0;
5934 std::tie(PtrBase, ConstOffset, std::ignore) =
5935 getPtrBaseWithConstantOffset(Addr, *MRI);
5937 if (ConstOffset != 0) {
5942 ImmOffset = ConstOffset;
5945 if (isSGPR(PtrBaseDef->Reg)) {
5946 if (ConstOffset > 0) {
5952 int64_t SplitImmOffset = 0, RemainderOffset = ConstOffset;
5954 std::tie(SplitImmOffset, RemainderOffset) =
5959 if (Subtarget->hasSignedGVSOffset() ?
isInt<32>(RemainderOffset)
5962 MachineBasicBlock *
MBB =
MI->getParent();
5964 MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5966 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
5968 .
addImm(RemainderOffset);
5972 [=](MachineInstrBuilder &MIB) {
5975 [=](MachineInstrBuilder &MIB) {
5978 [=](MachineInstrBuilder &MIB) { MIB.
addImm(SplitImmOffset); },
5979 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); },
5982 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrBase); },
5983 [=](MachineInstrBuilder &MIB) {
5986 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); },
5996 unsigned NumLiterals =
5997 !TII.isInlineConstant(APInt(32,
Lo_32(ConstOffset))) +
5998 !TII.isInlineConstant(APInt(32,
Hi_32(ConstOffset)));
5999 if (STI.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
6000 return std::nullopt;
6007 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
6012 if (isSGPR(SAddr)) {
6013 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
6017 bool ScaleOffset = selectScaleOffset(Root, PtrBaseOffset,
6018 Subtarget->hasSignedGVSOffset());
6019 if (
Register VOffset = matchExtendFromS32OrS32(
6020 PtrBaseOffset, Subtarget->hasSignedGVSOffset())) {
6022 return {{[=](MachineInstrBuilder &MIB) {
6025 [=](MachineInstrBuilder &MIB) {
6028 [=](MachineInstrBuilder &MIB) {
6031 [=](MachineInstrBuilder &MIB) {
6035 return {{[=](MachineInstrBuilder &MIB) {
6038 [=](MachineInstrBuilder &MIB) {
6041 [=](MachineInstrBuilder &MIB) {
6051 if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
6052 AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
6053 return std::nullopt;
6058 MachineBasicBlock *
MBB =
MI->getParent();
6059 Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6061 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
6066 [=](MachineInstrBuilder &MIB) { MIB.
addReg(AddrDef->Reg); },
6067 [=](MachineInstrBuilder &MIB) { MIB.
addReg(VOffset); },
6068 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
6069 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); }
6072 [=](MachineInstrBuilder &MIB) { MIB.
addReg(AddrDef->Reg); },
6073 [=](MachineInstrBuilder &MIB) { MIB.
addReg(VOffset); },
6074 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); }
6079AMDGPUInstructionSelector::selectGlobalSAddr(
MachineOperand &Root)
const {
6080 return selectGlobalSAddr(Root, 0);
6084AMDGPUInstructionSelector::selectGlobalSAddrCPol(
MachineOperand &Root)
const {
6090 return selectGlobalSAddr(Root, PassedCPol);
6094AMDGPUInstructionSelector::selectGlobalSAddrCPolM0(
MachineOperand &Root)
const {
6100 return selectGlobalSAddr(Root, PassedCPol);
6104AMDGPUInstructionSelector::selectGlobalSAddrGLC(
MachineOperand &Root)
const {
6109AMDGPUInstructionSelector::selectGlobalSAddrNoIOffset(
6116 return selectGlobalSAddr(Root, PassedCPol,
false);
6120AMDGPUInstructionSelector::selectGlobalSAddrNoIOffsetM0(
6127 return selectGlobalSAddr(Root, PassedCPol,
false);
6131AMDGPUInstructionSelector::selectScratchSAddr(
MachineOperand &Root)
const {
6134 int64_t ConstOffset;
6135 int64_t ImmOffset = 0;
6139 std::tie(PtrBase, ConstOffset, std::ignore) =
6140 getPtrBaseWithConstantOffset(Addr, *MRI);
6142 if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
6146 ImmOffset = ConstOffset;
6150 if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
6151 int FI = AddrDef->MI->getOperand(1).
getIndex();
6154 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); }
6160 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
6161 Register LHS = AddrDef->MI->getOperand(1).getReg();
6162 Register RHS = AddrDef->MI->getOperand(2).getReg();
6166 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
6167 isSGPR(RHSDef->Reg)) {
6168 int FI = LHSDef->MI->getOperand(1).getIndex();
6172 SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6174 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
6182 return std::nullopt;
6185 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SAddr); },
6186 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); }
6191bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
6193 if (!Subtarget->hasFlatScratchSVSSwizzleBug())
6199 auto VKnown =
VT->getKnownBits(VAddr);
6202 uint64_t VMax = VKnown.getMaxValue().getZExtValue();
6203 uint64_t
SMax = SKnown.getMaxValue().getZExtValue();
6204 return (VMax & 3) + (
SMax & 3) >= 4;
6208AMDGPUInstructionSelector::selectScratchSVAddr(
MachineOperand &Root)
const {
6211 int64_t ConstOffset;
6212 int64_t ImmOffset = 0;
6216 std::tie(PtrBase, ConstOffset, std::ignore) =
6217 getPtrBaseWithConstantOffset(Addr, *MRI);
6220 if (ConstOffset != 0 &&
6224 ImmOffset = ConstOffset;
6228 if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
6229 return std::nullopt;
6231 Register RHS = AddrDef->MI->getOperand(2).getReg();
6232 if (RBI.getRegBank(
RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
6233 return std::nullopt;
6235 Register LHS = AddrDef->MI->getOperand(1).getReg();
6238 if (OrigAddr != Addr) {
6239 if (!isFlatScratchBaseLegalSVImm(OrigAddr))
6240 return std::nullopt;
6242 if (!isFlatScratchBaseLegalSV(OrigAddr))
6243 return std::nullopt;
6246 if (checkFlatScratchSVSSwizzleBug(
RHS,
LHS, ImmOffset))
6247 return std::nullopt;
6249 unsigned CPol = selectScaleOffset(Root,
RHS,
true )
6253 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
6254 int FI = LHSDef->MI->getOperand(1).getIndex();
6256 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
RHS); },
6258 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
6259 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }
6268 return std::nullopt;
6271 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
RHS); },
6272 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
LHS); },
6273 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
6274 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }
6279AMDGPUInstructionSelector::selectMUBUFScratchOffen(
MachineOperand &Root)
const {
6281 MachineBasicBlock *
MBB =
MI->getParent();
6283 const SIMachineFunctionInfo *
Info =
MF->getInfo<SIMachineFunctionInfo>();
6288 Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6293 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
6297 return {{[=](MachineInstrBuilder &MIB) {
6300 [=](MachineInstrBuilder &MIB) {
6303 [=](MachineInstrBuilder &MIB) {
6308 [=](MachineInstrBuilder &MIB) {
6317 std::optional<int> FI;
6320 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6322 int64_t ConstOffset;
6323 std::tie(PtrBase, ConstOffset, std::ignore) =
6324 getPtrBaseWithConstantOffset(VAddr, *MRI);
6325 if (ConstOffset != 0) {
6326 if (TII.isLegalMUBUFImmOffset(ConstOffset) &&
6327 (!STI.privateMemoryResourceIsRangeChecked() ||
6328 VT->signBitIsZero(PtrBase))) {
6329 const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
6330 if (PtrBaseDef->
getOpcode() == AMDGPU::G_FRAME_INDEX)
6336 }
else if (RootDef->
getOpcode() == AMDGPU::G_FRAME_INDEX) {
6340 return {{[=](MachineInstrBuilder &MIB) {
6343 [=](MachineInstrBuilder &MIB) {
6349 [=](MachineInstrBuilder &MIB) {
6354 [=](MachineInstrBuilder &MIB) {
6359bool AMDGPUInstructionSelector::isDSOffsetLegal(
Register Base,
6364 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
6369 return VT->signBitIsZero(
Base);
6372bool AMDGPUInstructionSelector::isDSOffset2Legal(
Register Base, int64_t Offset0,
6374 unsigned Size)
const {
6375 if (Offset0 %
Size != 0 || Offset1 %
Size != 0)
6380 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
6385 return VT->signBitIsZero(
Base);
6390 return Addr->
getOpcode() == TargetOpcode::G_OR ||
6391 (Addr->
getOpcode() == TargetOpcode::G_PTR_ADD &&
6398bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(
Register Addr)
const {
6406 if (STI.hasSignedScratchOffsets())
6412 if (AddrMI->
getOpcode() == TargetOpcode::G_PTR_ADD) {
6413 std::optional<ValueAndVReg> RhsValReg =
6419 if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
6420 RhsValReg->Value.getSExtValue() > -0x40000000)
6424 return VT->signBitIsZero(
LHS);
6429bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(
Register Addr)
const {
6437 if (STI.hasSignedScratchOffsets())
6442 return VT->signBitIsZero(
RHS) &&
VT->signBitIsZero(
LHS);
6447bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
6451 if (STI.hasSignedScratchOffsets())
6456 std::optional<DefinitionAndSourceRegister> BaseDef =
6458 std::optional<ValueAndVReg> RHSOffset =
6468 (RHSOffset->Value.getSExtValue() < 0 &&
6469 RHSOffset->Value.getSExtValue() > -0x40000000)))
6472 Register LHS = BaseDef->MI->getOperand(1).getReg();
6473 Register RHS = BaseDef->MI->getOperand(2).getReg();
6474 return VT->signBitIsZero(
RHS) &&
VT->signBitIsZero(
LHS);
6477bool AMDGPUInstructionSelector::isUnneededShiftMask(
const MachineInstr &
MI,
6478 unsigned ShAmtBits)
const {
6479 assert(
MI.getOpcode() == TargetOpcode::G_AND);
6481 std::optional<APInt>
RHS =
6486 if (
RHS->countr_one() >= ShAmtBits)
6489 const APInt &LHSKnownZeros =
VT->getKnownZeroes(
MI.getOperand(1).getReg());
6490 return (LHSKnownZeros | *
RHS).countr_one() >= ShAmtBits;
6494AMDGPUInstructionSelector::selectMUBUFScratchOffset(
6497 const SIMachineFunctionInfo *
Info =
MF->getInfo<SIMachineFunctionInfo>();
6499 std::optional<DefinitionAndSourceRegister>
Def =
6501 assert(Def &&
"this shouldn't be an optional result");
6506 [=](MachineInstrBuilder &MIB) {
6509 [=](MachineInstrBuilder &MIB) {
6512 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
6523 if (!TII.isLegalMUBUFImmOffset(
Offset))
6531 [=](MachineInstrBuilder &MIB) {
6534 [=](MachineInstrBuilder &MIB) {
6542 !TII.isLegalMUBUFImmOffset(
Offset))
6546 [=](MachineInstrBuilder &MIB) {
6549 [=](MachineInstrBuilder &MIB) {
6556std::pair<Register, unsigned>
6557AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(
MachineOperand &Root)
const {
6558 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6559 int64_t ConstAddr = 0;
6563 std::tie(PtrBase,
Offset, std::ignore) =
6564 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
6567 if (isDSOffsetLegal(PtrBase,
Offset)) {
6569 return std::pair(PtrBase,
Offset);
6571 }
else if (RootDef->
getOpcode() == AMDGPU::G_SUB) {
6580 return std::pair(Root.
getReg(), 0);
6584AMDGPUInstructionSelector::selectDS1Addr1Offset(
MachineOperand &Root)
const {
6587 std::tie(
Reg,
Offset) = selectDS1Addr1OffsetImpl(Root);
6589 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
6595AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(
MachineOperand &Root)
const {
6596 return selectDSReadWrite2(Root, 4);
6600AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(
MachineOperand &Root)
const {
6601 return selectDSReadWrite2(Root, 8);
6605AMDGPUInstructionSelector::selectDSReadWrite2(
MachineOperand &Root,
6606 unsigned Size)
const {
6611 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
6613 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Offset+1); }
6617std::pair<Register, unsigned>
6618AMDGPUInstructionSelector::selectDSReadWrite2Impl(
MachineOperand &Root,
6619 unsigned Size)
const {
6620 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6621 int64_t ConstAddr = 0;
6625 std::tie(PtrBase,
Offset, std::ignore) =
6626 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
6629 int64_t OffsetValue0 =
Offset;
6631 if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1,
Size)) {
6633 return std::pair(PtrBase, OffsetValue0 /
Size);
6635 }
else if (RootDef->
getOpcode() == AMDGPU::G_SUB) {
6643 return std::pair(Root.
getReg(), 0);
6651std::tuple<Register, int64_t, bool>
6652AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
6655 if (RootI->
getOpcode() != TargetOpcode::G_PTR_ADD)
6656 return {Root, 0,
false};
6659 std::optional<ValueAndVReg> MaybeOffset =
6662 return {Root, 0,
false};
6682 B.buildInstr(AMDGPU::S_MOV_B32)
6685 B.buildInstr(AMDGPU::S_MOV_B32)
6692 B.buildInstr(AMDGPU::REG_SEQUENCE)
6695 .addImm(AMDGPU::sub0)
6697 .addImm(AMDGPU::sub1);
6702 B.buildInstr(AMDGPU::S_MOV_B64)
6707 B.buildInstr(AMDGPU::REG_SEQUENCE)
6710 .addImm(AMDGPU::sub0_sub1)
6712 .addImm(AMDGPU::sub2_sub3);
6719 uint64_t DefaultFormat =
TII.getDefaultRsrcDataFormat();
6728 uint64_t DefaultFormat =
TII.getDefaultRsrcDataFormat();
6735AMDGPUInstructionSelector::MUBUFAddressData
6736AMDGPUInstructionSelector::parseMUBUFAddress(
Register Src)
const {
6737 MUBUFAddressData
Data;
6743 std::tie(PtrBase,
Offset, std::ignore) =
6744 getPtrBaseWithConstantOffset(Src, *MRI);
6750 if (MachineInstr *InputAdd
6752 Data.N2 = InputAdd->getOperand(1).getReg();
6753 Data.N3 = InputAdd->getOperand(2).getReg();
6768bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr)
const {
6774 const RegisterBank *N0Bank = RBI.getRegBank(Addr.N0, *MRI, TRI);
6775 return N0Bank->
getID() == AMDGPU::VGPRRegBankID;
6781void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
6783 if (TII.isLegalMUBUFImmOffset(ImmOffset))
6787 SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6788 B.buildInstr(AMDGPU::S_MOV_B32)
6794bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
6799 if (!STI.hasAddr64() || STI.useFlatForGlobal())
6802 MUBUFAddressData AddrData = parseMUBUFAddress(Root.
getReg());
6803 if (!shouldUseAddr64(AddrData))
6809 Offset = AddrData.Offset;
6815 if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6817 if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6830 }
else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6841 splitIllegalMUBUFOffset(
B, SOffset,
Offset);
6845bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
6850 if (STI.useFlatForGlobal())
6853 MUBUFAddressData AddrData = parseMUBUFAddress(Root.
getReg());
6854 if (shouldUseAddr64(AddrData))
6860 Offset = AddrData.Offset;
6866 splitIllegalMUBUFOffset(
B, SOffset,
Offset);
6871AMDGPUInstructionSelector::selectMUBUFAddr64(
MachineOperand &Root)
const {
6877 if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset,
Offset))
6883 [=](MachineInstrBuilder &MIB) {
6886 [=](MachineInstrBuilder &MIB) {
6889 [=](MachineInstrBuilder &MIB) {
6892 else if (STI.hasRestrictedSOffset())
6893 MIB.
addReg(AMDGPU::SGPR_NULL);
6897 [=](MachineInstrBuilder &MIB) {
6907AMDGPUInstructionSelector::selectMUBUFOffset(
MachineOperand &Root)
const {
6912 if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset,
Offset))
6916 [=](MachineInstrBuilder &MIB) {
6919 [=](MachineInstrBuilder &MIB) {
6922 else if (STI.hasRestrictedSOffset())
6923 MIB.
addReg(AMDGPU::SGPR_NULL);
6935AMDGPUInstructionSelector::selectBUFSOffset(
MachineOperand &Root)
const {
6940 SOffset = AMDGPU::SGPR_NULL;
6942 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); }}};
6946static std::optional<uint64_t>
6950 if (!OffsetVal || !
isInt<32>(*OffsetVal))
6951 return std::nullopt;
6952 return Lo_32(*OffsetVal);
6956AMDGPUInstructionSelector::selectSMRDBufferImm(
MachineOperand &Root)
const {
6957 std::optional<uint64_t> OffsetVal =
6962 std::optional<int64_t> EncodedImm =
6967 return {{ [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); } }};
6971AMDGPUInstructionSelector::selectSMRDBufferImm32(
MachineOperand &Root)
const {
6978 std::optional<int64_t> EncodedImm =
6983 return {{ [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); } }};
6987AMDGPUInstructionSelector::selectSMRDBufferSgprImm(
MachineOperand &Root)
const {
6995 return std::nullopt;
6997 std::optional<int64_t> EncodedOffset =
7000 return std::nullopt;
7003 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
7004 [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedOffset); }}};
7007std::pair<Register, unsigned>
7008AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(
MachineOperand &Root,
7009 bool &Matched)
const {
7014 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
7024 const auto CheckAbsNeg = [&]() {
7029 std::tie(Src, ModsTmp) = selectVOP3ModsImpl(Src);
7060AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
7065 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
7070 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
7071 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
7076AMDGPUInstructionSelector::selectVOP3PMadMixMods(
MachineOperand &Root)
const {
7080 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
7083 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
7084 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
7088bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
7092 Register CCReg =
I.getOperand(0).getReg();
7097 BuildMI(*
MBB, &
I,
DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
7098 .
addImm(
I.getOperand(2).getImm());
7102 I.eraseFromParent();
7103 return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
7107bool AMDGPUInstructionSelector::selectSGetBarrierState(
7111 const MachineOperand &BarOp =
I.getOperand(2);
7112 std::optional<int64_t> BarValImm =
7116 auto CopyMIB =
BuildMI(*
MBB, &
I,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
7120 MachineInstrBuilder MIB;
7121 unsigned Opc = BarValImm ? AMDGPU::S_GET_BARRIER_STATE_IMM
7122 : AMDGPU::S_GET_BARRIER_STATE_M0;
7125 auto DstReg =
I.getOperand(0).getReg();
7126 const TargetRegisterClass *DstRC =
7127 TRI.getConstrainedRegClassForOperand(
I.getOperand(0), *MRI);
7128 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
7134 I.eraseFromParent();
7139 if (HasInlineConst) {
7143 case Intrinsic::amdgcn_s_barrier_join:
7144 return AMDGPU::S_BARRIER_JOIN_IMM;
7145 case Intrinsic::amdgcn_s_wakeup_barrier:
7146 return AMDGPU::S_WAKEUP_BARRIER_IMM;
7147 case Intrinsic::amdgcn_s_get_named_barrier_state:
7148 return AMDGPU::S_GET_BARRIER_STATE_IMM;
7154 case Intrinsic::amdgcn_s_barrier_join:
7155 return AMDGPU::S_BARRIER_JOIN_M0;
7156 case Intrinsic::amdgcn_s_wakeup_barrier:
7157 return AMDGPU::S_WAKEUP_BARRIER_M0;
7158 case Intrinsic::amdgcn_s_get_named_barrier_state:
7159 return AMDGPU::S_GET_BARRIER_STATE_M0;
7164bool AMDGPUInstructionSelector::selectNamedBarrierInit(
7168 const MachineOperand &BarOp =
I.getOperand(1);
7169 const MachineOperand &CntOp =
I.getOperand(2);
7173 if (IntrID == Intrinsic::amdgcn_s_barrier_signal_var) {
7174 std::optional<int64_t> CntImm =
7176 if (CntImm && *CntImm == 0) {
7177 std::optional<int64_t> BarValImm =
7180 auto BarID = ((*BarValImm) >> 4) & 0x3F;
7181 BuildMI(*
MBB, &
I,
DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_IMM))
7183 I.eraseFromParent();
7190 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7196 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7203 Register TmpReg2 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7209 Register TmpReg3 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7210 constexpr unsigned ShAmt = 16;
7216 Register TmpReg4 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7226 unsigned Opc = IntrID == Intrinsic::amdgcn_s_barrier_init
7227 ? AMDGPU::S_BARRIER_INIT_M0
7228 : AMDGPU::S_BARRIER_SIGNAL_M0;
7229 MachineInstrBuilder MIB;
7232 I.eraseFromParent();
7236bool AMDGPUInstructionSelector::selectNamedBarrierInst(
7240 MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_named_barrier_state
7243 std::optional<int64_t> BarValImm =
7248 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7254 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7260 auto CopyMIB =
BuildMI(*
MBB, &
I,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
7265 MachineInstrBuilder MIB;
7269 if (IntrID == Intrinsic::amdgcn_s_get_named_barrier_state) {
7270 auto DstReg =
I.getOperand(0).getReg();
7271 const TargetRegisterClass *DstRC =
7272 TRI.getConstrainedRegClassForOperand(
I.getOperand(0), *MRI);
7273 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
7279 auto BarId = ((*BarValImm) >> 4) & 0x3F;
7283 I.eraseFromParent();
7290 assert(
MI.getOpcode() == TargetOpcode::G_CONSTANT &&
OpIdx == -1 &&
7291 "Expected G_CONSTANT");
7292 MIB.
addImm(
MI.getOperand(1).getCImm()->getSExtValue());
7298 assert(
MI.getOpcode() == TargetOpcode::G_CONSTANT &&
OpIdx == -1 &&
7299 "Expected G_CONSTANT");
7300 MIB.
addImm(-
MI.getOperand(1).getCImm()->getSExtValue());
7306 const MachineOperand &
Op =
MI.getOperand(1);
7307 assert(
MI.getOpcode() == TargetOpcode::G_FCONSTANT &&
OpIdx == -1);
7308 MIB.
addImm(
Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
7311void AMDGPUInstructionSelector::renderCountTrailingOnesImm(
7313 assert(
MI.getOpcode() == TargetOpcode::G_CONSTANT &&
OpIdx == -1 &&
7314 "Expected G_CONSTANT");
7315 MIB.
addImm(
MI.getOperand(1).getCImm()->getValue().countTrailingOnes());
7323 const MachineOperand &
Op =
MI.getOperand(
OpIdx);
7340 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7344void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_0(
7346 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7351void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_1(
7353 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7359void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_0(
7361 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7366void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_1(
7368 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7374void AMDGPUInstructionSelector::renderDstSelToOpSelXForm(
7376 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7381void AMDGPUInstructionSelector::renderSrcSelToOpSelXForm(
7383 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7388void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_2_0(
7390 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7395void AMDGPUInstructionSelector::renderDstSelToOpSel3XFormXForm(
7397 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7406 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7415 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7422void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
7424 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7425 const uint32_t Cpol =
MI.getOperand(
OpIdx).getImm() &
7440 const APFloat &APF =
MI.getOperand(1).getFPImm()->getValueAPF();
7442 assert(ExpVal != INT_MIN);
7460 if (
MI.getOperand(
OpIdx).getImm())
7462 MIB.
addImm((int64_t)Mods);
7469 if (
MI.getOperand(
OpIdx).getImm())
7471 MIB.
addImm((int64_t)Mods);
7477 unsigned Val =
MI.getOperand(
OpIdx).getImm();
7485 MIB.
addImm((int64_t)Mods);
7491 uint32_t
V =
MI.getOperand(2).getImm();
7494 if (!Subtarget->hasSafeCUPrefetch())
7500void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
7502 unsigned Val =
MI.getOperand(
OpIdx).getImm();
7511bool AMDGPUInstructionSelector::isInlineImmediate(
const APInt &Imm)
const {
7512 return TII.isInlineConstant(Imm);
7515bool AMDGPUInstructionSelector::isInlineImmediate(
const APFloat &Imm)
const {
7516 return TII.isInlineConstant(Imm);
MachineInstrBuilder MachineInstrBuilder & DefMI
static unsigned getIntrinsicID(const SDNode *N)
#define GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
static Register getLegalRegBank(Register NewReg, Register RootReg, const AMDGPURegisterBankInfo &RBI, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const SIInstrInfo &TII)
static bool isShlHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI)
Test if the MI is shift left with half bits, such as reg0:2n =G_SHL reg1:2n, CONST(n)
static bool isNoUnsignedWrap(MachineInstr *Addr)
static Register buildOffsetSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI, const SIInstrInfo &TII, Register BasePtr)
unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID)
static bool checkRB(Register Reg, unsigned int RBNo, const AMDGPURegisterBankInfo &RBI, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI)
static unsigned updateMods(SrcStatus HiStat, SrcStatus LoStat, unsigned Mods)
static bool isTruncHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI)
Test if the MI is truncating to half, such as reg0:n = G_TRUNC reg1:2n
static Register getWaveAddress(const MachineInstr *Def)
static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In, Register &Out)
static bool shouldUseAndMask(unsigned Size, unsigned &Mask)
static std::pair< unsigned, uint8_t > BitOp3_Op(Register R, SmallVectorImpl< Register > &Src, const MachineRegisterInfo &MRI)
static TypeClass isVectorOfTwoOrScalar(Register Reg, const MachineRegisterInfo &MRI)
static bool isLaneMaskFromSameBlock(Register Reg, MachineRegisterInfo &MRI, MachineBasicBlock *MBB)
static bool parseTexFail(uint64_t TexFailCtrl, bool &TFE, bool &LWE, bool &IsTexFail)
static void addZeroImm(MachineInstrBuilder &MIB)
static unsigned gwsIntrinToOpcode(unsigned IntrID)
static bool isConstant(const MachineInstr &MI)
static bool isSameBitWidth(Register Reg1, Register Reg2, const MachineRegisterInfo &MRI)
static Register buildRegSequence(SmallVectorImpl< Register > &Elts, MachineInstr *InsertPt, MachineRegisterInfo &MRI)
static Register buildRSRC(MachineIRBuilder &B, MachineRegisterInfo &MRI, uint32_t FormatLo, uint32_t FormatHi, Register BasePtr)
Return a resource descriptor for use with an arbitrary 64-bit pointer.
static bool isAsyncLDSDMA(Intrinsic::ID Intr)
static std::pair< Register, unsigned > computeIndirectRegIndex(MachineRegisterInfo &MRI, const SIRegisterInfo &TRI, const TargetRegisterClass *SuperRC, Register IdxReg, unsigned EltSize, GISelValueTracking &ValueTracking)
Return the register to use for the index value, and the subregister to use for the indirectly accesse...
static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64)
static std::pair< Register, SrcStatus > getLastSameOrNeg(Register Reg, const MachineRegisterInfo &MRI, SearchOptions SO, int MaxDepth=3)
static Register stripCopy(Register Reg, MachineRegisterInfo &MRI)
static std::optional< std::pair< Register, SrcStatus > > calcNextStatus(std::pair< Register, SrcStatus > Curr, const MachineRegisterInfo &MRI)
static Register stripBitCast(Register Reg, MachineRegisterInfo &MRI)
static std::optional< uint64_t > getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI)
Get an immediate that must be 32-bits, and treated as zero extended.
static bool isValidToPack(SrcStatus HiStat, SrcStatus LoStat, Register NewReg, Register RootReg, const SIInstrInfo &TII, const MachineRegisterInfo &MRI)
static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size, const GCNSubtarget &ST)
static SmallVector< std::pair< Register, SrcStatus > > getSrcStats(Register Reg, const MachineRegisterInfo &MRI, SearchOptions SO, int MaxDepth=3)
static bool isUnmergeHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI)
Test function, if the MI is reg0:n, reg1:n = G_UNMERGE_VALUES reg2:2n
static SrcStatus getNegStatus(Register Reg, SrcStatus S, const MachineRegisterInfo &MRI)
static bool isVCmpResult(Register Reg, MachineRegisterInfo &MRI)
static Register buildAddr64RSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI, const SIInstrInfo &TII, Register BasePtr)
static bool isLshrHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI)
Test if the MI is logic shift right with half bits, such as reg0:2n =G_LSHR reg1:2n,...
static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods, SmallVectorImpl< Register > &Elts, Register &Src, MachineInstr *InsertPt, MachineRegisterInfo &MRI)
This file declares the targeting of the InstructionSelector class for AMDGPU.
AMDGPU Register Bank Select
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool isAllZeros(StringRef Arr)
Return true if the array is empty or all zeros.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
Contains matchers for matching SSA Machine Instructions.
Machine Check Debug Module
This file declares the MachineIRBuilder class.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
MachineInstr unsigned OpIdx
static std::vector< std::pair< int, unsigned > > Swizzle(std::vector< std::pair< int, unsigned > > Src, R600InstrInfo::BankSwizzle Swz)
This is used to control valid status that current MI supports.
bool checkOptions(SrcStatus Stat) const
SearchOptions(Register Reg, const MachineRegisterInfo &MRI)
AMDGPUInstructionSelector(const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI, const AMDGPUTargetMachine &TM)
static const char * getName()
bool select(MachineInstr &I) override
Select the (possibly generic) instruction I to only use target-specific opcodes.
void setupMF(MachineFunction &MF, GISelValueTracking *VT, CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) override
Setup per-MF executor state.
uint32_t getLDSSize() const
LLVM_READONLY int getExactLog2Abs() const
Class for arbitrary precision integers.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
bool isFPPredicate() const
bool isIntPredicate() const
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
LLVM_ABI DILocation * get() const
Get the underlying DILocation.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
std::optional< SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > > ComplexRendererFns
virtual void setupMF(MachineFunction &mf, GISelValueTracking *vt, CodeGenCoverage *covinfo=nullptr, ProfileSummaryInfo *psi=nullptr, BlockFrequencyInfo *bfi=nullptr)
Setup per-MF executor state.
CodeGenCoverage * CoverageInfo
constexpr bool isScalar() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
TypeSize getValue() const
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void setReturnAddressIsTaken(bool s)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Helper class to build MachineInstr.
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & setOperandDead(unsigned OpIdx) const
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
unsigned getNumOperands() const
Retuns the total number of operands.
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
LocationSize getSize() const
Return the size in bytes of the memory reference.
unsigned getAddrSpace() const
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
const Value * getValue() const
Return the base address of the memory access.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
const ConstantInt * getCImm() const
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
ArrayRef< int > getShuffleMask() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
bool isEarlyClobber() const
Register getReg() const
getReg - Returns the register number.
bool isInternalRead() const
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
const RegisterBank * getRegBankOrNull(Register Reg) const
Return the register bank of Reg, or null if Reg has not been assigned a register bank or has been ass...
LLVM_ABI Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Analysis providing profile information.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static bool isGenericOpcode(unsigned Opc)
unsigned getID() const
Return the register class ID number.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
@ BUFFER_RESOURCE
Address space for 128-bit buffer resources.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
LLVM_READONLY const MIMGG16MappingInfo * getMIMGG16MappingInfo(unsigned G)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isGFX12Plus(const MCSubtargetInfo &STI)
constexpr int64_t getNullPointerValue(unsigned AS)
Get the null pointer value for the given address space.
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
LLVM_READONLY int32_t getGlobalSaddrOp(uint32_t Opcode)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX10Plus(const MCSubtargetInfo &STI)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfo(unsigned DimEnum)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
Intrinsic::ID getIntrinsicID(const MachineInstr &I)
Return the intrinsic ID for opcodes with the G_AMDGPU_INTRIN_ prefix.
std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg, GISelValueTracking *ValueTracking=nullptr, bool CheckNUW=false)
Returns base register and constant offset.
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
IndexMode
ARM Index Modes.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)
Matches a constant equal to RequestedValue.
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
UnaryOp_match< SrcTy, TargetOpcode::COPY > m_Copy(SrcTy &&Src)
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
ConstantMatch< APInt > m_ICst(APInt &Cst)
SpecificConstantMatch m_AllOnesInt()
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
SpecificRegisterMatch m_SpecificReg(Register RequestedReg)
Matches a register only if it is equal to RequestedReg.
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
UnaryOp_match< SrcTy, TargetOpcode::G_FABS > m_GFabs(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, TargetOpcode::G_MUL, true > m_GMul(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
NodeAddr< DefNode * > Def
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, MCRegister PhysReg, const TargetRegisterClass &RC, const DebugLoc &DL, LLT RegTy=LLT())
Return a virtual register corresponding to the incoming argument register PhysReg.
FunctionAddr VTableAddr Value
LLVM_ABI bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
LLVM_ABI Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
PointerUnion< const TargetRegisterClass *, const RegisterBank * > RegClassOrRegBank
Convenient type to represent either a register class or a register bank.
LLVM_ABI const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI void constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
LLVM_ABI std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT fits in int64_t returns it.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONST...
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
FunctionAddr VTableAddr uintptr_t uintptr_t Data
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
LLVM_ABI std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
LLVM_ABI Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
constexpr RegState getUndefRegState(bool B)
@ Default
The result value is uniform if and only if all operands are uniform.
unsigned AtomicNoRetBaseOpcode
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false, bool SelfAdd=false)
Compute knownbits resulting from addition of LHS and RHS.
int64_t Offset
Offset - This is an offset from the base Value*.
PointerUnion< const Value *, const PseudoSourceValue * > V
This is the IR pointer value for the access, or it is null if unknown.