#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-isel"

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

    : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
#include "AMDGPUGenGlobalISel.inc"
#include "AMDGPUGenGlobalISel.inc"

  MRI = &MF.getRegInfo();

  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
             ? Def->getOperand(1).getReg()

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =

    const LLT Ty = MRI.getType(Reg);

    return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&

  return RB->getID() == AMDGPU::VCCRegBankID;

bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
                                                        unsigned NewOpc) const {
  MI.setDesc(TII.get(NewOpc));

  MachineOperand &Dst = MI.getOperand(0);
  MachineOperand &Src = MI.getOperand(1);

  const TargetRegisterClass *DstRC
    = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
  const TargetRegisterClass *SrcRC
    = TRI.getConstrainedRegClassForOperand(Src, *MRI);
  if (!DstRC || DstRC != SrcRC)

  if (!RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))

  const MCInstrDesc &MCID = MI.getDesc();
    MI.getOperand(0).setIsEarlyClobber(true);
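// selectCOPY lowers a generic COPY to the target COPY, with special handling
// when the destination is a VCC lane mask: a copy from SCC only needs its
// register class constrained, while a copy from a non-VCC source is expanded
// to an AND of bit 0 followed by a V_CMP_NE so the result is a legal mask.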
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  I.setDesc(TII.get(TargetOpcode::COPY));

  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);

  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      const TargetRegisterClass *RC
        = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
      return RBI.constrainGenericRegister(DstReg, *RC, *MRI);

    if (!isVCC(SrcReg, *MRI)) {
      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI))

      const TargetRegisterClass *SrcRC
        = TRI.getConstrainedRegClassForOperand(Src, *MRI);

      std::optional<ValueAndVReg> ConstVal =
            STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
            .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);

        Register MaskedReg = MRI->createVirtualRegister(SrcRC);

          assert(Subtarget->useRealTrue16Insts());
          const int64_t NoMods = 0;
          BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)
          BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)

          bool IsSGPR = TRI.isSGPRClass(SrcRC);
          unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
          BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)

      if (!MRI->getRegClassOrNull(SrcReg))
        MRI->setRegClass(SrcReg, SrcRC);

  const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(Dst, *MRI);
  if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI))

  for (const MachineOperand &MO : I.operands()) {
    if (MO.getReg().isPhysical())

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, *MRI);
    RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
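// Explicit copies between SCC and VCC: SCC <- VCC is materialized with a
// scalar compare (or S_OR_B64 when 64-bit scalar compares are unavailable),
// and VCC <- SCC uses S_CSELECT, or a plain S_MOV of an all-ones/zero mask
// when the source is a known constant. selectReadAnyLane lowers to
// V_READFIRSTLANE_B32.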
bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(MachineInstr &I) const {
  Register VCCReg = I.getOperand(1).getReg();

  if (STI.hasScalarCompareEq64()) {
        STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;

    Register DeadDst = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
    Cmp = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_OR_B64), DeadDst)

  Register DstReg = I.getOperand(0).getReg();

  return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);

bool AMDGPUInstructionSelector::selectCOPY_VCC_SCC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();

  std::optional<ValueAndVReg> Arg =
    const int64_t Value = Arg->Value.getZExtValue();
    unsigned Opcode = STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
    return RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI);

  unsigned SelectOpcode =
      STI.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;

bool AMDGPUInstructionSelector::selectReadAnyLane(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();

  auto RFL = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);

      MRI->getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC =
    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB);

  for (unsigned i = 1; i != I.getNumOperands(); i += 2) {
    const Register SrcReg = I.getOperand(i).getReg();

    const RegisterBank *RB = MRI->getRegBankOrNull(SrcReg);
    const LLT SrcTy = MRI->getType(SrcReg);
    const TargetRegisterClass *SrcRC =
        TRI.getRegClassForTypeOnBank(SrcTy, *RB);
    if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI);

                                                           unsigned SubIdx) const {
  Register DstReg = MRI->createVirtualRegister(&SubRC);

    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)

    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;

bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
      DstRB->getID() != AMDGPU::VCCRegBankID)

  bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
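// G_ADD/G_SUB selection: 32-bit SALU ops map onto S_ADD_U32/S_SUB_U32, VALU
// ops use V_ADD_U32_e64 (or the carry-out _CO_ forms when add-no-carry is
// unavailable), and 64-bit values are split into sub0/sub1 halves chained
// through S_ADDC_U32 / V_ADDC_U32_e64 and rebuilt with a REG_SEQUENCE.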
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  LLT Ty = MRI->getType(DstReg);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
        .add(I.getOperand(1))
        .add(I.getOperand(2))

    if (STI.hasAddNoCarry()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));

    const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;

    Register UnusedCarry = MRI->createVirtualRegister(TRI.getWaveMaskRegClass());
      .add(I.getOperand(1))
      .add(I.getOperand(2))

  assert(!Sub && "illegal sub should not reach here");

  const TargetRegisterClass &RC
    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)

    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)

    MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

  if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
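// G_UADDO/G_USUBO/G_UADDE/G_USUBE: when the carry-out is a VCC lane mask the
// instruction becomes the VALU carry form directly; otherwise the carry is
// routed through SCC with explicit COPYs around the scalar S_ADD(C)/S_SUB(B).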
bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
                     I.getOpcode() == AMDGPU::G_UADDE;
  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
                          I.getOpcode() == AMDGPU::G_USUBE;

  if (isVCC(Dst1Reg, *MRI)) {
    unsigned NoCarryOpc =
        IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));

  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(I.getOperand(4).getReg());

  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  auto CarryInst = BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
    .add(I.getOperand(2))
    .add(I.getOperand(3));

  if (MRI->use_nodbg_empty(Dst1Reg)) {
    CarryInst.setOperandDead(3);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
    if (!MRI->getRegClassOrNull(Dst1Reg))
      MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);

  if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, *MRI))

      !RBI.constrainGenericRegister(I.getOperand(4).getReg(),
                                    AMDGPU::SReg_32RegClass, *MRI))
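// G_AMDGPU_MAD_U64_U32 / _I64_I32 pick between the gfx11 intra-forwarding
// workaround encoding, the no-carry V_MAD_NC form when the carry-out is
// unused and supported, and the default V_MAD_U64_U32/V_MAD_I64_I32.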
bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
  const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
  bool UseNoCarry = Subtarget->hasMadU64U32NoCarry() &&
                    MRI->use_nodbg_empty(I.getOperand(1).getReg());

  if (Subtarget->hasMADIntraFwdBug())
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
                     : AMDGPU::V_MAD_I64_I32_gfx11_e64;
    Opc = IsUnsigned ? AMDGPU::V_MAD_NC_U64_U32_e64
                     : AMDGPU::V_MAD_NC_I64_I32_e64;
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;

  I.setDesc(TII.get(Opc));
  I.addImplicitDefUseOperands(*MF);
  I.getOperand(0).setIsEarlyClobber(true);
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);

  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0 || DstSize > 128)

  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);

                                               *SrcRC, I.getOperand(1));
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)

bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());

  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);

    const TargetRegisterClass *SrcRC
      = TRI.getConstrainedRegClassForOperand(Src, *MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))

  MI.eraseFromParent();
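// G_UNMERGE_VALUES becomes one COPY per result, each reading a successive
// subregister of the constrained source register.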
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  const int NumDst = MI.getNumOperands() - 1;

  MachineOperand &Src = MI.getOperand(NumDst);

  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
      .addReg(SrcReg, 0, SubRegs[I]);

    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
    if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))

    const TargetRegisterClass *DstRC =
        TRI.getConstrainedRegClassForOperand(Dst, *MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI))

  MI.eraseFromParent();
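// G_BUILD_VECTOR(_TRUNC) of 16-bit halves: two constants fold into a single
// 32-bit immediate, an undef high half degenerates into a COPY, VGPR results
// use V_AND + V_LSHL_OR, and SGPR results use the S_PACK_* family
// (LL/LH/HH, or HL when the subtarget provides it).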
bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
  assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
         MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);

  LLT SrcTy = MRI->getType(Src0);

  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
    return selectG_MERGE_VALUES(MI);

      (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&

  const RegisterBank *DstBank = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstBank->getID() == AMDGPU::AGPRRegBankID)

  assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||
         DstBank->getID() == AMDGPU::VGPRRegBankID);
  const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;

  MachineBasicBlock *BB = MI.getParent();

    const int64_t K0 = ConstSrc0->Value.getSExtValue();
    const int64_t K1 = ConstSrc1->Value.getSExtValue();
    uint32_t Lo16 = static_cast<uint32_t>(K0) & 0xffff;
    uint32_t Hi16 = static_cast<uint32_t>(K1) & 0xffff;
    uint32_t Imm = Lo16 | (Hi16 << 16);

      MI.eraseFromParent();
      return RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI);

    MI.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);

  if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
    MI.setDesc(TII.get(AMDGPU::COPY));
        IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
    return RBI.constrainGenericRegister(Dst, RC, *MRI) &&
           RBI.constrainGenericRegister(Src0, RC, *MRI);

    Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)

    MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)

    MI.eraseFromParent();

  unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
  if (Shift0 && Shift1) {
    Opc = AMDGPU::S_PACK_HH_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
    MI.getOperand(2).setReg(ShiftSrc1);
    Opc = AMDGPU::S_PACK_LH_B32_B16;
    MI.getOperand(2).setReg(ShiftSrc1);

  if (ConstSrc1 && ConstSrc1->Value == 0) {
    auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)

    MI.eraseFromParent();

  if (STI.hasSPackHL()) {
    Opc = AMDGPU::S_PACK_HL_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);

  MI.setDesc(TII.get(Opc));
bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  const MachineOperand &MO = I.getOperand(0);

  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, *MRI);
  if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));

bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI->getType(Src1Reg);

  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();

  int64_t Offset = I.getOperand(3).getImm();
  if (Offset % 32 != 0 || InsSize % 32 != 0)

  unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, InsSize / 32);
  if (SubReg == AMDGPU::NoSubRegister)

  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

  const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI);
  const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI);
  const TargetRegisterClass *Src0RC =
      TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank);
  const TargetRegisterClass *Src1RC =
      TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank);

  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
  if (!Src0RC || !Src1RC)

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI))

  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {
  Register OffsetReg = MI.getOperand(2).getReg();
  Register WidthReg = MI.getOperand(3).getReg();

  assert(RBI.getRegBank(DstReg, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID &&
         "scalar BFX instructions are expanded in regbankselect");
  assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
         "64-bit vector BFX instructions are expanded in regbankselect");

  MachineBasicBlock *MBB = MI.getParent();

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;
  unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;

  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
  if (STI.getLDSBankCount() != 16)

  if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src0, AMDGPU::VGPR_32RegClass, *MRI))

  Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  MachineBasicBlock *MBB = MI.getParent();

  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
    .addImm(MI.getOperand(3).getImm());

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
  if (STI.getConstantBusLimit(AMDGPU::V_WRITELANE_B32) > 1)

  MachineBasicBlock *MBB = MI.getParent();
  Register LaneSelect = MI.getOperand(3).getReg();

  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);

  std::optional<ValueAndVReg> ConstSelect =
    MIB.addImm(ConstSelect->Value.getSExtValue() &

    std::optional<ValueAndVReg> ConstVal =
                         STI.hasInv2PiInlineImm())) {
      MIB.addImm(ConstVal->Value.getSExtValue());

      RBI.constrainGenericRegister(LaneSelect, AMDGPU::SReg_32_XM0RegClass, *MRI);

      BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
  LLT Ty = MRI->getType(Dst0);

    Opc = AMDGPU::V_DIV_SCALE_F32_e64;
    Opc = AMDGPU::V_DIV_SCALE_F64_e64;

  MachineBasicBlock *MBB = MI.getParent();

  unsigned ChooseDenom = MI.getOperand(5).getImm();

  Register Src0 = ChooseDenom != 0 ? Numer : Denom;

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
      .add(I.getOperand(0))
      .add(I.getOperand(2))
      .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

      MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());

  case Intrinsic::amdgcn_interp_p1_f16:
    return selectInterpP1F16(I);
  case Intrinsic::amdgcn_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::WQM);
  case Intrinsic::amdgcn_softwqm:
    return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
  case Intrinsic::amdgcn_strict_wwm:
  case Intrinsic::amdgcn_wwm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);
  case Intrinsic::amdgcn_strict_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);
  case Intrinsic::amdgcn_writelane:
    return selectWritelane(I);
  case Intrinsic::amdgcn_div_scale:
    return selectDivScale(I);
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp:
    return selectIntrinsicCmp(I);
  case Intrinsic::amdgcn_ballot:
    return selectBallot(I);
  case Intrinsic::amdgcn_reloc_constant:
    return selectRelocConstant(I);
  case Intrinsic::amdgcn_groupstaticsize:
    return selectGroupStaticSize(I);
  case Intrinsic::returnaddress:
    return selectReturnAddress(I);
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
    return selectSMFMACIntrin(I);
  case Intrinsic::amdgcn_permlane16_swap:
  case Intrinsic::amdgcn_permlane32_swap:
    return selectPermlaneSwapIntrin(I, IntrinsicID);
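// Map a compare predicate to a VALU V_CMP opcode. The Select helper below
// chooses the 16-bit true16/fake16 variant, the 32-bit form, or the 64-bit
// form based on the operand size and the subtarget's true16 support.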
  if (Size == 16 && !ST.has16BitInsts())

  const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc,
                          unsigned FakeS16Opc, unsigned S32Opc,
    return ST.hasTrue16BitInsts()
               ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc

    return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
                  AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,
                  AMDGPU::V_CMP_NE_U64_e64);
    return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
                  AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,
                  AMDGPU::V_CMP_EQ_U64_e64);
    return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
                  AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,
                  AMDGPU::V_CMP_GT_I64_e64);
    return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
                  AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,
                  AMDGPU::V_CMP_GE_I64_e64);
    return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
                  AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,
                  AMDGPU::V_CMP_LT_I64_e64);
    return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
                  AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,
                  AMDGPU::V_CMP_LE_I64_e64);
    return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
                  AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,
                  AMDGPU::V_CMP_GT_U64_e64);
    return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
                  AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,
                  AMDGPU::V_CMP_GE_U64_e64);
    return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
                  AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,
                  AMDGPU::V_CMP_LT_U64_e64);
    return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
                  AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,
                  AMDGPU::V_CMP_LE_U64_e64);

    return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
                  AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,
                  AMDGPU::V_CMP_EQ_F64_e64);
    return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
                  AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,
                  AMDGPU::V_CMP_GT_F64_e64);
    return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
                  AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,
                  AMDGPU::V_CMP_GE_F64_e64);
    return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
                  AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,
                  AMDGPU::V_CMP_LT_F64_e64);
    return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
                  AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,
                  AMDGPU::V_CMP_LE_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
                  AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
                  AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,
                  AMDGPU::V_CMP_O_F64_e64);
    return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
                  AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,
                  AMDGPU::V_CMP_U_F64_e64);
    return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
                  AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,
                  AMDGPU::V_CMP_NLG_F64_e64);
    return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
                  AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,
                  AMDGPU::V_CMP_NLE_F64_e64);
    return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
                  AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,
                  AMDGPU::V_CMP_NLT_F64_e64);
    return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
                  AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,
                  AMDGPU::V_CMP_NGE_F64_e64);
    return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
                  AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,
                  AMDGPU::V_CMP_NGT_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
                  AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
                  AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,
                  AMDGPU::V_CMP_TRU_F64_e64);
    return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
                  AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,
                  AMDGPU::V_CMP_F_F64_e64);
                                          unsigned Size) const {
  if (!STI.hasScalarCompareEq64())

    return AMDGPU::S_CMP_LG_U64;
    return AMDGPU::S_CMP_EQ_U64;

    return AMDGPU::S_CMP_LG_U32;
    return AMDGPU::S_CMP_EQ_U32;
    return AMDGPU::S_CMP_GT_I32;
    return AMDGPU::S_CMP_GE_I32;
    return AMDGPU::S_CMP_LT_I32;
    return AMDGPU::S_CMP_LE_I32;
    return AMDGPU::S_CMP_GT_U32;
    return AMDGPU::S_CMP_GE_U32;
    return AMDGPU::S_CMP_LT_U32;
    return AMDGPU::S_CMP_LE_U32;
    return AMDGPU::S_CMP_EQ_F32;
    return AMDGPU::S_CMP_GT_F32;
    return AMDGPU::S_CMP_GE_F32;
    return AMDGPU::S_CMP_LT_F32;
    return AMDGPU::S_CMP_LE_F32;
    return AMDGPU::S_CMP_LG_F32;
    return AMDGPU::S_CMP_O_F32;
    return AMDGPU::S_CMP_U_F32;
    return AMDGPU::S_CMP_NLG_F32;
    return AMDGPU::S_CMP_NLE_F32;
    return AMDGPU::S_CMP_NLT_F32;
    return AMDGPU::S_CMP_NGE_F32;
    return AMDGPU::S_CMP_NGT_F32;
    return AMDGPU::S_CMP_NEQ_F32;

    if (!STI.hasSALUFloatInsts())

      return AMDGPU::S_CMP_EQ_F16;
      return AMDGPU::S_CMP_GT_F16;
      return AMDGPU::S_CMP_GE_F16;
      return AMDGPU::S_CMP_LT_F16;
      return AMDGPU::S_CMP_LE_F16;
      return AMDGPU::S_CMP_LG_F16;
      return AMDGPU::S_CMP_O_F16;
      return AMDGPU::S_CMP_U_F16;
      return AMDGPU::S_CMP_NLG_F16;
      return AMDGPU::S_CMP_NLE_F16;
      return AMDGPU::S_CMP_NLT_F16;
      return AMDGPU::S_CMP_NGE_F16;
      return AMDGPU::S_CMP_NGT_F16;
      return AMDGPU::S_CMP_NEQ_F16;
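// G_ICMP/G_FCMP: if the result is not a VCC lane mask, emit the scalar S_CMP
// form and copy SCC into the destination; otherwise emit the VALU V_CMP form
// that writes the lane-mask register directly.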
bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);

  Register CCReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);

    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
      .add(I.getOperand(2))
      .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)

    RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, *MRI);
    I.eraseFromParent();

  if (I.getOpcode() == AMDGPU::G_FCMP)

  MachineInstrBuilder ICmp;
    ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
             .add(I.getOperand(2))
             .add(I.getOperand(3))
    ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
             .add(I.getOperand(2))
             .add(I.getOperand(3));

                               *TRI.getBoolRC(), *MRI);
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (isVCC(Dst, *MRI))

  LLT DstTy = MRI->getType(Dst);

  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
    I.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);

  MachineInstrBuilder SelectedMI;
  MachineOperand &LHS = I.getOperand(2);
  MachineOperand &RHS = I.getOperand(3);
  auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS.getReg());
  auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS.getReg());
      copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, true);
      copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, true);
  SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);
    SelectedMI.addImm(Src0Mods);
  SelectedMI.addReg(Src0Reg);
    SelectedMI.addImm(Src1Mods);
  SelectedMI.addReg(Src1Reg);

  RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);

  I.eraseFromParent();

  if (MI->getParent() != MBB)

  if (MI->getOpcode() == AMDGPU::COPY) {
    auto DstRB = MRI.getRegBankOrNull(MI->getOperand(0).getReg());
    auto SrcRB = MRI.getRegBankOrNull(MI->getOperand(1).getReg());
    if (DstRB && SrcRB && DstRB->getID() == AMDGPU::VCCRegBankID &&
        SrcRB->getID() == AMDGPU::SGPRRegBankID)
bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(2).getReg();
  const unsigned BallotSize = MRI->getType(DstReg).getSizeInBits();
  const unsigned WaveSize = STI.getWavefrontSize();

  if (BallotSize != WaveSize && (BallotSize != 64 || WaveSize != 32))

  std::optional<ValueAndVReg> Arg =

  if (BallotSize != WaveSize) {
    Dst = MRI->createVirtualRegister(TRI.getBoolRC());

    const int64_t Value = Arg->Value.getZExtValue();
      unsigned Opcode = WaveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
      if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))
    if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))

    unsigned AndOpc = WaveSize == 64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;

  if (BallotSize != WaveSize) {
    Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC = TRI.getRegClassForSizeOnBank(32, *DstBank);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))

  const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;

  Module *M = MF->getFunction().getParent();
  const MDNode *Metadata = I.getOperand(2).getMetadata();

          TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)

  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
    AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;

    const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();

    Module *M = MF->getFunction().getParent();
    const GlobalValue *GV =

  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
  MachineOperand &Dst = I.getOperand(0);
  unsigned Depth = I.getOperand(2).getImm();

  const TargetRegisterClass *RC
    = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
      !RBI.constrainGenericRegister(DstReg, *RC, *MRI))

      MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
    I.eraseFromParent();

  MachineFrameInfo &MFI = MF.getFrameInfo();

  Register ReturnAddrReg = TRI.getReturnAddressReg(MF);
                                             AMDGPU::SReg_64RegClass, DL);
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
    .add(MI.getOperand(1));

  MI.eraseFromParent();

  if (!MRI->getRegClassOrNull(Reg))
    MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());

bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
  MachineBasicBlock *MBB = MI.getParent();

  unsigned IndexOperand = MI.getOperand(7).getImm();
  bool WaveRelease = MI.getOperand(8).getImm() != 0;
  bool WaveDone = MI.getOperand(9).getImm() != 0;

  if (WaveDone && !WaveRelease) {
        Fn, "ds_ordered_count: wave_done requires wave_release", DL));

  unsigned OrderedCountIndex = IndexOperand & 0x3f;
  IndexOperand &= ~0x3f;
  unsigned CountDw = 0;

    CountDw = (IndexOperand >> 24) & 0xf;
    IndexOperand &= ~(0xf << 24);

    if (CountDw < 1 || CountDw > 4) {
          Fn, "ds_ordered_count: dword count must be between 1 and 4", DL));

        Fn, "ds_ordered_count: bad index operand", DL));

  unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;

  unsigned Offset0 = OrderedCountIndex << 2;
  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);

    Offset1 |= (CountDw - 1) << 6;

    Offset1 |= ShaderType << 2;

  unsigned Offset = Offset0 | (Offset1 << 8);

  MachineInstrBuilder DS =
      BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)

  if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI))

  MI.eraseFromParent();
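// Map the ds_gws_* intrinsics onto their DS_GWS_* machine opcodes.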
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;

bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
  if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
                        !STI.hasGWSSemaReleaseAll()))

  const bool HasVSrc = MI.getNumOperands() == 3;
  assert(HasVSrc || MI.getNumOperands() == 2);

  Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
  const RegisterBank *OffsetRB = RBI.getRegBank(BaseOffset, *MRI, TRI);
  if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)

  MachineBasicBlock *MBB = MI.getParent();

  MachineInstr *Readfirstlane = nullptr;

  if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
    Readfirstlane = OffsetDef;

  if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {

  std::tie(BaseOffset, ImmOffset) =

  if (Readfirstlane) {
    if (!RBI.constrainGenericRegister(BaseOffset, AMDGPU::VGPR_32RegClass, *MRI))

    if (!RBI.constrainGenericRegister(BaseOffset,
                                      AMDGPU::SReg_32RegClass, *MRI))

  Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

    if (!RBI.constrainGenericRegister(VSrc, AMDGPU::VGPR_32RegClass, *MRI))

    TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::data0);

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
                                                      bool IsAppend) const {
  Register PtrBase = MI.getOperand(2).getReg();
  LLT PtrTy = MRI->getType(PtrBase);

  std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));

  if (!isDSOffsetLegal(PtrBase, Offset)) {
    PtrBase = MI.getOperand(2).getReg();

  MachineBasicBlock *MBB = MI.getParent();
  const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;

  if (!RBI.constrainGenericRegister(PtrBase, AMDGPU::SReg_32RegClass, *MRI))

  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectInitWholeWave(MachineInstr &MI) const {
  MachineFunction *MF = MI.getParent()->getParent();
  SIMachineFunctionInfo *MFInfo = MF->getInfo<SIMachineFunctionInfo>();

  TFE = TexFailCtrl & 0x1;
  LWE = TexFailCtrl & 0x2;

  return TexFailCtrl == 0;
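// Image intrinsic selection: decode dmask, unorm, d16, a16/g16 and texfail
// flags, compute the packed VData/VAddr dword counts, and choose the MIMG
// opcode for the NSA or default encoding of the current GPU generation.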
bool AMDGPUInstructionSelector::selectImageIntrinsic(
  MachineBasicBlock *MBB = MI.getParent();

    Register ResultDef = MI.getOperand(0).getReg();
    if (MRI->use_nodbg_empty(ResultDef))

  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =

  const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;

  Register VDataIn = AMDGPU::NoRegister;
  Register VDataOut = AMDGPU::NoRegister;

  int NumVDataDwords = -1;
  bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
               MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;

    Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;

  bool IsTexFail = false;
                      TFE, LWE, IsTexFail))

  const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();
  const bool IsA16 = (Flags & 1) != 0;
  const bool IsG16 = (Flags & 2) != 0;

  if (IsA16 && !STI.hasG16() && !IsG16)

  unsigned DMaskLanes = 0;

  if (BaseOpcode->Atomic) {
    VDataOut = MI.getOperand(0).getReg();
    VDataIn = MI.getOperand(2).getReg();
    LLT Ty = MRI->getType(VDataIn);

    const bool Is64Bit = BaseOpcode->AtomicX2 ?
      assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);

      DMask = Is64Bit ? 0xf : 0x3;
      NumVDataDwords = Is64Bit ? 4 : 2;
      DMask = Is64Bit ? 0x3 : 0x1;
      NumVDataDwords = Is64Bit ? 2 : 1;

    DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();

    if (BaseOpcode->Store) {
      VDataIn = MI.getOperand(1).getReg();
      VDataTy = MRI->getType(VDataIn);

      VDataOut = MI.getOperand(0).getReg();
      VDataTy = MRI->getType(VDataOut);
      NumVDataDwords = DMaskLanes;

      if (IsD16 && !STI.hasUnpackedD16VMem())
        NumVDataDwords = (DMaskLanes + 1) / 2;

  if (Subtarget->hasG16() && IsG16) {
    const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =
    IntrOpcode = G16MappingInfo->G16;

  assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");

  int NumVAddrRegs = 0;
  int NumVAddrDwords = 0;
    MachineOperand &AddrOp = MI.getOperand(ArgOffset + I);
    if (!AddrOp.isReg())

      NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;

      NumVAddrRegs != 1 &&
      (STI.hasPartialNSAEncoding() ? NumVAddrDwords >= NumVAddrRegs
                                   : NumVAddrDwords == NumVAddrRegs);
  if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {

                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX11Plus) {
                                   UseNSA ? AMDGPU::MIMGEncGfx11NSA
                                          : AMDGPU::MIMGEncGfx11Default,
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX10Plus) {
                                   UseNSA ? AMDGPU::MIMGEncGfx10NSA
                                          : AMDGPU::MIMGEncGfx10Default,
                                   NumVDataDwords, NumVAddrDwords);
    if (Subtarget->hasGFX90AInsts()) {
                                     NumVDataDwords, NumVAddrDwords);
                 << "requested image instruction is not supported on this GPU\n");

                                     NumVDataDwords, NumVAddrDwords);
                                     NumVDataDwords, NumVAddrDwords);

    const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;

      Register TmpReg = MRI->createVirtualRegister(
        Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
      unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;

      if (!MRI->use_empty(VDataOut)) {

  for (int I = 0; I != NumVAddrRegs; ++I) {
    MachineOperand &SrcOp = MI.getOperand(ArgOffset + Intr->VAddrStart + I);
    if (SrcOp.isReg()) {

            STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
    MIB.addImm(IsA16 ? -1 : 0);

  if (!Subtarget->hasGFX90AInsts()) {

    MIB.addImm(IsD16 ? -1 : 0);

  MI.eraseFromParent();

  TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::vaddr);
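// ds_bvh_stack_* intrinsics map directly onto the DS_BVH_STACK_*_RTN opcodes.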
bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
  MachineBasicBlock *MBB = MI.getParent();

  unsigned Offset = MI.getOperand(6).getImm();

  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;

  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_end_cf:
    return selectEndCfIntrinsic(I);
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap:
    return selectDSOrderedIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return selectDSGWSIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_append:
    return selectDSAppendConsume(I, true);
  case Intrinsic::amdgcn_ds_consume:
    return selectDSAppendConsume(I, false);
  case Intrinsic::amdgcn_init_whole_wave:
    return selectInitWholeWave(I);
  case Intrinsic::amdgcn_raw_buffer_load_lds:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
  case Intrinsic::amdgcn_struct_buffer_load_lds:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
    return selectBufferLoadLds(I);
  case Intrinsic::amdgcn_load_to_lds:
  case Intrinsic::amdgcn_global_load_lds:
    return selectGlobalLoadLds(I);
  case Intrinsic::amdgcn_exp_compr:
    if (!STI.hasCompressedExport()) {
      F.getContext().diagnose(
          DiagnosticInfoUnsupported(F, "intrinsic not supported on subtarget",
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    return selectDSBvhStackIntrinsic(I);
  case Intrinsic::amdgcn_s_barrier_init:
  case Intrinsic::amdgcn_s_barrier_signal_var:
    return selectNamedBarrierInit(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_join:
  case Intrinsic::amdgcn_s_get_named_barrier_state:
    return selectNamedBarrierInst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_get_barrier_state:
    return selectSGetBarrierState(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_signal_isfirst:
    return selectSBarrierSignalIsfirst(I, IntrinsicID);
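// G_SELECT: scalar conditions are copied into SCC and use S_CSELECT_B32/B64;
// VCC lane-mask conditions use V_CNDMASK_B32_e64 with the false value in
// src0 and the true value in src1.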
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);

  const MachineOperand &CCOp = I.getOperand(1);

  if (!isVCC(CCReg, *MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)

    if (!MRI->getRegClassOrNull(CCReg))
      MRI->setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, *MRI));

      .add(I.getOperand(2))
      .add(I.getOperand(3));

    I.eraseFromParent();

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
      .add(I.getOperand(3))
      .add(I.getOperand(2))
      .add(I.getOperand(1));

  I.eraseFromParent();
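// G_TRUNC: most truncations become a subregister COPY. Truncating a pair of
// 32-bit elements to packed 16-bit halves uses an SDWA move when available,
// or a shift/and/or sequence to repack the halves.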
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);

  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
  const RegisterBank *DstRB;
    DstRB = RBI.getRegBank(DstReg, *MRI, TRI);

  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;

    const TargetRegisterClass *SrcRC =
        TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB);
    const TargetRegisterClass *DstRC =
        TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
    if (!SrcRC || !DstRC)

    if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
        !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) {

    if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {
      assert(STI.useRealTrue16Insts());
        .addReg(SrcReg, 0, AMDGPU::lo16);
      I.eraseFromParent();

    Register LoReg = MRI->createVirtualRegister(DstRC);
    Register HiReg = MRI->createVirtualRegister(DstRC);
      .addReg(SrcReg, 0, AMDGPU::sub0);
      .addReg(SrcReg, 0, AMDGPU::sub1);

    if (IsVALU && STI.hasSDWA()) {
      MachineInstr *MovSDWA =
          BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)

      Register TmpReg0 = MRI->createVirtualRegister(DstRC);
      Register TmpReg1 = MRI->createVirtualRegister(DstRC);
      Register ImmReg = MRI->createVirtualRegister(DstRC);
      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)

      unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
      unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;

        And.setOperandDead(3);
        Or.setOperandDead(3);

    I.eraseFromParent();

  unsigned SubRegIdx = DstSize < 32
                           ? static_cast<unsigned>(AMDGPU::sub0)
                           : TRI.getSubRegFromChannel(0, DstSize / 32);
  if (SubRegIdx == AMDGPU::NoSubRegister)

  const TargetRegisterClass *SrcWithSubRC
    = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);

  if (SrcWithSubRC != SrcRC) {
    if (!RBI.constrainGenericRegister(SrcReg, *SrcWithSubRC, *MRI))

  I.getOperand(1).setSubReg(SubRegIdx);

  I.setDesc(TII.get(TargetOpcode::COPY));

  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;

const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
  return &RBI.getRegBankFromRegClass(*RC, LLT());
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?

  const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);

  if (I.getOpcode() == AMDGPU::G_ANYEXT) {
      return selectCOPY(I);

    const TargetRegisterClass *SrcRC =
        TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
    const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
    const TargetRegisterClass *DstRC =
        TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

    Register UndefReg = MRI->createVirtualRegister(SrcRC);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);

    I.eraseFromParent();

    return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) &&
           RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
      MachineInstr *ExtI =
      I.eraseFromParent();

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
    MachineInstr *ExtI =
    I.eraseFromParent();

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    const TargetRegisterClass &SrcRC = InReg && DstSize > 32 ?
      AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
    if (!RBI.constrainGenericRegister(SrcReg, SrcRC, *MRI))

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);

    if (DstSize > 32 && SrcSize == 32) {
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass,

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
      Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;

      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);

      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, *MRI);

    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);

  if (Shuffle->getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)

  assert(Mask.size() == 2);

  if (Mask[0] == 1 && Mask[1] <= 1) {
bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
  if (!Subtarget->hasSALUFloatInsts())

  Register Dst = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID)

  Register Src = I.getOperand(1).getReg();

    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
    I.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);

bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||

  MachineInstr *Fabs = getOpcodeDef(TargetOpcode::G_FABS, Src, *MRI);

  if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))

  MachineBasicBlock *BB = MI.getParent();

  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
    .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
    .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)

  unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||

  MachineBasicBlock *BB = MI.getParent();

  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
    .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
    .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)

  MI.eraseFromParent();

  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
  unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
  const MachineInstr *PtrMI =
      MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg());

  if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)

  for (unsigned i = 1; i != 3; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());

      assert(GEPInfo.Imm == 0);

    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
      GEPInfo.VgprParts.push_back(GEPOp.getReg());

  getAddrModeInfo(*PtrMI, MRI, AddrInfo);

bool AMDGPUInstructionSelector::isSGPR(Register Reg) const {
  return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())

  const MachineMemOperand *MMO = *MI.memoperands_begin();

  if (MI.getOpcode() == AMDGPU::G_PREFETCH)
    return RBI.getRegBank(MI.getOperand(0).getReg(), *MRI, TRI)->getID() ==
           AMDGPU::SGPRRegBankID;

  return I && I->getMetadata("amdgpu.uniform");

  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())

void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());

      STI.ldsRequiresM0Init()) {
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)

bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(

  if (Reg.isPhysical())

  const unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::COPY)

  if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
      Opcode == AMDGPU::G_XOR)

    return GI->is(Intrinsic::amdgcn_class);

  return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
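// G_BRCOND: scalar conditions branch on SCC via S_CBRANCH_SCC1; lane-mask
// conditions are ANDed with EXEC and branch with S_CBRANCH_VCCNZ.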
3020 bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
3022   MachineOperand &CondOp = I.getOperand(0);
3028   const TargetRegisterClass *ConstrainRC;
3035   if (!isVCC(CondReg, *MRI)) {
3039     CondPhysReg = AMDGPU::SCC;
3040     BrOpcode = AMDGPU::S_CBRANCH_SCC1;
3041     ConstrainRC = &AMDGPU::SReg_32RegClass;
3048     const bool Is64 = STI.isWave64();
3049     const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
3050     const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
3052     Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());
3053     BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)
3060     CondPhysReg = TRI.getVCC();
3061     BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
3062     ConstrainRC = TRI.getBoolRC();
3065   if (!MRI->getRegClassOrNull(CondReg))
3066     MRI->setRegClass(CondReg, ConstrainRC);
3068   BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
3071       .addMBB(I.getOperand(1).getMBB());
3073   I.eraseFromParent();
3077 bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
3079   Register DstReg = I.getOperand(0).getReg();
3080   const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3081   const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
3082   I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
3086   return RBI.constrainGenericRegister(
3087       DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
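// selectG_PTRMASK(): lowered to plain AND instructions. If known-bits analysis
// proves one 32-bit half of the mask is all ones, that half of the pointer is
// copied through untouched and only the other half is ANDed before the result
// is reassembled with REG_SEQUENCE.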
3090 bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
3091   Register DstReg = I.getOperand(0).getReg();
3092   Register SrcReg = I.getOperand(1).getReg();
3093   Register MaskReg = I.getOperand(2).getReg();
3094   LLT Ty = MRI->getType(DstReg);
3095   LLT MaskTy = MRI->getType(MaskReg);
3099   const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3100   const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
3101   const RegisterBank *MaskRB = RBI.getRegBank(MaskReg, *MRI, TRI);
3102   const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
3108   APInt MaskOnes = VT->getKnownOnes(MaskReg).zext(64);
3112   const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
3113   const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
3116       !CanCopyLow32 && !CanCopyHi32) {
3117     auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
3121     I.eraseFromParent();
3125   unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
3126   const TargetRegisterClass &RegRC
3127       = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
3129   const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB);
3130   const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB);
3131   const TargetRegisterClass *MaskRC =
3132       TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
3134   if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
3135       !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
3136       !RBI.constrainGenericRegister(MaskReg, *MaskRC, *MRI))
3141          "ptrmask should have been narrowed during legalize");
3143   auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
3149   I.eraseFromParent();
3153   Register HiReg = MRI->createVirtualRegister(&RegRC);
3154   Register LoReg = MRI->createVirtualRegister(&RegRC);
3157   BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
3158       .addReg(SrcReg, 0, AMDGPU::sub0);
3159   BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
3160       .addReg(SrcReg, 0, AMDGPU::sub1);
3169     Register MaskLo = MRI->createVirtualRegister(&RegRC);
3170     MaskedLo = MRI->createVirtualRegister(&RegRC);
3172     BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo)
3173         .addReg(MaskReg, 0, AMDGPU::sub0);
3174     BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo)
3183     Register MaskHi = MRI->createVirtualRegister(&RegRC);
3184     MaskedHi = MRI->createVirtualRegister(&RegRC);
3186     BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi)
3187         .addReg(MaskReg, 0, AMDGPU::sub1);
3188     BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi)
3193   BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
3198   I.eraseFromParent();
3204 static std::pair<Register, unsigned>
3211   std::tie(IdxBaseReg, Offset) =
3213   if (IdxBaseReg == AMDGPU::NoRegister) {
3217     IdxBaseReg = IdxReg;
3224   if (static_cast<unsigned>(Offset) >= SubRegs.size())
3225     return std::pair(IdxReg, SubRegs[0]);
3226   return std::pair(IdxBaseReg, SubRegs[Offset]);
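// selectG_EXTRACT_VECTOR_ELT(): dynamic extracts need the index in an SGPR. An
// SGPR-bank vector is read via M0 + S_MOVRELS_B32/B64; a VGPR-bank vector uses
// M0 + V_MOVRELS_B32, or the GPR-index pseudo on subtargets with VGPR index mode.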
3229 bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
3235   LLT DstTy = MRI->getType(DstReg);
3236   LLT SrcTy = MRI->getType(SrcReg);
3238   const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3239   const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
3240   const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
3244   if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
3247   const TargetRegisterClass *SrcRC =
3248       TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
3249   const TargetRegisterClass *DstRC =
3250       TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
3251   if (!SrcRC || !DstRC)
3253   if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
3254       !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
3255       !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
3258   MachineBasicBlock *BB = MI.getParent();
3266   if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
3270     BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3273     unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
3277     MI.eraseFromParent();
3284   if (!STI.useVGPRIndexMode()) {
3285     BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3287     BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
3290     MI.eraseFromParent();
3294   const MCInstrDesc &GPRIDXDesc =
3295       TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*SrcRC), true);
3301   MI.eraseFromParent();
3306 bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
3313   LLT VecTy = MRI->getType(DstReg);
3314   LLT ValTy = MRI->getType(ValReg);
3318   const RegisterBank *VecRB = RBI.getRegBank(VecReg, *MRI, TRI);
3319   const RegisterBank *ValRB = RBI.getRegBank(ValReg, *MRI, TRI);
3320   const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
3326   if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
3329   const TargetRegisterClass *VecRC =
3330       TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
3331   const TargetRegisterClass *ValRC =
3332       TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
3334   if (!RBI.constrainGenericRegister(VecReg, *VecRC, *MRI) ||
3335       !RBI.constrainGenericRegister(DstReg, *VecRC, *MRI) ||
3336       !RBI.constrainGenericRegister(ValReg, *ValRC, *MRI) ||
3337       !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
3340   if (VecRB->getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
3344   std::tie(IdxReg, SubReg) =
3347   const bool IndexMode = VecRB->getID() == AMDGPU::VGPRRegBankID &&
3348                          STI.useVGPRIndexMode();
3350   MachineBasicBlock *BB = MI.getParent();
3354     BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3357     const MCInstrDesc &RegWriteOp = TII.getIndirectRegWriteMovRelPseudo(
3358         VecSize, ValSize, VecRB->getID() == AMDGPU::SGPRRegBankID);
3363     MI.eraseFromParent();
3367   const MCInstrDesc &GPRIDXDesc =
3368       TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), false);
3375   MI.eraseFromParent();
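// selectBufferLoadLds(): direct buffer-to-LDS loads. The BUFFER_LOAD_*_LDS_
// {BOTHEN,IDXEN,OFFEN,OFFSET} opcode is chosen from the element size and from
// whether a VGPR index and/or voffset is present; the 96/128-bit forms require
// hasLDSLoadB96_B128().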
3379 bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
3380   if (!Subtarget->hasVMemToLDSLoad())
3383   unsigned Size = MI.getOperand(3).getImm();
3386   const bool HasVIndex = MI.getNumOperands() == 9;
3390     VIndex = MI.getOperand(4).getReg();
3394   Register VOffset = MI.getOperand(4 + OpOffset).getReg();
3395   std::optional<ValueAndVReg> MaybeVOffset =
3397   const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();
3403 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
3404 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
3405 : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
3406 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
3409 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
3410 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
3411 : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
3412 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
3415 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
3416 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
3417 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
3418 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
3421 if (!Subtarget->hasLDSLoadB96_B128())
3424 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN
3425 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN
3426 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN
3427 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;
3430 if (!Subtarget->hasLDSLoadB96_B128())
3433 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN
3434 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN
3435 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN
3436 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;
3440   MachineBasicBlock *MBB = MI.getParent();
3443       .add(MI.getOperand(2));
3447   if (HasVIndex && HasVOffset) {
3448     Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
3449     BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
3456   } else if (HasVIndex) {
3458   } else if (HasVOffset) {
3462   MIB.add(MI.getOperand(1));
3463   MIB.add(MI.getOperand(5 + OpOffset));
3464   MIB.add(MI.getOperand(6 + OpOffset));
3466   unsigned Aux = MI.getOperand(7 + OpOffset).getImm();
3474   MachineMemOperand *LoadMMO = *MI.memoperands_begin();
3479   MachinePointerInfo StorePtrI = LoadPtrI;
3490   MachineMemOperand *StoreMMO =
3496   MI.eraseFromParent();
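// matchZeroExtendFromS32() and friends peel a G_MERGE_VALUES(lo, 0) (or a value
// whose sign bit is known zero) back to its 32-bit source so a 64-bit offset can
// be folded as a 32-bit VGPR offset.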
3508   if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3514   return Def->getOperand(1).getReg();
3528   if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3536     return Def->getOperand(1).getReg();
3538   if (VT->signBitIsZero(Reg))
3539     return matchZeroExtendFromS32(Reg);
3547 AMDGPUInstructionSelector::matchZeroExtendFromS32OrS32(Register Reg) const {
3549       : matchZeroExtendFromS32(Reg);
3555 AMDGPUInstructionSelector::matchSignExtendFromS32OrS32(Register Reg) const {
3557       : matchSignExtendFromS32(Reg);
3561 AMDGPUInstructionSelector::matchExtendFromS32OrS32(Register Reg,
3562                                                    bool IsSigned) const {
3564     return matchSignExtendFromS32OrS32(Reg);
3566   return matchZeroExtendFromS32OrS32(Reg);
3576   if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3583   return Def->getOperand(1).getReg();
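// selectGlobalLoadLds(): GLOBAL_LOAD_LDS_* variant of the buffer path above; a
// non-SGPR address may be split into an SGPR base plus a zero-extended 32-bit
// VGPR offset when that pattern can be matched.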
3588 bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const {
3589   if (!Subtarget->hasVMemToLDSLoad())
3593   unsigned Size = MI.getOperand(3).getImm();
3599 Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
3602 Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
3605 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
3608 if (!Subtarget->hasLDSLoadB96_B128())
3610 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3;
3613 if (!Subtarget->hasLDSLoadB96_B128())
3615 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4;
3619   MachineBasicBlock *MBB = MI.getParent();
3622       .add(MI.getOperand(2));
3628   if (!isSGPR(Addr)) {
3630     if (isSGPR(AddrDef->Reg)) {
3631       Addr = AddrDef->Reg;
3632     } else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
3635       if (isSGPR(SAddr)) {
3636         Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
3637         if (Register Off = matchZeroExtendFromS32(PtrBaseOffset)) {
3648     VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3660   MIB.add(MI.getOperand(4));
3662   unsigned Aux = MI.getOperand(5).getImm();
3665   MachineMemOperand *LoadMMO = *MI.memoperands_begin();
3667   LoadPtrI.Offset = MI.getOperand(4).getImm();
3668   MachinePointerInfo StorePtrI = LoadPtrI;
3677   MachineMemOperand *StoreMMO =
3679       sizeof(int32_t), Align(4));
3683   MI.eraseFromParent();
3687 bool AMDGPUInstructionSelector::selectBVHIntersectRayIntrinsic(
3689   unsigned OpcodeOpIdx =
3690       MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY ? 1 : 3;
3691   MI.setDesc(TII.get(MI.getOperand(OpcodeOpIdx).getImm()));
3692   MI.removeOperand(OpcodeOpIdx);
3693   MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
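// selectSMFMACIntrin(): map each amdgcn_smfmac_* intrinsic to its _e64 machine
// opcode, move the tied VDst_In operand into place, and mark the destination
// early-clobber.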
3699 bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
3702 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
3703 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
3705 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
3706 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
3708 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
3709 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
3711 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
3712 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
3714 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
3715 Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
3717 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
3718 Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
3720 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
3721 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
3723 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
3724 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
3726 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
3727 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
3729 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
3730 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
3732 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
3733 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
3735 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
3736 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
3738 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
3739 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
3741 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
3742 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
3744 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
3745 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_F16_e64;
3747 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
3748 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_F16_e64;
3750 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
3751 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF16_e64;
3753 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
3754 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF16_e64;
3756 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
3757 Opc = AMDGPU::V_SMFMAC_I32_16X16X128_I8_e64;
3759 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
3760 Opc = AMDGPU::V_SMFMAC_I32_32X32X64_I8_e64;
3762 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
3763 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_BF8_e64;
3765 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
3766 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_FP8_e64;
3768 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
3769 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_BF8_e64;
3771 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
3772 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_FP8_e64;
3774 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
3775 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_BF8_e64;
3777 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
3778 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_FP8_e64;
3780 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
3781 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_BF8_e64;
3783 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
3784 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_FP8_e64;
3790   auto VDst_In = MI.getOperand(4);
3792   MI.setDesc(TII.get(Opc));
3793   MI.removeOperand(4);
3794   MI.removeOperand(1);
3795   MI.addOperand(VDst_In);
3796   MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
3797   const MCInstrDesc &MCID = MI.getDesc();
3799     MI.getOperand(0).setIsEarlyClobber(true);
3804bool AMDGPUInstructionSelector::selectPermlaneSwapIntrin(
3806 if (IntrID == Intrinsic::amdgcn_permlane16_swap &&
3807 !Subtarget->hasPermlane16Swap())
3809 if (IntrID == Intrinsic::amdgcn_permlane32_swap &&
3810 !Subtarget->hasPermlane32Swap())
3813 unsigned Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
3814 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
3815 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
3817 MI.removeOperand(2);
3818 MI.setDesc(TII.get(Opcode));
3821   MachineOperand &FI = MI.getOperand(4);
3827 bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
3830   const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3831   const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
3832   MachineBasicBlock *MBB = MI.getParent();
3836     BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
3837         .addImm(Subtarget->getWavefrontSizeLog2())
3842         .addImm(Subtarget->getWavefrontSizeLog2())
3846   const TargetRegisterClass &RC =
3847       IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
3848   if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
3851   MI.eraseFromParent();
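// BitOp3_Op(): fold a tree of G_AND/G_OR/G_XOR over at most three distinct
// sources into an 8-bit truth table for V_BITOP3, using 0xf0/0xcc/0xaa as the
// canonical per-source bit patterns; returns {opcode count, truth table}.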
3860   unsigned NumOpcodes = 0;
3873     const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
3884     for (unsigned I = 0; I < Src.size(); ++I) {
3898     if (Src.size() == 3) {
3905     for (unsigned I = 0; I < Src.size(); ++I) {
3906       if (Src[I] == LHS) {
3916     Bits = SrcBits[Src.size()];
3922   switch (MI->getOpcode()) {
3923   case TargetOpcode::G_AND:
3924   case TargetOpcode::G_OR:
3925   case TargetOpcode::G_XOR: {
3930   if (!getOperandBits(LHS, LHSBits) ||
3931       !getOperandBits(RHS, RHSBits)) {
3933     return std::make_pair(0, 0);
3939     NumOpcodes += Op.first;
3940     LHSBits = Op.second;
3945     NumOpcodes += Op.first;
3946     RHSBits = Op.second;
3951     return std::make_pair(0, 0);
3955   switch (MI->getOpcode()) {
3956   case TargetOpcode::G_AND:
3957     TTbl = LHSBits & RHSBits;
3959   case TargetOpcode::G_OR:
3960     TTbl = LHSBits | RHSBits;
3962   case TargetOpcode::G_XOR:
3963     TTbl = LHSBits ^ RHSBits;
3969   return std::make_pair(NumOpcodes + 1, TTbl);
3972 bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const {
3973   if (!Subtarget->hasBitOp3Insts())
3977   const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3978   const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
3984   unsigned NumOpcodes;
3986   std::tie(NumOpcodes, TTbl) = BitOp3_Op(DstReg, Src, *MRI);
3990   if (NumOpcodes < 2 || Src.empty())
3993   const bool IsB32 = MRI->getType(DstReg) == LLT::scalar(32);
3994   if (NumOpcodes == 2 && IsB32) {
4002   } else if (NumOpcodes < 4) {
4009   unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;
4010   if (!IsB32 && STI.hasTrue16BitInsts())
4011     Opc = STI.useRealTrue16Insts() ? AMDGPU::V_BITOP3_B16_gfx1250_t16_e64
4012                                    : AMDGPU::V_BITOP3_B16_gfx1250_fake16_e64;
4013   unsigned CBL = STI.getConstantBusLimit(Opc);
4014   MachineBasicBlock *MBB = MI.getParent();
4017   for (unsigned I = 0; I < Src.size(); ++I) {
4018     const RegisterBank *RB = RBI.getRegBank(Src[I], *MRI, TRI);
4019     if (RB->getID() != AMDGPU::SGPRRegBankID)
4025     Register NewReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4036   while (Src.size() < 3)
4037     Src.push_back(Src[0]);
4054   MI.eraseFromParent();
4059 bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
4061   if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI))
4064   MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
4066       Subtarget->getTargetLowering()->getStackPointerRegisterToSaveRestore();
4068   MachineBasicBlock *MBB = MI.getParent();
4072     WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
4075         .addImm(Subtarget->getWavefrontSizeLog2())
4082   MI.eraseFromParent();
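// select(): main GlobalISel entry point. Non-pre-ISel opcodes are treated as
// copies; everything else is dispatched by generic opcode, and opcodes not
// handled here are normally left to the imported TableGen patterns.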
4088   if (!I.isPreISelOpcode()) {
4090       return selectCOPY(I);
4094   switch (I.getOpcode()) {
4095   case TargetOpcode::G_AND:
4096   case TargetOpcode::G_OR:
4097   case TargetOpcode::G_XOR:
4098     if (selectBITOP3(I))
4102     return selectG_AND_OR_XOR(I);
4103   case TargetOpcode::G_ADD:
4104   case TargetOpcode::G_SUB:
4105   case TargetOpcode::G_PTR_ADD:
4108     return selectG_ADD_SUB(I);
4109   case TargetOpcode::G_UADDO:
4110   case TargetOpcode::G_USUBO:
4111   case TargetOpcode::G_UADDE:
4112   case TargetOpcode::G_USUBE:
4113     return selectG_UADDO_USUBO_UADDE_USUBE(I);
4114   case AMDGPU::G_AMDGPU_MAD_U64_U32:
4115   case AMDGPU::G_AMDGPU_MAD_I64_I32:
4116     return selectG_AMDGPU_MAD_64_32(I);
4117   case TargetOpcode::G_INTTOPTR:
4118   case TargetOpcode::G_BITCAST:
4119   case TargetOpcode::G_PTRTOINT:
4120   case TargetOpcode::G_FREEZE:
4121     return selectCOPY(I);
4122   case TargetOpcode::G_FNEG:
4125     return selectG_FNEG(I);
4126   case TargetOpcode::G_FABS:
4129     return selectG_FABS(I);
4130   case TargetOpcode::G_EXTRACT:
4131     return selectG_EXTRACT(I);
4132   case TargetOpcode::G_MERGE_VALUES:
4133   case TargetOpcode::G_CONCAT_VECTORS:
4134     return selectG_MERGE_VALUES(I);
4135   case TargetOpcode::G_UNMERGE_VALUES:
4136     return selectG_UNMERGE_VALUES(I);
4137   case TargetOpcode::G_BUILD_VECTOR:
4138   case TargetOpcode::G_BUILD_VECTOR_TRUNC:
4139     return selectG_BUILD_VECTOR(I);
4140   case TargetOpcode::G_IMPLICIT_DEF:
4141     return selectG_IMPLICIT_DEF(I);
4142   case TargetOpcode::G_INSERT:
4143     return selectG_INSERT(I);
4144   case TargetOpcode::G_INTRINSIC:
4145   case TargetOpcode::G_INTRINSIC_CONVERGENT:
4146     return selectG_INTRINSIC(I);
4147   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
4148   case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
4149     return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
4150   case TargetOpcode::G_ICMP:
4151   case TargetOpcode::G_FCMP:
4152     if (selectG_ICMP_or_FCMP(I))
4155   case TargetOpcode::G_LOAD:
4156   case TargetOpcode::G_ZEXTLOAD:
4157   case TargetOpcode::G_SEXTLOAD:
4158   case TargetOpcode::G_STORE:
4159   case TargetOpcode::G_ATOMIC_CMPXCHG:
4160   case TargetOpcode::G_ATOMICRMW_XCHG:
4161   case TargetOpcode::G_ATOMICRMW_ADD:
4162   case TargetOpcode::G_ATOMICRMW_SUB:
4163   case TargetOpcode::G_ATOMICRMW_AND:
4164   case TargetOpcode::G_ATOMICRMW_OR:
4165   case TargetOpcode::G_ATOMICRMW_XOR:
4166   case TargetOpcode::G_ATOMICRMW_MIN:
4167   case TargetOpcode::G_ATOMICRMW_MAX:
4168   case TargetOpcode::G_ATOMICRMW_UMIN:
4169   case TargetOpcode::G_ATOMICRMW_UMAX:
4170   case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
4171   case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
4172   case TargetOpcode::G_ATOMICRMW_FADD:
4173   case TargetOpcode::G_ATOMICRMW_FMIN:
4174   case TargetOpcode::G_ATOMICRMW_FMAX:
4175     return selectG_LOAD_STORE_ATOMICRMW(I);
4176   case TargetOpcode::G_SELECT:
4177     return selectG_SELECT(I);
4178   case TargetOpcode::G_TRUNC:
4179     return selectG_TRUNC(I);
4180   case TargetOpcode::G_SEXT:
4181   case TargetOpcode::G_ZEXT:
4182   case TargetOpcode::G_ANYEXT:
4183   case TargetOpcode::G_SEXT_INREG:
4187     if (MRI->getType(I.getOperand(1).getReg()) != LLT::scalar(1) &&
4190     return selectG_SZA_EXT(I);
4191   case TargetOpcode::G_FPEXT:
4192     if (selectG_FPEXT(I))
4195   case TargetOpcode::G_BRCOND:
4196     return selectG_BRCOND(I);
4197   case TargetOpcode::G_GLOBAL_VALUE:
4198     return selectG_GLOBAL_VALUE(I);
4199   case TargetOpcode::G_PTRMASK:
4200     return selectG_PTRMASK(I);
4201   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4202     return selectG_EXTRACT_VECTOR_ELT(I);
4203   case TargetOpcode::G_INSERT_VECTOR_ELT:
4204     return selectG_INSERT_VECTOR_ELT(I);
4205   case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
4206   case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
4207   case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
4208   case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
4209   case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
4212     assert(Intr && "not an image intrinsic with image pseudo");
4213     return selectImageIntrinsic(I, Intr);
4215   case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
4216   case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
4217   case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
4218     return selectBVHIntersectRayIntrinsic(I);
4219   case AMDGPU::G_SBFX:
4220   case AMDGPU::G_UBFX:
4221     return selectG_SBFX_UBFX(I);
4222   case AMDGPU::G_SI_CALL:
4223     I.setDesc(TII.get(AMDGPU::SI_CALL));
4225   case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
4226     return selectWaveAddress(I);
4227   case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN: {
4228     I.setDesc(TII.get(AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN));
4231   case AMDGPU::G_STACKRESTORE:
4232     return selectStackRestore(I);
4234     return selectPHI(I);
4235   case AMDGPU::G_AMDGPU_COPY_SCC_VCC:
4236     return selectCOPY_SCC_VCC(I);
4237   case AMDGPU::G_AMDGPU_COPY_VCC_SCC:
4238     return selectCOPY_VCC_SCC(I);
4239   case AMDGPU::G_AMDGPU_READANYLANE:
4240     return selectReadAnyLane(I);
4241   case TargetOpcode::G_CONSTANT:
4242   case TargetOpcode::G_FCONSTANT:
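// The complex-pattern renderers used by the imported TableGen patterns start
// here: each one folds source modifiers (neg/abs/op_sel), immediates, or
// addressing-mode pieces and returns a list of operand-render lambdas.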
4250 AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
4257 std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
4258     Register Src, bool IsCanonicalizing, bool AllowAbs, bool OpSel) const {
4262   if (MI->getOpcode() == AMDGPU::G_FNEG) {
4263     Src = MI->getOperand(1).getReg();
4266   } else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
4271     if (LHS && LHS->isZero()) {
4273       Src = MI->getOperand(2).getReg();
4277   if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {
4278     Src = MI->getOperand(1).getReg();
4285   return std::pair(Src, Mods);
4288 Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
4290     bool ForceVGPR) const {
4291   if ((Mods != 0 || ForceVGPR) &&
4292       RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {
4299                    TII.get(AMDGPU::COPY), VGPRSrc)
4311 AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
4313       [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
4318 AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
4321   std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
4324       [=](MachineInstrBuilder &MIB) {
4325         MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4327       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); },
4328       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4329       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
4334AMDGPUInstructionSelector::selectVOP3BMods0(
MachineOperand &Root)
const {
4337 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
4342 [=](MachineInstrBuilder &MIB) {
4343 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4345 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
4346 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4347 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4352AMDGPUInstructionSelector::selectVOP3OMods(
MachineOperand &Root)
const {
4354 [=](MachineInstrBuilder &MIB) { MIB.
add(Root); },
4355 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4356 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4361AMDGPUInstructionSelector::selectVOP3Mods(
MachineOperand &Root)
const {
4364 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
4367 [=](MachineInstrBuilder &MIB) {
4368 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4370 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4375AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
4379 std::tie(Src, Mods) =
4380 selectVOP3ModsImpl(Root.
getReg(),
false);
4383 [=](MachineInstrBuilder &MIB) {
4384 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4386 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4391AMDGPUInstructionSelector::selectVOP3BMods(
MachineOperand &Root)
const {
4394 std::tie(Src, Mods) =
4395 selectVOP3ModsImpl(Root.
getReg(),
true,
4399 [=](MachineInstrBuilder &MIB) {
4400 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4402 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4407AMDGPUInstructionSelector::selectVOP3NoMods(
MachineOperand &Root)
const {
4410 if (
Def->getOpcode() == AMDGPU::G_FNEG ||
Def->getOpcode() == AMDGPU::G_FABS)
4413 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
4438 if (
MI->getOpcode() != AMDGPU::G_TRUNC)
4441 unsigned DstSize =
MRI.getType(
MI->getOperand(0).getReg()).getSizeInBits();
4442 unsigned SrcSize =
MRI.getType(
MI->getOperand(1).getReg()).getSizeInBits();
4443 return DstSize * 2 == SrcSize;
4449 if (
MI->getOpcode() != AMDGPU::G_LSHR)
4453 std::optional<ValueAndVReg> ShiftAmt;
4456 unsigned SrcSize =
MRI.getType(
MI->getOperand(1).getReg()).getSizeInBits();
4457 unsigned Shift = ShiftAmt->Value.getZExtValue();
4458 return Shift * 2 == SrcSize;
4466 if (
MI->getOpcode() != AMDGPU::G_SHL)
4470 std::optional<ValueAndVReg> ShiftAmt;
4473 unsigned SrcSize =
MRI.getType(
MI->getOperand(1).getReg()).getSizeInBits();
4474 unsigned Shift = ShiftAmt->Value.getZExtValue();
4475 return Shift * 2 == SrcSize;
4483 if (
MI->getOpcode() != AMDGPU::G_UNMERGE_VALUES)
4485 return MI->getNumOperands() == 3 &&
MI->getOperand(0).isDef() &&
4486 MI->getOperand(1).isDef() && !
MI->getOperand(2).isDef();
4656static std::optional<std::pair<Register, SrcStatus>>
4661 unsigned Opc =
MI->getOpcode();
4665 case AMDGPU::G_BITCAST:
4666 return std::optional<std::pair<Register, SrcStatus>>(
4667 {
MI->getOperand(1).getReg(), Curr.second});
4669 if (
MI->getOperand(1).getReg().isPhysical())
4670 return std::nullopt;
4671 return std::optional<std::pair<Register, SrcStatus>>(
4672 {
MI->getOperand(1).getReg(), Curr.second});
4673 case AMDGPU::G_FNEG: {
4676 return std::nullopt;
4677 return std::optional<std::pair<Register, SrcStatus>>(
4678 {
MI->getOperand(1).getReg(), Stat});
4685 switch (Curr.second) {
4688 return std::optional<std::pair<Register, SrcStatus>>(
4691 if (Curr.first ==
MI->getOperand(0).getReg())
4692 return std::optional<std::pair<Register, SrcStatus>>(
4694 return std::optional<std::pair<Register, SrcStatus>>(
4706 return std::optional<std::pair<Register, SrcStatus>>(
4710 if (Curr.first ==
MI->getOperand(0).getReg())
4711 return std::optional<std::pair<Register, SrcStatus>>(
4713 return std::optional<std::pair<Register, SrcStatus>>(
4719 return std::optional<std::pair<Register, SrcStatus>>(
4724 return std::optional<std::pair<Register, SrcStatus>>(
4729 return std::optional<std::pair<Register, SrcStatus>>(
4734 return std::optional<std::pair<Register, SrcStatus>>(
4740 return std::nullopt;
4750 bool HasNeg =
false;
4752 bool HasOpsel =
true;
4757 unsigned Opc =
MI->getOpcode();
4759 if (
Opc < TargetOpcode::GENERIC_OP_END) {
4762 }
else if (
Opc == TargetOpcode::G_INTRINSIC) {
4765 if (IntrinsicID == Intrinsic::amdgcn_fdot2)
4789 while (
Depth <= MaxDepth && Curr.has_value()) {
4792 Statlist.push_back(Curr.value());
4799static std::pair<Register, SrcStatus>
4806 while (
Depth <= MaxDepth && Curr.has_value()) {
4812 LastSameOrNeg = Curr.value();
4817 return LastSameOrNeg;
4822 unsigned Width1 =
MRI.getType(Reg1).getSizeInBits();
4823 unsigned Width2 =
MRI.getType(Reg2).getSizeInBits();
4824 return Width1 == Width2;
4860 IsHalfState(HiStat);
4863std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3PModsImpl(
4869 return {RootReg, Mods};
4872 SearchOptions SO(RootReg, MRI);
4883 MachineInstr *
MI = MRI.getVRegDef(Stat.first);
4885 if (
MI->getOpcode() != AMDGPU::G_BUILD_VECTOR ||
MI->getNumOperands() != 3 ||
4886 (IsDOT && Subtarget->hasDOTOpSelHazard())) {
4888 return {Stat.first, Mods};
4894 if (StatlistHi.
empty()) {
4896 return {Stat.first, Mods};
4902 if (StatlistLo.
empty()) {
4904 return {Stat.first, Mods};
4907 for (
int I = StatlistHi.
size() - 1;
I >= 0;
I--) {
4908 for (
int J = StatlistLo.
size() - 1; J >= 0; J--) {
4909 if (StatlistHi[
I].first == StatlistLo[J].first &&
4911 StatlistHi[
I].first, RootReg, TII, MRI))
4912 return {StatlistHi[
I].first,
4913 updateMods(StatlistHi[
I].second, StatlistLo[J].second, Mods)};
4919 return {Stat.first, Mods};
4929 return RB->
getID() == RBNo;
4946 if (
checkRB(RootReg, AMDGPU::SGPRRegBankID, RBI,
MRI,
TRI) ||
4951 if (
MI->getOpcode() == AMDGPU::COPY && NewReg ==
MI->getOperand(1).getReg()) {
4957 Register DstReg =
MRI.cloneVirtualRegister(RootReg);
4960 BuildMI(*BB,
MI,
MI->getDebugLoc(),
TII.get(AMDGPU::COPY), DstReg)
4968AMDGPUInstructionSelector::selectVOP3PRetHelper(
MachineOperand &Root,
4973 std::tie(
Reg, Mods) = selectVOP3PModsImpl(Root.
getReg(), MRI, IsDOT);
4977 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
4978 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4983AMDGPUInstructionSelector::selectVOP3PMods(
MachineOperand &Root)
const {
4985 return selectVOP3PRetHelper(Root);
4989AMDGPUInstructionSelector::selectVOP3PModsDOT(
MachineOperand &Root)
const {
4991 return selectVOP3PRetHelper(Root,
true);
4995AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
4998 "expected i1 value");
5004 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5012 switch (Elts.
size()) {
5014 DstRegClass = &AMDGPU::VReg_256RegClass;
5017 DstRegClass = &AMDGPU::VReg_128RegClass;
5020 DstRegClass = &AMDGPU::VReg_64RegClass;
5027 auto MIB =
B.buildInstr(AMDGPU::REG_SEQUENCE)
5028 .addDef(
MRI.createVirtualRegister(DstRegClass));
5029 for (
unsigned i = 0; i < Elts.
size(); ++i) {
5040 if (ModOpcode == TargetOpcode::G_FNEG) {
5044 for (
auto El : Elts) {
5050 if (Elts.size() != NegAbsElts.
size()) {
5059 assert(ModOpcode == TargetOpcode::G_FABS);
5067AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(
MachineOperand &Root)
const {
5073 assert(BV->getNumSources() > 0);
5075 MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(0));
5076 unsigned ModOpcode = (ElF32->
getOpcode() == AMDGPU::G_FNEG)
5079 for (
unsigned i = 0; i < BV->getNumSources(); ++i) {
5080 ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
5087 if (BV->getNumSources() == EltsF32.
size()) {
5093 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5094 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5098AMDGPUInstructionSelector::selectWMMAModsF16Neg(
MachineOperand &Root)
const {
5104 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
5112 if (CV->getNumSources() == EltsV2F16.
size()) {
5119 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5120 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5124AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(
MachineOperand &Root)
const {
5130 assert(CV->getNumSources() > 0);
5131 MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(0));
5133 unsigned ModOpcode = (ElV2F16->
getOpcode() == AMDGPU::G_FNEG)
5137 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
5138 ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
5145 if (CV->getNumSources() == EltsV2F16.
size()) {
5152 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5153 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5157AMDGPUInstructionSelector::selectWMMAVISrc(
MachineOperand &Root)
const {
5158 std::optional<FPValueAndVReg> FPValReg;
5160 if (TII.isInlineConstant(FPValReg->Value)) {
5161 return {{[=](MachineInstrBuilder &MIB) {
5162 MIB.
addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
5172 if (TII.isInlineConstant(ICst)) {
5182AMDGPUInstructionSelector::selectSWMMACIndex8(
MachineOperand &Root)
const {
5188 std::optional<ValueAndVReg> ShiftAmt;
5190 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5191 ShiftAmt->Value.getZExtValue() % 8 == 0) {
5192 Key = ShiftAmt->Value.getZExtValue() / 8;
5197 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5198 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5203AMDGPUInstructionSelector::selectSWMMACIndex16(
MachineOperand &Root)
const {
5210 std::optional<ValueAndVReg> ShiftAmt;
5212 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5213 ShiftAmt->Value.getZExtValue() == 16) {
5219 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5220 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5225AMDGPUInstructionSelector::selectSWMMACIndex32(
MachineOperand &Root)
const {
5232 S32 = matchAnyExtendFromS32(Src);
5236 if (
Def->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
5241 Src =
Def->getOperand(2).getReg();
5248 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5249 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5254AMDGPUInstructionSelector::selectVOP3OpSelMods(
MachineOperand &Root)
const {
5257 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
5261 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5262 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5268AMDGPUInstructionSelector::selectVINTERPMods(
MachineOperand &Root)
const {
5271 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
5277 [=](MachineInstrBuilder &MIB) {
5279 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
5281 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
5286AMDGPUInstructionSelector::selectVINTERPModsHi(
MachineOperand &Root)
const {
5289 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
5295 [=](MachineInstrBuilder &MIB) {
5297 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
5299 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
5306 bool AMDGPUInstructionSelector::selectScaleOffset(MachineOperand &Root,
5308                                                   bool IsSigned) const {
5309   if (!Subtarget->hasScaleOffset())
5313   MachineMemOperand *MMO = *MI.memoperands_begin();
5325     OffsetReg = Def->Reg;
5340       m_BinOp(IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO : AMDGPU::S_MUL_U64,
5344       (Mul->getOpcode() == (IsSigned ? AMDGPU::G_AMDGPU_MAD_I64_I32
5345                                      : AMDGPU::G_AMDGPU_MAD_U64_U32) ||
5346        (IsSigned && Mul->getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32 &&
5347         VT->signBitIsZero(Mul->getOperand(2).getReg()))) &&
5360 bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
5364                                                  bool *ScaleOffset) const {
5366   MachineBasicBlock *MBB = MI->getParent();
5371   getAddrModeInfo(*MI, *MRI, AddrInfo);
5373   if (AddrInfo.empty())
5376   const GEPInfo &GEPI = AddrInfo[0];
5377   std::optional<int64_t> EncodedImm;
5380     *ScaleOffset = false;
5385   if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
5386       AddrInfo.size() > 1) {
5387     const GEPInfo &GEPI2 = AddrInfo[1];
5388     if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
5389       Register OffsetReg = GEPI2.SgprParts[1];
5392           selectScaleOffset(Root, OffsetReg, false);
5393       OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5395         Base = GEPI2.SgprParts[0];
5396         *SOffset = OffsetReg;
5405           auto SKnown = VT->getKnownBits(*SOffset);
5406           if (*Offset + SKnown.getMinValue().getSExtValue() < 0)
5418   if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
5419     Base = GEPI.SgprParts[0];
5425   if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&
5431     Base = GEPI.SgprParts[0];
5432     *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5433     BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
5438   if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
5439     Register OffsetReg = GEPI.SgprParts[1];
5441     *ScaleOffset = selectScaleOffset(Root, OffsetReg, false);
5442     OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5444     Base = GEPI.SgprParts[0];
5445     *SOffset = OffsetReg;
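// SMRD offset renderers (selectSmrdImm/Imm32/Sgpr/SgprImm): encode an SGPR base
// plus either an immediate offset that fits the SMEM encoding or an SGPR
// soffset; offsets that fit neither are materialized with S_MOV_B32.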
5454AMDGPUInstructionSelector::selectSmrdImm(
MachineOperand &Root)
const {
5457 if (!selectSmrdOffset(Root,
Base,
nullptr, &
Offset,
5459 return std::nullopt;
5461 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5462 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Offset); }}};
5466AMDGPUInstructionSelector::selectSmrdImm32(
MachineOperand &Root)
const {
5468 getAddrModeInfo(*Root.
getParent(), *MRI, AddrInfo);
5470 if (AddrInfo.
empty() || AddrInfo[0].SgprParts.size() != 1)
5471 return std::nullopt;
5473 const GEPInfo &GEPInfo = AddrInfo[0];
5474 Register PtrReg = GEPInfo.SgprParts[0];
5475 std::optional<int64_t> EncodedImm =
5478 return std::nullopt;
5481 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrReg); },
5482 [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); }
5487AMDGPUInstructionSelector::selectSmrdSgpr(
MachineOperand &Root)
const {
5490 if (!selectSmrdOffset(Root,
Base, &SOffset,
nullptr,
5492 return std::nullopt;
5495 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5496 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
5497 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }}};
5501AMDGPUInstructionSelector::selectSmrdSgprImm(
MachineOperand &Root)
const {
5505 if (!selectSmrdOffset(Root,
Base, &SOffset, &
Offset, &ScaleOffset))
5506 return std::nullopt;
5509 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5510 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
5512 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }}};
5515std::pair<Register, int>
5516AMDGPUInstructionSelector::selectFlatOffsetImpl(
MachineOperand &Root,
5517 uint64_t FlatVariant)
const {
5522 if (!STI.hasFlatInstOffsets())
5526 int64_t ConstOffset;
5528 std::tie(PtrBase, ConstOffset, IsInBounds) =
5529 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
5535 if (ConstOffset == 0 ||
5537 !isFlatScratchBaseLegal(Root.
getReg())) ||
5541 unsigned AddrSpace = (*
MI->memoperands_begin())->getAddrSpace();
5542 if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, FlatVariant))
5545 return std::pair(PtrBase, ConstOffset);
5549AMDGPUInstructionSelector::selectFlatOffset(
MachineOperand &Root)
const {
5553 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5554 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5559AMDGPUInstructionSelector::selectGlobalOffset(
MachineOperand &Root)
const {
5563 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5564 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5569AMDGPUInstructionSelector::selectScratchOffset(
MachineOperand &Root)
const {
5573 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5574 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5580AMDGPUInstructionSelector::selectGlobalSAddr(
MachineOperand &Root,
5582 bool NeedIOffset)
const {
5585 int64_t ConstOffset;
5586 int64_t ImmOffset = 0;
5590 std::tie(PtrBase, ConstOffset, std::ignore) =
5591 getPtrBaseWithConstantOffset(Addr, *MRI);
5593 if (ConstOffset != 0) {
5598 ImmOffset = ConstOffset;
5601 if (isSGPR(PtrBaseDef->Reg)) {
5602 if (ConstOffset > 0) {
5608 int64_t SplitImmOffset = 0, RemainderOffset = ConstOffset;
5610 std::tie(SplitImmOffset, RemainderOffset) =
5615 if (Subtarget->hasSignedGVSOffset() ?
isInt<32>(RemainderOffset)
5618 MachineBasicBlock *
MBB =
MI->getParent();
5620 MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5622 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
5624 .
addImm(RemainderOffset);
5628 [=](MachineInstrBuilder &MIB) {
5631 [=](MachineInstrBuilder &MIB) {
5634 [=](MachineInstrBuilder &MIB) { MIB.
addImm(SplitImmOffset); },
5635 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); },
5638 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrBase); },
5639 [=](MachineInstrBuilder &MIB) {
5642 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); },
5652 unsigned NumLiterals =
5653 !TII.isInlineConstant(APInt(32,
Lo_32(ConstOffset))) +
5654 !TII.isInlineConstant(APInt(32,
Hi_32(ConstOffset)));
5655 if (STI.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
5656 return std::nullopt;
5663 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
5668 if (isSGPR(SAddr)) {
5669 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
5673 bool ScaleOffset = selectScaleOffset(Root, PtrBaseOffset,
5674 Subtarget->hasSignedGVSOffset());
5675 if (
Register VOffset = matchExtendFromS32OrS32(
5676 PtrBaseOffset, Subtarget->hasSignedGVSOffset())) {
5678 return {{[=](MachineInstrBuilder &MIB) {
5681 [=](MachineInstrBuilder &MIB) {
5684 [=](MachineInstrBuilder &MIB) {
5687 [=](MachineInstrBuilder &MIB) {
5691 return {{[=](MachineInstrBuilder &MIB) {
5694 [=](MachineInstrBuilder &MIB) {
5697 [=](MachineInstrBuilder &MIB) {
5707 if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
5708 AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
5709 return std::nullopt;
5714 MachineBasicBlock *
MBB =
MI->getParent();
5715 Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5717 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
5722 [=](MachineInstrBuilder &MIB) { MIB.
addReg(AddrDef->Reg); },
5723 [=](MachineInstrBuilder &MIB) { MIB.
addReg(VOffset); },
5724 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
5725 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); }
5728 [=](MachineInstrBuilder &MIB) { MIB.
addReg(AddrDef->Reg); },
5729 [=](MachineInstrBuilder &MIB) { MIB.
addReg(VOffset); },
5730 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); }
5735AMDGPUInstructionSelector::selectGlobalSAddr(
MachineOperand &Root)
const {
5736 return selectGlobalSAddr(Root, 0);
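// The selectGlobalSAddr* wrappers below differ only in the cache-policy (CPol)
// bits they pass through and in whether an immediate offset is permitted.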
5740AMDGPUInstructionSelector::selectGlobalSAddrCPol(
MachineOperand &Root)
const {
5746 return selectGlobalSAddr(Root, PassedCPol);
5750AMDGPUInstructionSelector::selectGlobalSAddrCPolM0(
MachineOperand &Root)
const {
5756 return selectGlobalSAddr(Root, PassedCPol);
5760AMDGPUInstructionSelector::selectGlobalSAddrGLC(
MachineOperand &Root)
const {
5765AMDGPUInstructionSelector::selectGlobalSAddrNoIOffset(
5772 return selectGlobalSAddr(Root, PassedCPol,
false);
5776AMDGPUInstructionSelector::selectGlobalSAddrNoIOffsetM0(
5783 return selectGlobalSAddr(Root, PassedCPol,
false);
5787AMDGPUInstructionSelector::selectScratchSAddr(
MachineOperand &Root)
const {
5790 int64_t ConstOffset;
5791 int64_t ImmOffset = 0;
5795 std::tie(PtrBase, ConstOffset, std::ignore) =
5796 getPtrBaseWithConstantOffset(Addr, *MRI);
5798 if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
5802 ImmOffset = ConstOffset;
5806 if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
5807 int FI = AddrDef->MI->getOperand(1).
getIndex();
5810 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); }
5816 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
5817 Register LHS = AddrDef->MI->getOperand(1).getReg();
5818 Register RHS = AddrDef->MI->getOperand(2).getReg();
5822 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
5823 isSGPR(RHSDef->Reg)) {
5824 int FI = LHSDef->MI->getOperand(1).getIndex();
5828 SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5830 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
5838 return std::nullopt;
5841 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SAddr); },
5842 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); }
5847bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
5849 if (!Subtarget->hasFlatScratchSVSSwizzleBug())
5855 auto VKnown =
VT->getKnownBits(VAddr);
5858 uint64_t VMax = VKnown.getMaxValue().getZExtValue();
5859 uint64_t
SMax = SKnown.getMaxValue().getZExtValue();
5860 return (VMax & 3) + (
SMax & 3) >= 4;
5864AMDGPUInstructionSelector::selectScratchSVAddr(
MachineOperand &Root)
const {
5867 int64_t ConstOffset;
5868 int64_t ImmOffset = 0;
5872 std::tie(PtrBase, ConstOffset, std::ignore) =
5873 getPtrBaseWithConstantOffset(Addr, *MRI);
5876 if (ConstOffset != 0 &&
5880 ImmOffset = ConstOffset;
5884 if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
5885 return std::nullopt;
5887 Register RHS = AddrDef->MI->getOperand(2).getReg();
5888 if (RBI.getRegBank(
RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
5889 return std::nullopt;
5891 Register LHS = AddrDef->MI->getOperand(1).getReg();
5894 if (OrigAddr != Addr) {
5895 if (!isFlatScratchBaseLegalSVImm(OrigAddr))
5896 return std::nullopt;
5898 if (!isFlatScratchBaseLegalSV(OrigAddr))
5899 return std::nullopt;
5902 if (checkFlatScratchSVSSwizzleBug(
RHS,
LHS, ImmOffset))
5903 return std::nullopt;
5905 unsigned CPol = selectScaleOffset(Root,
RHS,
true )
5909 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
5910 int FI = LHSDef->MI->getOperand(1).getIndex();
5912 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
RHS); },
5914 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
5915 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }
5924 return std::nullopt;
5927 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
RHS); },
5928 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
LHS); },
5929 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
5930 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }
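// selectMUBUFScratchOffen(): MUBUF scratch addressing. Frame indexes and legal
// immediate offsets are folded into the descriptor/offset fields; otherwise the
// high bits of the offset are materialized into a VGPR with V_MOV_B32.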
5935AMDGPUInstructionSelector::selectMUBUFScratchOffen(
MachineOperand &Root)
const {
5937 MachineBasicBlock *
MBB =
MI->getParent();
5939 const SIMachineFunctionInfo *
Info =
MF->getInfo<SIMachineFunctionInfo>();
5944 Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5949 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
5953 return {{[=](MachineInstrBuilder &MIB) {
5956 [=](MachineInstrBuilder &MIB) {
5959 [=](MachineInstrBuilder &MIB) {
5964 [=](MachineInstrBuilder &MIB) {
5973 std::optional<int> FI;
5976 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
5978 int64_t ConstOffset;
5979 std::tie(PtrBase, ConstOffset, std::ignore) =
5980 getPtrBaseWithConstantOffset(VAddr, *MRI);
5981 if (ConstOffset != 0) {
5982 if (TII.isLegalMUBUFImmOffset(ConstOffset) &&
5983 (!STI.privateMemoryResourceIsRangeChecked() ||
5984 VT->signBitIsZero(PtrBase))) {
5985 const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
5986 if (PtrBaseDef->
getOpcode() == AMDGPU::G_FRAME_INDEX)
5992 }
else if (RootDef->
getOpcode() == AMDGPU::G_FRAME_INDEX) {
5996 return {{[=](MachineInstrBuilder &MIB) {
5999 [=](MachineInstrBuilder &MIB) {
6005 [=](MachineInstrBuilder &MIB) {
6010 [=](MachineInstrBuilder &MIB) {
6015bool AMDGPUInstructionSelector::isDSOffsetLegal(
Register Base,
6020 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
6025 return VT->signBitIsZero(
Base);
6028bool AMDGPUInstructionSelector::isDSOffset2Legal(
Register Base, int64_t Offset0,
6030 unsigned Size)
const {
6031 if (Offset0 %
Size != 0 || Offset1 %
Size != 0)
6036 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
6041 return VT->signBitIsZero(
Base);
6046 return Addr->
getOpcode() == TargetOpcode::G_OR ||
6047 (Addr->
getOpcode() == TargetOpcode::G_PTR_ADD &&
6054bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(
Register Addr)
const {
6062 if (STI.hasSignedScratchOffsets())
6068 if (AddrMI->
getOpcode() == TargetOpcode::G_PTR_ADD) {
6069 std::optional<ValueAndVReg> RhsValReg =
6075 if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
6076 RhsValReg->Value.getSExtValue() > -0x40000000)
6080 return VT->signBitIsZero(
LHS);
6085bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(
Register Addr)
const {
6093 if (STI.hasSignedScratchOffsets())
6098 return VT->signBitIsZero(
RHS) &&
VT->signBitIsZero(
LHS);
6103bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
6107 if (STI.hasSignedScratchOffsets())
6112 std::optional<DefinitionAndSourceRegister> BaseDef =
6114 std::optional<ValueAndVReg> RHSOffset =
6124 (RHSOffset->Value.getSExtValue() < 0 &&
6125 RHSOffset->Value.getSExtValue() > -0x40000000)))
6128 Register LHS = BaseDef->MI->getOperand(1).getReg();
6129 Register RHS = BaseDef->MI->getOperand(2).getReg();
6130 return VT->signBitIsZero(
RHS) &&
VT->signBitIsZero(
LHS);
6133bool AMDGPUInstructionSelector::isUnneededShiftMask(
const MachineInstr &
MI,
6134 unsigned ShAmtBits)
const {
6135 assert(
MI.getOpcode() == TargetOpcode::G_AND);
6137 std::optional<APInt>
RHS =
6142 if (
RHS->countr_one() >= ShAmtBits)
6145 const APInt &LHSKnownZeros =
VT->getKnownZeroes(
MI.getOperand(1).getReg());
6146 return (LHSKnownZeros | *
RHS).countr_one() >= ShAmtBits;
6150AMDGPUInstructionSelector::selectMUBUFScratchOffset(
6153 const SIMachineFunctionInfo *
Info =
MF->getInfo<SIMachineFunctionInfo>();
6155 std::optional<DefinitionAndSourceRegister>
Def =
6157 assert(Def &&
"this shouldn't be an optional result");
6162 [=](MachineInstrBuilder &MIB) {
6165 [=](MachineInstrBuilder &MIB) {
6168 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
6179 if (!TII.isLegalMUBUFImmOffset(
Offset))
6187 [=](MachineInstrBuilder &MIB) {
6190 [=](MachineInstrBuilder &MIB) {
6198 !TII.isLegalMUBUFImmOffset(
Offset))
6202 [=](MachineInstrBuilder &MIB) {
6205 [=](MachineInstrBuilder &MIB) {
6212std::pair<Register, unsigned>
6213AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(
MachineOperand &Root)
const {
6214 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6215 int64_t ConstAddr = 0;
6219 std::tie(PtrBase,
Offset, std::ignore) =
6220 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
6223 if (isDSOffsetLegal(PtrBase,
Offset)) {
6225 return std::pair(PtrBase,
Offset);
6227 }
else if (RootDef->
getOpcode() == AMDGPU::G_SUB) {
6236 return std::pair(Root.
getReg(), 0);
6240AMDGPUInstructionSelector::selectDS1Addr1Offset(
MachineOperand &Root)
const {
6243 std::tie(
Reg,
Offset) = selectDS1Addr1OffsetImpl(Root);
6245 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
6251AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(
MachineOperand &Root)
const {
6252 return selectDSReadWrite2(Root, 4);
6256AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(
MachineOperand &Root)
const {
6257 return selectDSReadWrite2(Root, 8);
6261AMDGPUInstructionSelector::selectDSReadWrite2(
MachineOperand &Root,
6262 unsigned Size)
const {
6267 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
6269 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Offset+1); }
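// DS read2/write2 addressing: the byte offset is split into two element-sized
// slots, rendered as offset0 = Offset and offset1 = Offset + 1 after dividing by
// the element size, and both must pass isDSOffset2Legal().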
6273std::pair<Register, unsigned>
6274AMDGPUInstructionSelector::selectDSReadWrite2Impl(
MachineOperand &Root,
6275 unsigned Size)
const {
6276 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6277 int64_t ConstAddr = 0;
6281 std::tie(PtrBase,
Offset, std::ignore) =
6282 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
6285 int64_t OffsetValue0 =
Offset;
6287 if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1,
Size)) {
6289 return std::pair(PtrBase, OffsetValue0 /
Size);
6291 }
else if (RootDef->
getOpcode() == AMDGPU::G_SUB) {
6299 return std::pair(Root.
getReg(), 0);
6307std::tuple<Register, int64_t, bool>
6308AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
6311 if (RootI->
getOpcode() != TargetOpcode::G_PTR_ADD)
6312 return {Root, 0,
false};
6315 std::optional<ValueAndVReg> MaybeOffset =
6318 return {Root, 0,
false};
6333 Register RSrc2 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6334 Register RSrc3 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6335 Register RSrcHi =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
6336 Register RSrc =
MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
6338 B.buildInstr(AMDGPU::S_MOV_B32)
6341 B.buildInstr(AMDGPU::S_MOV_B32)
6348 B.buildInstr(AMDGPU::REG_SEQUENCE)
6351 .addImm(AMDGPU::sub0)
6353 .addImm(AMDGPU::sub1);
6357 RSrcLo =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
6358 B.buildInstr(AMDGPU::S_MOV_B64)
6363 B.buildInstr(AMDGPU::REG_SEQUENCE)
6366 .addImm(AMDGPU::sub0_sub1)
6368 .addImm(AMDGPU::sub2_sub3);
6375 uint64_t DefaultFormat =
TII.getDefaultRsrcDataFormat();
6384 uint64_t DefaultFormat =
TII.getDefaultRsrcDataFormat();
6391AMDGPUInstructionSelector::MUBUFAddressData
6392AMDGPUInstructionSelector::parseMUBUFAddress(
Register Src)
const {
6393 MUBUFAddressData
Data;
6399 std::tie(PtrBase,
Offset, std::ignore) =
6400 getPtrBaseWithConstantOffset(Src, *MRI);
6406 if (MachineInstr *InputAdd
6408 Data.N2 = InputAdd->getOperand(1).getReg();
6409 Data.N3 = InputAdd->getOperand(2).getReg();
6424bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr)
const {
6430 const RegisterBank *N0Bank = RBI.getRegBank(Addr.N0, *MRI, TRI);
6431 return N0Bank->
getID() == AMDGPU::VGPRRegBankID;
6437void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
6439 if (TII.isLegalMUBUFImmOffset(ImmOffset))
6443 SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6444 B.buildInstr(AMDGPU::S_MOV_B32)
6450bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
6455 if (!STI.hasAddr64() || STI.useFlatForGlobal())
6458 MUBUFAddressData AddrData = parseMUBUFAddress(Root.
getReg());
6459 if (!shouldUseAddr64(AddrData))
6465 Offset = AddrData.Offset;
6471 if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6473 if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6486 }
else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6497 splitIllegalMUBUFOffset(
B, SOffset,
Offset);
6501bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
6506 if (STI.useFlatForGlobal())
6509 MUBUFAddressData AddrData = parseMUBUFAddress(Root.
getReg());
6510 if (shouldUseAddr64(AddrData))
6516 Offset = AddrData.Offset;
6522 splitIllegalMUBUFOffset(
B, SOffset,
Offset);
6527AMDGPUInstructionSelector::selectMUBUFAddr64(
MachineOperand &Root)
const {
6533 if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset,
Offset))
6539 [=](MachineInstrBuilder &MIB) {
6542 [=](MachineInstrBuilder &MIB) {
6545 [=](MachineInstrBuilder &MIB) {
6548 else if (STI.hasRestrictedSOffset())
6549 MIB.
addReg(AMDGPU::SGPR_NULL);
6553 [=](MachineInstrBuilder &MIB) {
6563AMDGPUInstructionSelector::selectMUBUFOffset(
MachineOperand &Root)
const {
6568 if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset,
Offset))
6572 [=](MachineInstrBuilder &MIB) {
6575 [=](MachineInstrBuilder &MIB) {
6578 else if (STI.hasRestrictedSOffset())
6579 MIB.
addReg(AMDGPU::SGPR_NULL);
6591AMDGPUInstructionSelector::selectBUFSOffset(
MachineOperand &Root)
const {
6596 SOffset = AMDGPU::SGPR_NULL;
6598 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); }}};
6602static std::optional<uint64_t>
6606 if (!OffsetVal || !
isInt<32>(*OffsetVal))
6607 return std::nullopt;
6608 return Lo_32(*OffsetVal);
6612AMDGPUInstructionSelector::selectSMRDBufferImm(
MachineOperand &Root)
const {
6613 std::optional<uint64_t> OffsetVal =
6618 std::optional<int64_t> EncodedImm =
6623 return {{ [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); } }};
6627AMDGPUInstructionSelector::selectSMRDBufferImm32(
MachineOperand &Root)
const {
6634 std::optional<int64_t> EncodedImm =
6639 return {{ [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); } }};
6643AMDGPUInstructionSelector::selectSMRDBufferSgprImm(
MachineOperand &Root)
const {
6651 return std::nullopt;
6653 std::optional<int64_t> EncodedOffset =
6656 return std::nullopt;
6659 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
6660 [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedOffset); }}};
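// selectVOP3PMadMixModsImpl(): looks through fpext/fneg/fabs chains on a source
// so the mixed-precision V_FMA_MIX/V_MAD_MIX forms can consume an f16 operand
// directly; Matched reports whether the mixed form actually applies.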
6663std::pair<Register, unsigned>
6664AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
6665 bool &Matched) const {
6670 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
6680 const auto CheckAbsNeg = [&]() {
6685 std::tie(Src, ModsTmp) = selectVOP3ModsImpl(Src);
6716AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
6721 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
6726 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
6727 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
6732AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
6736 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
6739 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
6740 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
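// Note (hedged summary): selectVOP3PMadMixModsImpl strips fpext/fneg/fabs
// chains off the source (the CheckAbsNeg lambda above) and accumulates the
// matching source-modifier bits; Matched reports whether the mixed-precision
// f16-in-f32 form actually applies, which the *Ext variant presumably requires
// while the plain variant tolerates a non-match.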
6744bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
6748 Register CCReg = I.getOperand(0).getReg();
6753 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
6754 .addImm(I.getOperand(2).getImm());
6758 I.eraseFromParent();
6759 return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
6763bool AMDGPUInstructionSelector::selectSGetBarrierState(
6767 const MachineOperand &BarOp = I.getOperand(2);
6768 std::optional<int64_t> BarValImm =
6772 auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
6776 MachineInstrBuilder MIB;
6777 unsigned Opc = BarValImm ? AMDGPU::S_GET_BARRIER_STATE_IMM
6778 : AMDGPU::S_GET_BARRIER_STATE_M0;
6781 auto DstReg = I.getOperand(0).getReg();
6782 const TargetRegisterClass *DstRC =
6783 TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
6784 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
6790 I.eraseFromParent();
6795 if (HasInlineConst) {
6799 case Intrinsic::amdgcn_s_barrier_join:
6800 return AMDGPU::S_BARRIER_JOIN_IMM;
6801 case Intrinsic::amdgcn_s_get_named_barrier_state:
6802 return AMDGPU::S_GET_BARRIER_STATE_IMM;
6808 case Intrinsic::amdgcn_s_barrier_join:
6809 return AMDGPU::S_BARRIER_JOIN_M0;
6810 case Intrinsic::amdgcn_s_get_named_barrier_state:
6811 return AMDGPU::S_GET_BARRIER_STATE_M0;
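// Note (hedged): getNamedBarrierOp maps a named-barrier intrinsic to either
// the _IMM or the _M0 form of the corresponding S_BARRIER_* opcode, depending
// on whether the barrier operand is an inline constant.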
6816bool AMDGPUInstructionSelector::selectNamedBarrierInit(
6820 const MachineOperand &BarOp = I.getOperand(1);
6821 const MachineOperand &CntOp = I.getOperand(2);
6824 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6830 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6837 Register TmpReg2 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6843 Register TmpReg3 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6844 constexpr unsigned ShAmt = 16;
6850 Register TmpReg4 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6860 unsigned Opc = IntrID == Intrinsic::amdgcn_s_barrier_init
6861 ? AMDGPU::S_BARRIER_INIT_M0
6862 : AMDGPU::S_BARRIER_SIGNAL_M0;
6863 MachineInstrBuilder MIB;
6866 I.eraseFromParent();
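// Note (hedged summary): selectNamedBarrierInit packs the barrier id and the
// member count into the layout expected in M0 (the count shifted up by
// ShAmt = 16 and combined through the SGPR temporaries above), copies the
// result into M0, and then emits the M0 form of S_BARRIER_INIT or
// S_BARRIER_SIGNAL.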
6870bool AMDGPUInstructionSelector::selectNamedBarrierInst(
6874 MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_named_barrier_state
6877 std::optional<int64_t> BarValImm =
6882 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6888 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6894 auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
6899 MachineInstrBuilder MIB;
6903 if (IntrID == Intrinsic::amdgcn_s_get_named_barrier_state) {
6904 auto DstReg = I.getOperand(0).getReg();
6905 const TargetRegisterClass *DstRC =
6906 TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
6907 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
6913 auto BarId = ((*BarValImm) >> 4) & 0x3F;
6917 I.eraseFromParent();
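// Note (hedged summary): selectNamedBarrierInst extracts the 6-bit barrier id
// from a constant operand (((*BarValImm) >> 4) & 0x3F) or, when the id is not
// a constant, routes it through M0 via the COPY above; the opcode comes from
// getNamedBarrierOp, and the result register is only constrained for the
// s_get_named_barrier_state case.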
6924 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6925 "Expected G_CONSTANT");
6926 MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());
6932 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6933 "Expected G_CONSTANT");
6934 MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());
6940 const MachineOperand &Op = MI.getOperand(1);
6941 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1);
6942 MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
6948 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6949 "Expected G_CONSTANT");
6950 MIB.addImm(MI.getOperand(1).getCImm()->getValue().popcount());
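// Note (hedged): these small render* callbacks are invoked by the imported
// TableGen patterns. Each one asserts it was handed a whole G_CONSTANT or
// G_FCONSTANT (OpIdx == -1) and emits the immediate in the required form:
// sign-extended, negated, bit-cast from the FP value, or its population count.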
6958 const MachineOperand &Op = MI.getOperand(OpIdx);
6975 assert(OpIdx >= 0 && "expected to match an immediate operand");
6979void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_0(
6981 assert(OpIdx >= 0 && "expected to match an immediate operand");
6986void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_1(
6988 assert(OpIdx >= 0 && "expected to match an immediate operand");
6994void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_0(
6996 assert(OpIdx >= 0 && "expected to match an immediate operand");
7001void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_1(
7003 assert(OpIdx >= 0 && "expected to match an immediate operand");
7009void AMDGPUInstructionSelector::renderDstSelToOpSelXForm(
7011 assert(OpIdx >= 0 && "expected to match an immediate operand");
7016void AMDGPUInstructionSelector::renderSrcSelToOpSelXForm(
7018 assert(OpIdx >= 0 && "expected to match an immediate operand");
7023void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_2_0(
7025 assert(OpIdx >= 0 && "expected to match an immediate operand");
7030void AMDGPUInstructionSelector::renderDstSelToOpSel3XFormXForm(
7032 assert(OpIdx >= 0 && "expected to match an immediate operand");
7041 assert(OpIdx >= 0 && "expected to match an immediate operand");
7050 assert(OpIdx >= 0 && "expected to match an immediate operand");
7057void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
7059 assert(OpIdx >= 0 && "expected to match an immediate operand");
7060 const uint32_t Cpol = MI.getOperand(OpIdx).getImm() &
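// Note (hedged): renderExtractCpolSetGLC masks the cache-policy bits out of
// the matched immediate (the mask itself is elided in this listing) and
// renders them with the GLC bit forced on, as the name suggests.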
7075 const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
7077 assert(ExpVal != INT_MIN);
7095 if (MI.getOperand(OpIdx).getImm())
7097 MIB.addImm((int64_t)Mods);
7104 if (MI.getOperand(OpIdx).getImm())
7106 MIB.addImm((int64_t)Mods);
7112 unsigned Val = MI.getOperand(OpIdx).getImm();
7120 MIB.addImm((int64_t)Mods);
7126 uint32_t V = MI.getOperand(2).getImm();
7129 if (!Subtarget->hasSafeCUPrefetch())
7135void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
7137 unsigned Val = MI.getOperand(OpIdx).getImm();
7146bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
7147 return TII.isInlineConstant(Imm);
7150bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {
7151 return TII.isInlineConstant(Imm);