23#define DEBUG_TYPE "si-fold-operands"
44 unsigned DefSubReg = AMDGPU::NoSubRegister;
49 FoldableDef() =
delete;
51 unsigned DefSubReg = AMDGPU::NoSubRegister)
52 : DefRC(DefRC), DefSubReg(DefSubReg), Kind(FoldOp.
getType()) {
55 ImmToFold = FoldOp.
getImm();
56 }
else if (FoldOp.
isFI()) {
57 FrameIndexToFold = FoldOp.
getIndex();
67 unsigned DefSubReg = AMDGPU::NoSubRegister)
68 : ImmToFold(FoldImm), DefRC(DefRC), DefSubReg(DefSubReg),
73 FoldableDef Copy(*
this);
74 Copy.DefSubReg =
TRI.composeSubRegIndices(DefSubReg, SubReg);
82 return OpToFold->getReg();
85 unsigned getSubReg()
const {
87 return OpToFold->getSubReg();
98 return FrameIndexToFold;
106 std::optional<int64_t> getEffectiveImmVal()
const {
114 unsigned OpIdx)
const {
117 std::optional<int64_t> ImmToFold = getEffectiveImmVal();
127 if (DefSubReg != AMDGPU::NoSubRegister)
135 if (DefSubReg != AMDGPU::NoSubRegister)
137 return TII.isOperandLegal(
MI,
OpIdx, OpToFold);
144struct FoldCandidate {
152 bool Commuted =
false,
int ShrinkOp = -1)
153 :
UseMI(
MI), Def(Def), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
154 Commuted(Commuted) {}
// Returns whether this candidate's definition is a frame index; forwards
// directly to Def.isFI().
156 bool isFI()
const {
return Def.isFI(); }
160 return Def.FrameIndexToFold;
// Returns whether this candidate's definition is an immediate; forwards
// directly to Def.isImm().
163 bool isImm()
const {
return Def.isImm(); }
// Returns whether this candidate's definition is a register; forwards
// directly to Def.isReg().
165 bool isReg()
const {
return Def.isReg(); }
// Returns whether this candidate's definition is a global; forwards
// directly to Def.isGlobal().
169 bool isGlobal()
const {
return Def.isGlobal(); }
// Returns true when a shrink opcode was recorded for this candidate.
// ShrinkOpcode is initialized from the constructor's ShrinkOp argument, whose
// default of -1 serves as the "no shrink" sentinel tested here.
171 bool needsShrink()
const {
return ShrinkOpcode != -1; }
174class SIFoldOperandsImpl {
184 const FoldableDef &OpToFold)
const;
187 unsigned convertToVALUOp(
unsigned Opc,
bool UseVOP3 =
false)
const {
189 case AMDGPU::S_ADD_I32: {
190 if (ST->hasAddNoCarryInsts())
191 return UseVOP3 ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_U32_e32;
192 return UseVOP3 ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
194 case AMDGPU::S_OR_B32:
195 return UseVOP3 ? AMDGPU::V_OR_B32_e64 : AMDGPU::V_OR_B32_e32;
196 case AMDGPU::S_AND_B32:
197 return UseVOP3 ? AMDGPU::V_AND_B32_e64 : AMDGPU::V_AND_B32_e32;
198 case AMDGPU::S_MUL_I32:
199 return AMDGPU::V_MUL_LO_U32_e64;
201 return AMDGPU::INSTRUCTION_LIST_END;
205 bool foldCopyToVGPROfScalarAddOfFrameIndex(
Register DstReg,
Register SrcReg,
211 int64_t ImmVal)
const;
215 int64_t ImmVal)
const;
219 const FoldableDef &OpToFold)
const;
228 getRegSeqInit(
SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs,
231 std::pair<int64_t, const TargetRegisterClass *>
248 bool foldInstOperand(
MachineInstr &
MI,
const FoldableDef &OpToFold)
const;
250 bool foldCopyToAGPRRegSequence(
MachineInstr *CopyMI)
const;
257 std::pair<const MachineOperand *, int> isOMod(
const MachineInstr &
MI)
const;
266 SIFoldOperandsImpl() =
default;
280 return SIFoldOperandsImpl().run(MF);
// Overrides the base-class hook to report this pass's name as the fixed
// string "SI Fold Operands".
283 StringRef getPassName()
const override {
return "SI Fold Operands"; }
// Out-of-line definition of the static member SIFoldOperandsLegacy::ID,
// initialized to 0.
// NOTE(review): presumably only the address of ID is used as the pass
// identifier, as is conventional for LLVM legacy passes — confirm.
300char SIFoldOperandsLegacy::
ID = 0;
309 TRI.getSubRegisterClass(RC, MO.getSubReg()))
317 case AMDGPU::V_MAC_F32_e64:
318 return AMDGPU::V_MAD_F32_e64;
319 case AMDGPU::V_MAC_F16_e64:
320 return AMDGPU::V_MAD_F16_e64;
321 case AMDGPU::V_FMAC_F32_e64:
322 return AMDGPU::V_FMA_F32_e64;
323 case AMDGPU::V_FMAC_F16_e64:
324 return AMDGPU::V_FMA_F16_gfx9_e64;
325 case AMDGPU::V_FMAC_F16_t16_e64:
326 return AMDGPU::V_FMA_F16_gfx9_t16_e64;
327 case AMDGPU::V_FMAC_F16_fake16_e64:
328 return AMDGPU::V_FMA_F16_gfx9_fake16_e64;
329 case AMDGPU::V_FMAC_LEGACY_F32_e64:
330 return AMDGPU::V_FMA_LEGACY_F32_e64;
331 case AMDGPU::V_FMAC_F64_e64:
332 return AMDGPU::V_FMA_F64_e64;
334 return AMDGPU::INSTRUCTION_LIST_END;
340 const FoldableDef &OpToFold)
const {
341 if (!OpToFold.isFI())
344 const unsigned Opc =
UseMI.getOpcode();
346 case AMDGPU::S_ADD_I32:
347 case AMDGPU::S_ADD_U32:
348 case AMDGPU::V_ADD_U32_e32:
349 case AMDGPU::V_ADD_CO_U32_e32:
353 return UseMI.getOperand(OpNo == 1 ? 2 : 1).isImm() &&
355 case AMDGPU::V_ADD_U32_e64:
356 case AMDGPU::V_ADD_CO_U32_e64:
357 return UseMI.getOperand(OpNo == 2 ? 3 : 2).isImm() &&
364 return OpNo == AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
368 int SIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::saddr);
372 int VIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
373 return OpNo == VIdx && SIdx == -1;
379bool SIFoldOperandsImpl::foldCopyToVGPROfScalarAddOfFrameIndex(
381 if (
TRI->isVGPR(*MRI, DstReg) &&
TRI->isSGPRReg(*MRI, SrcReg) &&
384 if (!Def ||
Def->getNumOperands() != 4)
387 MachineOperand *Src0 = &
Def->getOperand(1);
388 MachineOperand *Src1 = &
Def->getOperand(2);
399 const bool UseVOP3 = !Src0->
isImm() ||
TII->isInlineConstant(*Src0);
400 unsigned NewOp = convertToVALUOp(
Def->getOpcode(), UseVOP3);
401 if (NewOp == AMDGPU::INSTRUCTION_LIST_END ||
402 !
Def->getOperand(3).isDead())
405 MachineBasicBlock *
MBB =
Def->getParent();
407 if (NewOp != AMDGPU::V_ADD_CO_U32_e32) {
408 MachineInstrBuilder
Add =
411 if (
Add->getDesc().getNumDefs() == 2) {
413 Add.addDef(CarryOutReg, RegState::Dead);
417 Add.add(*Src0).add(*Src1).setMIFlags(
Def->getFlags());
421 Def->eraseFromParent();
422 MI.eraseFromParent();
426 assert(NewOp == AMDGPU::V_ADD_CO_U32_e32);
437 Def->eraseFromParent();
438 MI.eraseFromParent();
447 return new SIFoldOperandsLegacy();
450bool SIFoldOperandsImpl::canUseImmWithOpSel(
const MachineInstr *
MI,
452 int64_t ImmVal)
const {
459 int OpNo =
MI->getOperandNo(&Old);
461 unsigned Opcode =
MI->getOpcode();
462 uint8_t OpType =
TII->get(Opcode).operands()[OpNo].OperandType;
484bool SIFoldOperandsImpl::tryFoldImmWithOpSel(MachineInstr *
MI,
unsigned UseOpNo,
485 int64_t ImmVal)
const {
486 MachineOperand &Old =
MI->getOperand(UseOpNo);
487 unsigned Opcode =
MI->getOpcode();
488 int OpNo =
MI->getOperandNo(&Old);
489 uint8_t OpType =
TII->get(Opcode).operands()[OpNo].OperandType;
501 AMDGPU::OpName ModName = AMDGPU::OpName::NUM_OPERAND_NAMES;
502 unsigned SrcIdx = ~0;
503 if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0)) {
504 ModName = AMDGPU::OpName::src0_modifiers;
506 }
else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1)) {
507 ModName = AMDGPU::OpName::src1_modifiers;
509 }
else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2)) {
510 ModName = AMDGPU::OpName::src2_modifiers;
513 assert(ModName != AMDGPU::OpName::NUM_OPERAND_NAMES);
514 int ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModName);
515 MachineOperand &
Mod =
MI->getOperand(ModIdx);
516 unsigned ModVal =
Mod.getImm();
522 uint32_t
Imm = (
static_cast<uint32_t
>(ImmHi) << 16) | ImmLo;
527 auto tryFoldToInline = [&](uint32_t
Imm) ->
bool {
536 uint16_t
Lo =
static_cast<uint16_t
>(
Imm);
537 uint16_t
Hi =
static_cast<uint16_t
>(
Imm >> 16);
540 Mod.setImm(NewModVal);
545 if (
static_cast<int16_t
>(
Lo) < 0) {
546 int32_t SExt =
static_cast<int16_t
>(
Lo);
548 Mod.setImm(NewModVal);
563 uint32_t Swapped = (
static_cast<uint32_t
>(
Lo) << 16) |
Hi;
574 if (tryFoldToInline(Imm))
583 bool IsUAdd = Opcode == AMDGPU::V_PK_ADD_U16;
584 bool IsUSub = Opcode == AMDGPU::V_PK_SUB_U16;
585 if (SrcIdx == 1 && (IsUAdd || IsUSub)) {
587 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::clamp);
588 bool Clamp =
MI->getOperand(ClampIdx).getImm() != 0;
591 uint16_t NegLo = -
static_cast<uint16_t
>(
Imm);
592 uint16_t NegHi = -
static_cast<uint16_t
>(
Imm >> 16);
593 uint32_t NegImm = (
static_cast<uint32_t
>(NegHi) << 16) | NegLo;
595 if (tryFoldToInline(NegImm)) {
597 IsUAdd ? AMDGPU::V_PK_SUB_U16 : AMDGPU::V_PK_ADD_U16;
598 MI->setDesc(
TII->get(NegOpcode));
607bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold)
const {
608 MachineInstr *
MI = Fold.UseMI;
609 MachineOperand &Old =
MI->getOperand(Fold.UseOpNo);
612 std::optional<int64_t> ImmVal;
614 ImmVal = Fold.Def.getEffectiveImmVal();
616 if (ImmVal && canUseImmWithOpSel(Fold.UseMI, Fold.UseOpNo, *ImmVal)) {
617 if (tryFoldImmWithOpSel(Fold.UseMI, Fold.UseOpNo, *ImmVal))
623 int OpNo =
MI->getOperandNo(&Old);
624 if (!
TII->isOperandLegal(*
MI, OpNo, &New))
630 if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) {
631 MachineBasicBlock *
MBB =
MI->getParent();
638 int Op32 = Fold.ShrinkOpcode;
639 MachineOperand &Dst0 =
MI->getOperand(0);
640 MachineOperand &Dst1 =
MI->getOperand(1);
648 MachineInstr *Inst32 =
TII->buildShrunkInst(*
MI, Op32);
650 if (HaveNonDbgCarryUse) {
653 .
addReg(AMDGPU::VCC, RegState::Kill);
663 for (
unsigned I =
MI->getNumOperands() - 1;
I > 0; --
I)
664 MI->removeOperand(
I);
665 MI->setDesc(
TII->get(AMDGPU::IMPLICIT_DEF));
668 TII->commuteInstruction(*Inst32,
false);
672 assert(!Fold.needsShrink() &&
"not handled");
677 if (NewMFMAOpc == -1)
679 MI->setDesc(
TII->get(NewMFMAOpc));
680 MI->untieRegOperand(0);
681 const MCInstrDesc &MCID =
MI->getDesc();
682 for (
unsigned I = 0;
I <
MI->getNumDefs(); ++
I)
684 MI->getOperand(
I).setIsEarlyClobber(
true);
689 int OpNo =
MI->getOperandNo(&Old);
690 if (!
TII->isOperandLegal(*
MI, OpNo, &New))
697 if (Fold.isGlobal()) {
698 Old.
ChangeToGA(Fold.Def.OpToFold->getGlobal(),
699 Fold.Def.OpToFold->getOffset(),
700 Fold.Def.OpToFold->getTargetFlags());
709 MachineOperand *
New = Fold.Def.OpToFold;
712 if (
const TargetRegisterClass *OpRC =
713 TII->getRegClass(
MI->getDesc(), Fold.UseOpNo)) {
714 const TargetRegisterClass *NewRC =
715 TRI->getRegClassForReg(*MRI,
New->getReg());
717 const TargetRegisterClass *ConstrainRC = OpRC;
718 if (
New->getSubReg()) {
720 TRI->getMatchingSuperRegClass(NewRC, OpRC,
New->getSubReg());
726 if (
New->getReg().isVirtual() &&
729 <<
TRI->getRegClassName(ConstrainRC) <<
'\n');
736 if (Old.
getSubReg() == AMDGPU::lo16 &&
TRI->isSGPRReg(*MRI,
New->getReg()))
738 if (
New->getReg().isPhysical()) {
746 if (
MI->isBundledWithPred()) {
748 for (MachineOperand &MO : Header.operands()) {
749 if (MO.getReg() == OldReg) {
750 MO.setReg(
New->getReg());
751 MO.setSubReg(
New->getSubReg());
760 FoldCandidate &&Entry) {
762 for (FoldCandidate &Fold : FoldList)
763 if (Fold.UseMI == Entry.UseMI && Fold.UseOpNo == Entry.UseOpNo)
765 LLVM_DEBUG(
dbgs() <<
"Append " << (Entry.Commuted ?
"commuted" :
"normal")
766 <<
" operand " << Entry.UseOpNo <<
"\n " << *Entry.UseMI);
772 const FoldableDef &FoldOp,
773 bool Commuted =
false,
int ShrinkOp = -1) {
775 FoldCandidate(
MI, OpNo, FoldOp, Commuted, ShrinkOp));
783 if (!ST->hasPKF32InstsReplicatingLower32BitsOfScalarInput())
793 const FoldableDef &OpToFold) {
794 assert(OpToFold.isImm() &&
"Expected immediate operand");
795 uint64_t ImmVal = OpToFold.getEffectiveImmVal().value();
801bool SIFoldOperandsImpl::tryAddToFoldList(
802 SmallVectorImpl<FoldCandidate> &FoldList, MachineInstr *
MI,
unsigned OpNo,
803 const FoldableDef &OpToFold)
const {
804 const unsigned Opc =
MI->getOpcode();
806 auto tryToFoldAsFMAAKorMK = [&]() {
807 if (!OpToFold.isImm())
810 const bool TryAK = OpNo == 3;
811 const unsigned NewOpc = TryAK ? AMDGPU::S_FMAAK_F32 : AMDGPU::S_FMAMK_F32;
812 MI->setDesc(
TII->get(NewOpc));
815 bool FoldAsFMAAKorMK =
816 tryAddToFoldList(FoldList,
MI, TryAK ? 3 : 2, OpToFold);
817 if (FoldAsFMAAKorMK) {
819 MI->untieRegOperand(3);
822 MachineOperand &Op1 =
MI->getOperand(1);
823 MachineOperand &Op2 =
MI->getOperand(2);
840 bool IsLegal = OpToFold.isOperandLegal(*
TII, *
MI, OpNo);
841 if (!IsLegal && OpToFold.isImm()) {
842 if (std::optional<int64_t> ImmVal = OpToFold.getEffectiveImmVal())
843 IsLegal = canUseImmWithOpSel(
MI, OpNo, *ImmVal);
849 if (NewOpc != AMDGPU::INSTRUCTION_LIST_END) {
852 MI->setDesc(
TII->get(NewOpc));
857 bool FoldAsMAD = tryAddToFoldList(FoldList,
MI, OpNo, OpToFold);
859 MI->untieRegOperand(OpNo);
863 MI->removeOperand(
MI->getNumExplicitOperands() - 1);
869 if (
Opc == AMDGPU::S_FMAC_F32 && OpNo == 3) {
870 if (tryToFoldAsFMAAKorMK())
875 if (OpToFold.isImm()) {
877 if (
Opc == AMDGPU::S_SETREG_B32)
878 ImmOpc = AMDGPU::S_SETREG_IMM32_B32;
879 else if (
Opc == AMDGPU::S_SETREG_B32_mode)
880 ImmOpc = AMDGPU::S_SETREG_IMM32_B32_mode;
882 MI->setDesc(
TII->get(ImmOpc));
891 bool CanCommute =
TII->findCommutedOpIndices(*
MI, OpNo, CommuteOpNo);
895 MachineOperand &
Op =
MI->getOperand(OpNo);
896 MachineOperand &CommutedOp =
MI->getOperand(CommuteOpNo);
902 if (!
Op.isReg() || !CommutedOp.
isReg())
907 if (
Op.isReg() && CommutedOp.
isReg() &&
908 (
Op.getReg() == CommutedOp.
getReg() &&
912 if (!
TII->commuteInstruction(*
MI,
false, OpNo, CommuteOpNo))
916 if (!OpToFold.isOperandLegal(*
TII, *
MI, CommuteOpNo)) {
917 if ((
Opc != AMDGPU::V_ADD_CO_U32_e64 &&
Opc != AMDGPU::V_SUB_CO_U32_e64 &&
918 Opc != AMDGPU::V_SUBREV_CO_U32_e64) ||
919 (!OpToFold.isImm() && !OpToFold.isFI() && !OpToFold.isGlobal())) {
920 TII->commuteInstruction(*
MI,
false, OpNo, CommuteOpNo);
926 MachineOperand &OtherOp =
MI->getOperand(OpNo);
927 if (!OtherOp.
isReg() ||
934 unsigned MaybeCommutedOpc =
MI->getOpcode();
948 if (
Opc == AMDGPU::S_FMAC_F32 &&
949 (OpNo != 1 || !
MI->getOperand(1).isIdenticalTo(
MI->getOperand(2)))) {
950 if (tryToFoldAsFMAAKorMK())
956 if (OpToFold.isImm() &&
965bool SIFoldOperandsImpl::isUseSafeToFold(
const MachineInstr &
MI,
966 const MachineOperand &UseMO)
const {
968 return !
TII->isSDWA(
MI);
976 SubDef &&
TII.isFoldableCopy(*SubDef);
978 unsigned SrcIdx =
TII.getFoldableCopySrcIdx(*SubDef);
987 if (
SrcOp.getSubReg())
994const TargetRegisterClass *SIFoldOperandsImpl::getRegSeqInit(
995 MachineInstr &RegSeq,
996 SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs)
const {
1000 const TargetRegisterClass *RC =
nullptr;
1009 const TargetRegisterClass *OpRC =
getRegOpRC(*MRI, *
TRI, SrcOp);
1012 else if (!
TRI->getCommonSubClass(RC, OpRC))
1017 Defs.emplace_back(&SrcOp, SubRegIdx);
1022 if (DefSrc && (DefSrc->
isReg() || DefSrc->
isImm())) {
1023 Defs.emplace_back(DefSrc, SubRegIdx);
1027 Defs.emplace_back(&SrcOp, SubRegIdx);
1036const TargetRegisterClass *SIFoldOperandsImpl::getRegSeqInit(
1037 SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs,
1040 if (!Def || !
Def->isRegSequence())
1043 return getRegSeqInit(*Def, Defs);
1046std::pair<int64_t, const TargetRegisterClass *>
1047SIFoldOperandsImpl::isRegSeqSplat(MachineInstr &RegSeq)
const {
1049 const TargetRegisterClass *SrcRC = getRegSeqInit(RegSeq, Defs);
1053 bool TryToMatchSplat64 =
false;
1056 for (
unsigned I = 0,
E = Defs.
size();
I !=
E; ++
I) {
1057 const MachineOperand *
Op = Defs[
I].first;
1061 int64_t SubImm =
Op->getImm();
1067 if (Imm != SubImm) {
1068 if (
I == 1 && (
E & 1) == 0) {
1071 TryToMatchSplat64 =
true;
1079 if (!TryToMatchSplat64)
1080 return {Defs[0].first->getImm(), SrcRC};
1085 for (
unsigned I = 0,
E = Defs.
size();
I !=
E;
I += 2) {
1086 const MachineOperand *Op0 = Defs[
I].first;
1087 const MachineOperand *Op1 = Defs[
I + 1].first;
1092 unsigned SubReg0 = Defs[
I].second;
1093 unsigned SubReg1 = Defs[
I + 1].second;
1097 if (
TRI->getChannelFromSubReg(SubReg0) + 1 !=
1098 TRI->getChannelFromSubReg(SubReg1))
1101 if (
TRI->getSubRegIdxSize(SubReg0) != 32)
1106 SplatVal64 = MergedVal;
1107 else if (SplatVal64 != MergedVal)
1111 const TargetRegisterClass *RC64 =
TRI->getSubRegisterClass(
1114 return {SplatVal64, RC64};
1117bool SIFoldOperandsImpl::tryFoldRegSeqSplat(
1118 MachineInstr *
UseMI,
unsigned UseOpIdx, int64_t SplatVal,
1119 const TargetRegisterClass *SplatRC)
const {
1121 if (UseOpIdx >=
Desc.getNumOperands())
1128 int16_t RCID =
TII->getOpRegClassID(
Desc.operands()[UseOpIdx]);
1132 const TargetRegisterClass *OpRC =
TRI->getRegClass(RCID);
1137 if (SplatVal != 0 && SplatVal != -1) {
1141 uint8_t OpTy =
Desc.operands()[UseOpIdx].OperandType;
1147 OpRC =
TRI->getSubRegisterClass(OpRC, AMDGPU::sub0);
1154 OpRC =
TRI->getSubRegisterClass(OpRC, AMDGPU::sub0_sub1);
1160 if (!
TRI->getCommonSubClass(OpRC, SplatRC))
1165 if (!
TII->isOperandLegal(*
UseMI, UseOpIdx, &TmpOp))
1171bool SIFoldOperandsImpl::tryToFoldACImm(
1172 const FoldableDef &OpToFold, MachineInstr *
UseMI,
unsigned UseOpIdx,
1173 SmallVectorImpl<FoldCandidate> &FoldList)
const {
1175 if (UseOpIdx >=
Desc.getNumOperands())
1182 if (OpToFold.isImm() && OpToFold.isOperandLegal(*
TII, *
UseMI, UseOpIdx)) {
1193void SIFoldOperandsImpl::foldOperand(
1194 FoldableDef OpToFold, MachineInstr *
UseMI,
int UseOpIdx,
1195 SmallVectorImpl<FoldCandidate> &FoldList,
1196 SmallVectorImpl<MachineInstr *> &CopiesToReplace)
const {
1199 if (!isUseSafeToFold(*
UseMI, *UseOp))
1203 if (UseOp->
isReg() && OpToFold.isReg()) {
1207 if (UseOp->
getSubReg() != AMDGPU::NoSubRegister &&
1209 !
TRI->isSGPRReg(*MRI, OpToFold.getReg())))
1221 const TargetRegisterClass *SplatRC;
1222 std::tie(SplatVal, SplatRC) = isRegSeqSplat(*
UseMI);
1227 for (
unsigned I = 0;
I != UsesToProcess.size(); ++
I) {
1228 MachineOperand *RSUse = UsesToProcess[
I];
1229 MachineInstr *RSUseMI = RSUse->
getParent();
1239 if (tryFoldRegSeqSplat(RSUseMI, OpNo, SplatVal, SplatRC)) {
1240 FoldableDef SplatDef(SplatVal, SplatRC);
1247 if (RSUse->
getSubReg() != RegSeqDstSubReg)
1252 foldOperand(OpToFold, RSUseMI, RSUseMI->
getOperandNo(RSUse), FoldList,
1259 if (tryToFoldACImm(OpToFold,
UseMI, UseOpIdx, FoldList))
1262 if (frameIndexMayFold(*
UseMI, UseOpIdx, OpToFold)) {
1267 if (
TII->getNamedOperand(*
UseMI, AMDGPU::OpName::srsrc)->getReg() !=
1273 MachineOperand &SOff =
1274 *
TII->getNamedOperand(*
UseMI, AMDGPU::OpName::soffset);
1285 TII->getNamedOperand(*
UseMI, AMDGPU::OpName::cpol)->getImm();
1300 bool FoldingImmLike =
1301 OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
1309 const TargetRegisterClass *SrcRC = MRI->
getRegClass(SrcReg);
1317 const TargetRegisterClass *DestRC =
TRI->getRegClassForReg(*MRI, DestReg);
1320 for (
unsigned MovOp :
1321 {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
1322 AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_MOV_B16_t16_e64,
1323 AMDGPU::V_ACCVGPR_WRITE_B32_e64, AMDGPU::AV_MOV_B32_IMM_PSEUDO,
1324 AMDGPU::AV_MOV_B64_IMM_PSEUDO}) {
1325 const MCInstrDesc &MovDesc =
TII->get(MovOp);
1326 const TargetRegisterClass *MovDstRC =
1335 const int SrcIdx = MovOp == AMDGPU::V_MOV_B16_t16_e64 ? 2 : 1;
1337 int16_t RegClassID =
TII->getOpRegClassID(MovDesc.
operands()[SrcIdx]);
1338 if (RegClassID != -1) {
1339 const TargetRegisterClass *MovSrcRC =
TRI->getRegClass(RegClassID);
1342 MovSrcRC =
TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
1346 if (MovOp == AMDGPU::AV_MOV_B32_IMM_PSEUDO &&
1347 (!OpToFold.isImm() ||
1348 !
TII->isImmOperandLegal(MovDesc, SrcIdx,
1349 *OpToFold.getEffectiveImmVal())))
1362 if (!OpToFold.isImm() ||
1363 !
TII->isImmOperandLegal(MovDesc, 1, *OpToFold.getEffectiveImmVal()))
1369 while (ImpOpI != ImpOpE) {
1376 if (MovOp == AMDGPU::V_MOV_B16_t16_e64) {
1378 MachineOperand NewSrcOp(SrcOp);
1400 LLVM_DEBUG(
dbgs() <<
"Folding " << OpToFold.OpToFold <<
"\n into "
1405 unsigned SubRegIdx = OpToFold.getSubReg();
1419 static_assert(AMDGPU::sub1_hi16 == 12,
"Subregister layout has changed");
1424 if (SubRegIdx > AMDGPU::sub1) {
1425 LaneBitmask
M =
TRI->getSubRegIndexLaneMask(SubRegIdx);
1426 M |=
M.getLane(
M.getHighestLane() - 1);
1427 SmallVector<unsigned, 4> Indexes;
1428 TRI->getCoveringSubRegIndexes(
TRI->getRegClassForReg(*MRI,
UseReg), M,
1430 assert(Indexes.
size() == 1 &&
"Expected one 32-bit subreg to cover");
1431 SubRegIdx = Indexes[0];
1433 }
else if (
TII->getOpSize(*
UseMI, 1) == 4)
1436 SubRegIdx = AMDGPU::sub0;
1441 OpToFold.OpToFold->setIsKill(
false);
1445 if (foldCopyToAGPRRegSequence(
UseMI))
1450 if (UseOpc == AMDGPU::V_READFIRSTLANE_B32 ||
1451 (UseOpc == AMDGPU::V_READLANE_B32 &&
1453 AMDGPU::getNamedOperandIdx(UseOpc, AMDGPU::OpName::src0))) {
1458 if (FoldingImmLike) {
1461 *OpToFold.DefMI, *
UseMI))
1467 if (OpToFold.isImm()) {
1469 *OpToFold.getEffectiveImmVal());
1470 }
else if (OpToFold.isFI())
1473 assert(OpToFold.isGlobal());
1475 OpToFold.OpToFold->getOffset(),
1476 OpToFold.OpToFold->getTargetFlags());
1482 if (OpToFold.isReg() &&
TRI->isSGPRReg(*MRI, OpToFold.getReg())) {
1485 *OpToFold.DefMI, *
UseMI))
1507 UseDesc.
operands()[UseOpIdx].RegClass == -1)
1515 tryAddToFoldList(FoldList,
UseMI, UseOpIdx, OpToFold);
1521 case AMDGPU::S_ADD_I32:
1522 case AMDGPU::S_ADD_U32:
1525 case AMDGPU::S_SUB_I32:
1526 case AMDGPU::S_SUB_U32:
1529 case AMDGPU::V_AND_B32_e64:
1530 case AMDGPU::V_AND_B32_e32:
1531 case AMDGPU::S_AND_B32:
1534 case AMDGPU::V_OR_B32_e64:
1535 case AMDGPU::V_OR_B32_e32:
1536 case AMDGPU::S_OR_B32:
1539 case AMDGPU::V_XOR_B32_e64:
1540 case AMDGPU::V_XOR_B32_e32:
1541 case AMDGPU::S_XOR_B32:
1544 case AMDGPU::S_XNOR_B32:
1547 case AMDGPU::S_NAND_B32:
1550 case AMDGPU::S_NOR_B32:
1553 case AMDGPU::S_ANDN2_B32:
1556 case AMDGPU::S_ORN2_B32:
1559 case AMDGPU::V_LSHL_B32_e64:
1560 case AMDGPU::V_LSHL_B32_e32:
1561 case AMDGPU::S_LSHL_B32:
1563 Result =
LHS << (
RHS & 31);
1565 case AMDGPU::V_LSHLREV_B32_e64:
1566 case AMDGPU::V_LSHLREV_B32_e32:
1567 Result =
RHS << (
LHS & 31);
1569 case AMDGPU::V_LSHR_B32_e64:
1570 case AMDGPU::V_LSHR_B32_e32:
1571 case AMDGPU::S_LSHR_B32:
1572 Result =
LHS >> (
RHS & 31);
1574 case AMDGPU::V_LSHRREV_B32_e64:
1575 case AMDGPU::V_LSHRREV_B32_e32:
1576 Result =
RHS >> (
LHS & 31);
1578 case AMDGPU::V_ASHR_I32_e64:
1579 case AMDGPU::V_ASHR_I32_e32:
1580 case AMDGPU::S_ASHR_I32:
1581 Result =
static_cast<int32_t
>(
LHS) >> (
RHS & 31);
1583 case AMDGPU::V_ASHRREV_I32_e64:
1584 case AMDGPU::V_ASHRREV_I32_e32:
1585 Result =
static_cast<int32_t
>(
RHS) >> (
LHS & 31);
1593 return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1599bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *
MI)
const {
1600 if (!
MI->allImplicitDefsAreDead())
1603 unsigned Opc =
MI->getOpcode();
1605 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
1609 MachineOperand *Src0 = &
MI->getOperand(Src0Idx);
1610 std::optional<int64_t> Src0Imm =
TII->getImmOrMaterializedImm(*Src0);
1612 if ((
Opc == AMDGPU::V_NOT_B32_e64 ||
Opc == AMDGPU::V_NOT_B32_e32 ||
1613 Opc == AMDGPU::S_NOT_B32) &&
1615 MI->getOperand(1).ChangeToImmediate(~*Src0Imm);
1616 TII->mutateAndCleanupImplicit(
1621 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
1625 MachineOperand *Src1 = &
MI->getOperand(Src1Idx);
1626 std::optional<int64_t> Src1Imm =
TII->getImmOrMaterializedImm(*Src1);
1628 if (!Src0Imm && !Src1Imm)
1634 if (Src0Imm && Src1Imm) {
1639 bool IsSGPR =
TRI->isSGPRReg(*MRI,
MI->getOperand(0).getReg());
1643 MI->getOperand(Src0Idx).ChangeToImmediate(NewImm);
1644 MI->removeOperand(Src1Idx);
1651 if (
Opc == AMDGPU::S_SUB_I32 ||
Opc == AMDGPU::S_SUB_U32) {
1652 if (Src1Imm &&
static_cast<int32_t
>(*Src1Imm) == 0) {
1654 MI->removeOperand(Src1Idx);
1655 TII->mutateAndCleanupImplicit(*
MI,
TII->get(AMDGPU::COPY));
1661 if (!
MI->isCommutable())
1664 if (Src0Imm && !Src1Imm) {
1670 int32_t Src1Val =
static_cast<int32_t
>(*Src1Imm);
1671 if (
Opc == AMDGPU::S_ADD_I32 ||
Opc == AMDGPU::S_ADD_U32) {
1674 MI->removeOperand(Src1Idx);
1675 TII->mutateAndCleanupImplicit(*
MI,
TII->get(AMDGPU::COPY));
1681 if (
Opc == AMDGPU::V_OR_B32_e64 ||
1682 Opc == AMDGPU::V_OR_B32_e32 ||
1683 Opc == AMDGPU::S_OR_B32) {
1686 MI->removeOperand(Src1Idx);
1687 TII->mutateAndCleanupImplicit(*
MI,
TII->get(AMDGPU::COPY));
1688 }
else if (Src1Val == -1) {
1690 MI->removeOperand(Src0Idx);
1691 TII->mutateAndCleanupImplicit(
1699 if (
Opc == AMDGPU::V_AND_B32_e64 ||
Opc == AMDGPU::V_AND_B32_e32 ||
1700 Opc == AMDGPU::S_AND_B32) {
1703 MI->removeOperand(Src0Idx);
1704 TII->mutateAndCleanupImplicit(
1706 }
else if (Src1Val == -1) {
1708 MI->removeOperand(Src1Idx);
1709 TII->mutateAndCleanupImplicit(*
MI,
TII->get(AMDGPU::COPY));
1716 if (
Opc == AMDGPU::V_XOR_B32_e64 ||
Opc == AMDGPU::V_XOR_B32_e32 ||
1717 Opc == AMDGPU::S_XOR_B32) {
1720 MI->removeOperand(Src1Idx);
1721 TII->mutateAndCleanupImplicit(*
MI,
TII->get(AMDGPU::COPY));
1730bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &
MI)
const {
1731 unsigned Opc =
MI.getOpcode();
1732 if (
Opc != AMDGPU::V_CNDMASK_B32_e32 &&
Opc != AMDGPU::V_CNDMASK_B32_e64 &&
1733 Opc != AMDGPU::V_CNDMASK_B64_PSEUDO)
1736 MachineOperand *Src0 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src0);
1737 MachineOperand *Src1 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src1);
1739 std::optional<int64_t> Src1Imm =
TII->getImmOrMaterializedImm(*Src1);
1743 std::optional<int64_t> Src0Imm =
TII->getImmOrMaterializedImm(*Src0);
1744 if (!Src0Imm || *Src0Imm != *Src1Imm)
1749 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1_modifiers);
1751 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0_modifiers);
1752 if ((Src1ModIdx != -1 &&
MI.getOperand(Src1ModIdx).getImm() != 0) ||
1753 (Src0ModIdx != -1 &&
MI.getOperand(Src0ModIdx).getImm() != 0))
1759 int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
1761 MI.removeOperand(Src2Idx);
1762 MI.removeOperand(AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1));
1763 if (Src1ModIdx != -1)
1764 MI.removeOperand(Src1ModIdx);
1765 if (Src0ModIdx != -1)
1766 MI.removeOperand(Src0ModIdx);
1767 TII->mutateAndCleanupImplicit(
MI, NewDesc);
1772bool SIFoldOperandsImpl::tryFoldZeroHighBits(MachineInstr &
MI)
const {
1773 if (
MI.getOpcode() != AMDGPU::V_AND_B32_e64 &&
1774 MI.getOpcode() != AMDGPU::V_AND_B32_e32)
1777 std::optional<int64_t> Src0Imm =
1778 TII->getImmOrMaterializedImm(
MI.getOperand(1));
1779 if (!Src0Imm || *Src0Imm != 0xffff || !
MI.getOperand(2).isReg())
1783 MachineInstr *SrcDef = MRI->
getVRegDef(Src1);
1789 if (!
MI.getOperand(2).isKill())
1791 MI.eraseFromParent();
1795bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &
MI,
1796 const FoldableDef &OpToFold)
const {
1800 SmallVector<MachineInstr *, 4> CopiesToReplace;
1802 MachineOperand &Dst =
MI.getOperand(0);
1805 if (OpToFold.isImm()) {
1816 if (tryConstantFoldOp(&
UseMI)) {
1825 for (
auto *U : UsesToProcess) {
1826 MachineInstr *
UseMI =
U->getParent();
1828 FoldableDef SubOpToFold = OpToFold.getWithSubReg(*
TRI,
U->getSubReg());
1833 if (CopiesToReplace.
empty() && FoldList.
empty())
1836 MachineFunction *MF =
MI.getMF();
1838 for (MachineInstr *Copy : CopiesToReplace)
1839 Copy->addImplicitDefUseOperands(*MF);
1841 SetVector<MachineInstr *> ConstantFoldCandidates;
1842 for (FoldCandidate &Fold : FoldList) {
1843 assert(!Fold.isReg() || Fold.Def.OpToFold);
1844 if (Fold.isReg() && Fold.getReg().isVirtual()) {
1846 const MachineInstr *
DefMI = Fold.Def.DefMI;
1854 assert(Fold.Def.OpToFold && Fold.isReg());
1861 <<
static_cast<int>(Fold.UseOpNo) <<
" of "
1865 ConstantFoldCandidates.
insert(Fold.UseMI);
1867 }
else if (Fold.Commuted) {
1869 TII->commuteInstruction(*Fold.UseMI,
false);
1873 for (MachineInstr *
MI : ConstantFoldCandidates) {
1874 if (tryConstantFoldOp(
MI)) {
1884bool SIFoldOperandsImpl::foldCopyToAGPRRegSequence(MachineInstr *CopyMI)
const {
1889 const TargetRegisterClass *DefRC =
1891 if (!
TRI->isAGPRClass(DefRC))
1903 DenseMap<TargetInstrInfo::RegSubRegPair, Register> VGPRCopies;
1905 const TargetRegisterClass *UseRC =
1912 unsigned NumFoldable = 0;
1914 for (
unsigned I = 1;
I != NumRegSeqOperands;
I += 2) {
1930 const TargetRegisterClass *DestSuperRC =
TRI->getMatchingSuperRegClass(
1931 DefRC, &AMDGPU::AGPR_32RegClass, SubRegIdx);
1940 const TargetRegisterClass *InputRC =
1950 const TargetRegisterClass *MatchRC =
1951 TRI->getMatchingSuperRegClass(DefRC, InputRC, SubRegIdx);
1962 if (NumFoldable == 0)
1965 CopyMI->
setDesc(
TII->get(AMDGPU::REG_SEQUENCE));
1969 for (
auto [Def, DestSubIdx] : NewDefs) {
1970 if (!
Def->isReg()) {
1974 BuildMI(
MBB, CopyMI,
DL,
TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp)
1979 Def->setIsKill(
false);
1981 Register &VGPRCopy = VGPRCopies[Src];
1983 const TargetRegisterClass *VGPRUseSubRC =
1984 TRI->getSubRegisterClass(UseRC, DestSubIdx);
1993 const TargetRegisterClass *SubRC =
2009 B.addImm(DestSubIdx);
2016bool SIFoldOperandsImpl::tryFoldFoldableCopy(
2017 MachineInstr &
MI, MachineOperand *&CurrentKnownM0Val)
const {
2021 if (DstReg == AMDGPU::M0) {
2022 MachineOperand &NewM0Val =
MI.getOperand(1);
2023 if (CurrentKnownM0Val && CurrentKnownM0Val->
isIdenticalTo(NewM0Val)) {
2024 MI.eraseFromParent();
2035 MachineOperand *OpToFoldPtr;
2036 if (
MI.getOpcode() == AMDGPU::V_MOV_B16_t16_e64) {
2038 if (
TII->hasAnyModifiersSet(
MI))
2040 OpToFoldPtr = &
MI.getOperand(2);
2042 OpToFoldPtr = &
MI.getOperand(1);
2043 MachineOperand &OpToFold = *OpToFoldPtr;
2047 if (!FoldingImm && !OpToFold.
isReg())
2052 !
TRI->isConstantPhysReg(OpToFold.
getReg()))
2064 const TargetRegisterClass *DstRC =
2081 if (
MI.getOpcode() == AMDGPU::COPY && OpToFold.
isReg() &&
2083 if (DstRC == &AMDGPU::SReg_32RegClass &&
2092 if (OpToFold.
isReg() &&
MI.isCopy() && !
MI.getOperand(1).getSubReg()) {
2093 if (foldCopyToAGPRRegSequence(&
MI))
2097 FoldableDef
Def(OpToFold, DstRC);
2098 bool Changed = foldInstOperand(
MI, Def);
2105 auto *InstToErase = &
MI;
2107 auto &SrcOp = InstToErase->getOperand(1);
2109 InstToErase->eraseFromParent();
2111 InstToErase =
nullptr;
2115 if (!InstToErase || !
TII->isFoldableCopy(*InstToErase))
2119 if (InstToErase && InstToErase->isRegSequence() &&
2121 InstToErase->eraseFromParent();
2131 return OpToFold.
isReg() &&
2132 foldCopyToVGPROfScalarAddOfFrameIndex(DstReg, OpToFold.
getReg(),
MI);
2137const MachineOperand *
2138SIFoldOperandsImpl::isClamp(
const MachineInstr &
MI)
const {
2139 unsigned Op =
MI.getOpcode();
2141 case AMDGPU::V_MAX_F32_e64:
2142 case AMDGPU::V_MAX_F16_e64:
2143 case AMDGPU::V_MAX_F16_t16_e64:
2144 case AMDGPU::V_MAX_F16_fake16_e64:
2145 case AMDGPU::V_MAX_F64_e64:
2146 case AMDGPU::V_MAX_NUM_F64_e64:
2147 case AMDGPU::V_PK_MAX_F16:
2148 case AMDGPU::V_MAX_BF16_PSEUDO_e64:
2149 case AMDGPU::V_PK_MAX_NUM_BF16: {
2150 if (
MI.mayRaiseFPException())
2153 if (!
TII->getNamedOperand(
MI, AMDGPU::OpName::clamp)->getImm())
2157 const MachineOperand *Src0 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src0);
2158 const MachineOperand *Src1 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src1);
2162 Src0->
getSubReg() != AMDGPU::NoSubRegister)
2166 if (
TII->hasModifiersSet(
MI, AMDGPU::OpName::omod))
2170 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src0_modifiers)->getImm();
2172 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src1_modifiers)->getImm();
2176 unsigned UnsetMods =
2177 (
Op == AMDGPU::V_PK_MAX_F16 ||
Op == AMDGPU::V_PK_MAX_NUM_BF16)
2180 if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)
2190bool SIFoldOperandsImpl::tryFoldClamp(MachineInstr &
MI) {
2191 const MachineOperand *ClampSrc = isClamp(
MI);
2207 if (
Def->mayRaiseFPException())
2210 MachineOperand *DefClamp =
TII->getNamedOperand(*Def, AMDGPU::OpName::clamp);
2214 LLVM_DEBUG(
dbgs() <<
"Folding clamp " << *DefClamp <<
" into " << *Def);
2220 Register MIDstReg =
MI.getOperand(0).getReg();
2221 if (
TRI->isSGPRReg(*MRI, DefReg)) {
2230 MI.eraseFromParent();
2235 if (
TII->convertToThreeAddress(*Def,
nullptr,
nullptr))
2236 Def->eraseFromParent();
2243 case AMDGPU::V_MUL_F64_e64:
2244 case AMDGPU::V_MUL_F64_pseudo_e64: {
2246 case 0x3fe0000000000000:
2248 case 0x4000000000000000:
2250 case 0x4010000000000000:
2256 case AMDGPU::V_MUL_F32_e64: {
2257 switch (
static_cast<uint32_t>(Val)) {
2268 case AMDGPU::V_MUL_F16_e64:
2269 case AMDGPU::V_MUL_F16_t16_e64:
2270 case AMDGPU::V_MUL_F16_fake16_e64: {
2271 switch (
static_cast<uint16_t>(Val)) {
2290std::pair<const MachineOperand *, int>
2291SIFoldOperandsImpl::isOMod(
const MachineInstr &
MI)
const {
2292 unsigned Op =
MI.getOpcode();
2294 case AMDGPU::V_MUL_F64_e64:
2295 case AMDGPU::V_MUL_F64_pseudo_e64:
2296 case AMDGPU::V_MUL_F32_e64:
2297 case AMDGPU::V_MUL_F16_t16_e64:
2298 case AMDGPU::V_MUL_F16_fake16_e64:
2299 case AMDGPU::V_MUL_F16_e64: {
2301 if ((
Op == AMDGPU::V_MUL_F32_e64 &&
2303 ((
Op == AMDGPU::V_MUL_F64_e64 ||
Op == AMDGPU::V_MUL_F64_pseudo_e64 ||
2304 Op == AMDGPU::V_MUL_F16_e64 ||
Op == AMDGPU::V_MUL_F16_t16_e64 ||
2305 Op == AMDGPU::V_MUL_F16_fake16_e64) &&
2308 MI.mayRaiseFPException())
2311 const MachineOperand *RegOp =
nullptr;
2312 const MachineOperand *ImmOp =
nullptr;
2313 const MachineOperand *Src0 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src0);
2314 const MachineOperand *Src1 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src1);
2315 if (Src0->
isImm()) {
2318 }
else if (Src1->
isImm()) {
2326 TII->hasModifiersSet(
MI, AMDGPU::OpName::src0_modifiers) ||
2327 TII->hasModifiersSet(
MI, AMDGPU::OpName::src1_modifiers) ||
2328 TII->hasModifiersSet(
MI, AMDGPU::OpName::omod) ||
2329 TII->hasModifiersSet(
MI, AMDGPU::OpName::clamp))
2332 return std::pair(RegOp, OMod);
2334 case AMDGPU::V_ADD_F64_e64:
2335 case AMDGPU::V_ADD_F64_pseudo_e64:
2336 case AMDGPU::V_ADD_F32_e64:
2337 case AMDGPU::V_ADD_F16_e64:
2338 case AMDGPU::V_ADD_F16_t16_e64:
2339 case AMDGPU::V_ADD_F16_fake16_e64: {
2341 if ((
Op == AMDGPU::V_ADD_F32_e64 &&
2343 ((
Op == AMDGPU::V_ADD_F64_e64 ||
Op == AMDGPU::V_ADD_F64_pseudo_e64 ||
2344 Op == AMDGPU::V_ADD_F16_e64 ||
Op == AMDGPU::V_ADD_F16_t16_e64 ||
2345 Op == AMDGPU::V_ADD_F16_fake16_e64) &&
2350 const MachineOperand *Src0 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src0);
2351 const MachineOperand *Src1 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src1);
2355 !
TII->hasModifiersSet(
MI, AMDGPU::OpName::src0_modifiers) &&
2356 !
TII->hasModifiersSet(
MI, AMDGPU::OpName::src1_modifiers) &&
2357 !
TII->hasModifiersSet(
MI, AMDGPU::OpName::clamp) &&
2358 !
TII->hasModifiersSet(
MI, AMDGPU::OpName::omod))
2369bool SIFoldOperandsImpl::tryFoldOMod(MachineInstr &
MI) {
2370 const MachineOperand *RegOp;
2372 std::tie(RegOp, OMod) = isOMod(
MI);
2374 RegOp->
getSubReg() != AMDGPU::NoSubRegister ||
2379 MachineOperand *DefOMod =
TII->getNamedOperand(*Def, AMDGPU::OpName::omod);
2383 if (
Def->mayRaiseFPException())
2388 if (
TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))
2398 MI.eraseFromParent();
2403 if (
TII->convertToThreeAddress(*Def,
nullptr,
nullptr))
2404 Def->eraseFromParent();
2411bool SIFoldOperandsImpl::tryFoldRegSequence(MachineInstr &
MI) {
2413 auto Reg =
MI.getOperand(0).getReg();
2415 if (!ST->hasGFX90AInsts() || !
TRI->isVGPR(*MRI,
Reg) ||
2420 if (!getRegSeqInit(Defs,
Reg))
2423 for (
auto &[
Op, SubIdx] : Defs) {
2426 if (
TRI->isAGPR(*MRI,
Op->getReg()))
2429 const MachineInstr *SubDef = MRI->
getVRegDef(
Op->getReg());
2437 MachineInstr *
UseMI =
Op->getParent();
2446 if (
Op->getSubReg())
2451 const TargetRegisterClass *OpRC =
TII->getRegClass(InstDesc,
OpIdx);
2452 if (!OpRC || !
TRI->isVectorSuperClass(OpRC))
2458 TII->get(AMDGPU::REG_SEQUENCE), Dst);
2460 for (
auto &[Def, SubIdx] : Defs) {
2461 Def->setIsKill(
false);
2462 if (
TRI->isAGPR(*MRI,
Def->getReg())) {
2475 RS->eraseFromParent();
2484 MI.eraseFromParent();
2492 Register &OutReg,
unsigned &OutSubReg) {
2502 if (
TRI.isAGPR(MRI, CopySrcReg)) {
2503 OutReg = CopySrcReg;
2512 if (!CopySrcDef || !CopySrcDef->
isCopy())
2519 OtherCopySrc.
getSubReg() != AMDGPU::NoSubRegister ||
2520 !
TRI.isAGPR(MRI, OtherCopySrcReg))
2523 OutReg = OtherCopySrcReg;
2557bool SIFoldOperandsImpl::tryFoldPhiAGPR(MachineInstr &
PHI) {
2561 if (!
TRI->isVGPR(*MRI, PhiOut))
2566 const TargetRegisterClass *ARC =
nullptr;
2567 for (
unsigned K = 1;
K <
PHI.getNumExplicitOperands();
K += 2) {
2568 MachineOperand &MO =
PHI.getOperand(K);
2570 if (!Copy || !
Copy->isCopy())
2574 unsigned AGPRRegMask = AMDGPU::NoSubRegister;
2578 const TargetRegisterClass *CopyInRC = MRI->
getRegClass(AGPRSrc);
2579 if (
const auto *SubRC =
TRI->getSubRegisterClass(CopyInRC, AGPRRegMask))
2590 bool IsAGPR32 = (ARC == &AMDGPU::AGPR_32RegClass);
2594 for (
unsigned K = 1;
K <
PHI.getNumExplicitOperands();
K += 2) {
2595 MachineOperand &MO =
PHI.getOperand(K);
2599 MachineBasicBlock *InsertMBB =
nullptr;
2602 unsigned CopyOpc = AMDGPU::COPY;
2607 if (
Def->isCopy()) {
2609 unsigned AGPRSubReg = AMDGPU::NoSubRegister;
2622 MachineOperand &CopyIn =
Def->getOperand(1);
2625 CopyOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
2628 InsertMBB =
Def->getParent();
2636 MachineInstr *
MI =
BuildMI(*InsertMBB, InsertPt,
PHI.getDebugLoc(),
2637 TII->get(CopyOpc), NewReg)
2647 PHI.getOperand(0).setReg(NewReg);
2653 TII->get(AMDGPU::COPY), PhiOut)
2661bool SIFoldOperandsImpl::tryFoldLoad(MachineInstr &
MI) {
2663 if (!ST->hasGFX90AInsts() ||
MI.getNumExplicitDefs() != 1)
2666 MachineOperand &
Def =
MI.getOperand(0);
2683 while (!
Users.empty()) {
2684 const MachineInstr *
I =
Users.pop_back_val();
2685 if (!
I->isCopy() && !
I->isRegSequence())
2687 Register DstReg =
I->getOperand(0).getReg();
2691 if (
TRI->isAGPR(*MRI, DstReg))
2695 Users.push_back(&U);
2698 const TargetRegisterClass *RC = MRI->
getRegClass(DefReg);
2700 if (!
TII->isOperandLegal(
MI, 0, &Def)) {
2705 while (!MoveRegs.
empty()) {
2747bool SIFoldOperandsImpl::tryOptimizeAGPRPhis(MachineBasicBlock &
MBB) {
2750 if (ST->hasGFX90AInsts())
2754 DenseMap<std::pair<Register, unsigned>, std::vector<MachineOperand *>>
2757 for (
auto &
MI :
MBB) {
2761 if (!
TRI->isAGPR(*MRI,
MI.getOperand(0).getReg()))
2764 for (
unsigned K = 1;
K <
MI.getNumOperands();
K += 2) {
2765 MachineOperand &PhiMO =
MI.getOperand(K);
2775 for (
const auto &[Entry, MOs] : RegToMO) {
2776 if (MOs.size() == 1)
2781 MachineBasicBlock *DefMBB =
Def->getParent();
2785 const TargetRegisterClass *ARC =
getRegOpRC(*MRI, *
TRI, *MOs.front());
2788 MachineInstr *VGPRCopy =
2790 TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TempVGPR)
2796 TII->get(AMDGPU::COPY), TempAGPR)
2800 for (MachineOperand *MO : MOs) {
2812bool SIFoldOperandsImpl::run(MachineFunction &MF) {
2818 MFI = MF.
getInfo<SIMachineFunctionInfo>();
2828 MachineOperand *CurrentKnownM0Val =
nullptr;
2832 if (tryFoldZeroHighBits(
MI)) {
2837 if (
MI.isRegSequence() && tryFoldRegSequence(
MI)) {
2842 if (
MI.isPHI() && tryFoldPhiAGPR(
MI)) {
2847 if (
MI.mayLoad() && tryFoldLoad(
MI)) {
2852 if (
TII->isFoldableCopy(
MI)) {
2853 Changed |= tryFoldFoldableCopy(
MI, CurrentKnownM0Val);
2858 if (CurrentKnownM0Val &&
MI.modifiesRegister(AMDGPU::M0,
TRI))
2859 CurrentKnownM0Val =
nullptr;
2877 bool Changed = SIFoldOperandsImpl().run(MF);
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool updateOperand(Instruction *Inst, unsigned Idx, Instruction *Mat)
Updates the operand at Idx in instruction Inst with the result of instruction Mat.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
AMD GCN specific subclass of TargetSubtarget.
static Register UseReg(const MachineOperand &MO)
const HexagonInstrInfo * TII
iv Induction Variable Users
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
MachineInstr unsigned OpIdx
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static unsigned macToMad(unsigned Opc)
static bool isAGPRCopy(const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI, const MachineInstr &Copy, Register &OutReg, unsigned &OutSubReg)
Checks whether Copy is an AGPR -> VGPR copy.
static void appendFoldCandidate(SmallVectorImpl< FoldCandidate > &FoldList, FoldCandidate &&Entry)
static const TargetRegisterClass * getRegOpRC(const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const MachineOperand &MO)
static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result, uint32_t LHS, uint32_t RHS)
static int getOModValue(unsigned Opc, int64_t Val)
static unsigned getMovOpc(bool IsScalar)
static MachineOperand * lookUpCopyChain(const SIInstrInfo &TII, const MachineRegisterInfo &MRI, Register SrcReg)
static bool checkImmOpForPKF32InstrReplicatesLower32BitsOfScalarOperand(const FoldableDef &OpToFold)
static bool isPKF32InstrReplicatesLower32BitsOfScalarOperand(const GCNSubtarget *ST, MachineInstr *MI, unsigned OpNo)
Interface definition for SIInstrInfo.
Interface definition for SIRegisterInfo.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
Represent the analysis usage information of a pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass iff it does not add or remove basic blocks from the function or modify terminator instructions in any way.
Represents analyses that only rely on functions' control flow.
FunctionPass class - This class is used to implement most global optimizations.
const SIInstrInfo * getInstrInfo() const override
bool hasDOTOpSelHazard() const
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns true if the result of this instruction with a 16-bit result returned in a 32-bit register implicitly zeroes the high 16 bits, rather than preserving the original value.
const HexagonRegisterInfo & getRegisterInfo() const
ArrayRef< MCOperandInfo > operands() const
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
bool isVariadic() const
Return true if this instruction can have a variable number of operands.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
An RAII based helper class to modify MachineFunctionProperties when running pass.
LLVM_ABI iterator SkipPHIsLabelsAndDebug(iterator I, Register Reg=Register(), bool SkipPseudoOp=true)
Return the first instruction in MBB after I that is not a PHI, label or debug.
LLVM_ABI LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
LivenessQueryResult
Possible outcome of a register liveness query to computeRegisterLiveness()
@ LQR_Dead
Register is known to be fully dead.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & setOperandDead(unsigned OpIdx) const
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr reads the specified register.
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
unsigned getOperandNo(const_mop_iterator I) const
Returns the number of the operand iterator I points to.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
void clearFlag(MIFlag Flag)
clearFlag - Clear a MI flag.
bool isRegSequence() const
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
MachineOperand * mop_iterator
iterator/begin/end - Iterate over all operands of a machine instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
LLVM_ABI void substVirtReg(Register Reg, unsigned SubIdx, const TargetRegisterInfo &)
substVirtReg - Substitute the current register with the virtual subregister Reg:SubReg.
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
LLVM_ABI void substPhysReg(MCRegister Reg, const TargetRegisterInfo &)
substPhysReg - Substitute the current register with the physical register Reg, taking any existing Su...
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_GlobalAddress
Address of a global value.
@ MO_FrameIndex
Abstract Stack Frame Index.
@ MO_Register
Register operand.
static MachineOperand CreateFI(int Idx)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
use_nodbg_iterator use_nodbg_begin(Register RegNo) const
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI void clearKillFlags(Register Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
iterator_range< use_nodbg_iterator > use_nodbg_operands(Register Reg) const
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLVM_ABI bool hasOneNonDBGUser(Register RegNo) const
hasOneNonDBGUser - Return true if there is exactly one non-Debug instruction using the specified register.
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
static bool hasSameClamp(const MachineInstr &A, const MachineInstr &B)
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
SIModeRegisterDefaults getMode() const
bool insert(const value_type &X)
Insert a new element into the SetVector.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
Represent a constant reference to a string, i.e.
static const unsigned CommuteAnyOperandIndex
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType)
LLVM_READONLY int32_t getMFMAEarlyClobberOp(uint32_t Opcode)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY int32_t getVOPe32(uint32_t Opcode)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT64
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
LLVM_READONLY int32_t getFlatScratchInstSSfromSV(uint32_t Opcode)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
constexpr bool isVOP3(const T &...O)
constexpr bool isMAI(const T &...O)
constexpr bool isSWMMAC(const T &...O)
constexpr bool isVOP3P(const T &...O)
constexpr bool isWMMA(const T &...O)
constexpr bool isDOT(const T &...O)
constexpr bool isPacked(const T &...O)
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
MachineBasicBlock::instr_iterator getBundleStart(MachineBasicBlock::instr_iterator I)
Returns an iterator to the first instruction in the bundle containing I.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
FunctionPass * createSIFoldOperandsLegacyPass()
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
char & SIFoldOperandsLegacyID
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
iterator_range< df_iterator< T > > depth_first(const T &G)
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
constexpr uint64_t Make_64(uint32_t High, uint32_t Low)
Make a 64-bit integer from a high / low pair of 32-bit integers.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
DenormalModeKind Output
Denormal flushing mode for floating point instruction results in the default floating point environme...
DenormalMode FP64FP16Denormals
If this is set, neither input nor output denormals are flushed for both f64 and f16/v2f16 instructions...
bool IEEE
Floating point opcodes that support exception flag gathering quiet and propagate signaling NaN inputs...
DenormalMode FP32Denormals
If this is set, neither input nor output denormals are flushed for most f32 instructions.