78#define DEBUG_TYPE "si-fix-sgpr-copies"
81 "amdgpu-enable-merge-m0",
82 cl::desc(
"Merge and hoist M0 initializations"),
95 unsigned NumSVCopies = 0;
100 unsigned NumReadfirstlanes = 0;
102 bool NeedToBeConvertedToVALU =
false;
108 unsigned SiblingPenalty = 0;
/// Default constructor: creates an entry that refers to no instruction
/// (Copy is nullptr) and carries ID 0.
// NOTE(review): presumably required so that `V2SCopies[Info.ID] = ...` can
// default-construct the mapped value before assigning to it - confirm.
110 V2SCopyInfo() : Copy(nullptr),
ID(0){};
/// Creates the bookkeeping record for one copy instruction.
///
/// \param Id     Identifier stored in ID; the caller visible in this file
///               obtains it from getNextVGPRToSGPRCopyId().
/// \param C      The copy instruction this record describes (stored in Copy).
/// \param Width  Register size; the caller visible in this file passes
///               TRI->getRegSizeInBits(*DstRC), i.e. a size in bits.
///
/// NumReadfirstlanes is initialized to Width / 32. This is an unsigned
/// integer division, so any Width below 32 yields 0.
111 V2SCopyInfo(
unsigned Id, MachineInstr *
C,
unsigned Width)
112 : Copy(
C), NumReadfirstlanes(Width / 32), ID(
Id){};
113#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
115 dbgs() << ID <<
" : " << *Copy <<
"\n\tS:" << SChain.size()
116 <<
"\n\tSV:" << NumSVCopies <<
"\n\tSP: " << SiblingPenalty
117 <<
"\nScore: " << Score <<
"\n";
122class SIFixSGPRCopies {
123 MachineDominatorTree *MDT;
124 SmallVector<MachineInstr*, 4> SCCCopies;
125 SmallVector<MachineInstr*, 4> RegSequences;
126 SmallVector<MachineInstr*, 4> PHINodes;
127 SmallVector<MachineInstr*, 4> S2VCopies;
128 unsigned NextVGPRToSGPRCopyID = 0;
129 MapVector<unsigned, V2SCopyInfo> V2SCopies;
130 DenseMap<MachineInstr *, SetVector<unsigned>> SiblingPenalty;
131 DenseSet<MachineInstr *> PHISources;
134 MachineRegisterInfo *MRI;
135 const SIRegisterInfo *TRI;
136 const SIInstrInfo *TII;
/// Stores the dominator tree pointer in the MDT member. The pointer is only
/// saved here, not dereferenced; elsewhere in this file it is forwarded to
/// TII->legalizeOperands() and TII->moveToVALU().
138 SIFixSGPRCopies(MachineDominatorTree *MDT) : MDT(MDT) {}
140 bool run(MachineFunction &MF);
141 void fixSCCCopies(MachineFunction &MF);
142 void prepareRegSequenceAndPHIs(MachineFunction &MF);
/// Returns a fresh identifier for a VGPR-to-SGPR copy record.
///
/// The counter is pre-incremented, so the value returned is the counter's new
/// value. Because NextVGPRToSGPRCopyID is initialized to 0, the first
/// identifier handed out is 1.
143 unsigned getNextVGPRToSGPRCopyId() {
return ++NextVGPRToSGPRCopyID; }
144 bool needToBeConvertedToVALU(V2SCopyInfo *
I);
145 void analyzeVGPRToSGPRCopy(MachineInstr *
MI);
146 void lowerVGPR2SGPRCopies(MachineFunction &MF);
153 void processPHINode(MachineInstr &
MI);
158 bool tryMoveVGPRConstToSGPR(MachineOperand &MO,
Register NewDst,
159 MachineBasicBlock *BlockToInsertTo,
/// Constructs the legacy pass wrapper, forwarding the class's static ID
/// member to the MachineFunctionPass base-class constructor.
168 SIFixSGPRCopiesLegacy() : MachineFunctionPass(ID) {}
170 bool runOnMachineFunction(MachineFunction &MF)
override {
171 MachineDominatorTree *MDT =
172 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
173 SIFixSGPRCopies Impl(MDT);
/// Returns the fixed, human-readable name of this pass.
177 StringRef getPassName()
const override {
return "SI Fix SGPR copies"; }
179 void getAnalysisUsage(AnalysisUsage &AU)
const override {
188 MachineFunctionProperties getClearedProperties()
const override {
189 return MachineFunctionProperties().setNoPHIs();
// Out-of-class definition of the pass's static ID member, initialized to 0.
// This is the ID that the SIFixSGPRCopiesLegacy constructor passes to
// MachineFunctionPass.
201char SIFixSGPRCopiesLegacy::
ID = 0;
206 return new SIFixSGPRCopiesLegacy();
209static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
213 Register DstReg = Copy.getOperand(0).getReg();
214 Register SrcReg = Copy.getOperand(1).getReg();
218 :
TRI.getPhysRegBaseClass(SrcReg);
225 :
TRI.getPhysRegBaseClass(DstReg);
227 return std::pair(SrcRC, DstRC);
233 return SrcRC != &AMDGPU::VReg_1RegClass &&
TRI.isSGPRClass(DstRC) &&
234 TRI.hasVectorRegisters(SrcRC);
240 return DstRC != &AMDGPU::VReg_1RegClass &&
TRI.isSGPRClass(SrcRC) &&
241 TRI.hasVectorRegisters(DstRC);
248 auto &Src =
MI.getOperand(1);
255 const auto *
UseMI = MO.getParent();
258 if (MO.isDef() ||
UseMI->getParent() !=
MI.getParent() ||
259 UseMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
262 unsigned OpIdx = MO.getOperandNo();
263 if (
OpIdx >=
UseMI->getDesc().getNumOperands() ||
317 if (SubReg != AMDGPU::NoSubRegister)
331 bool IsAGPR =
TRI->isAGPRClass(DstRC);
333 for (
unsigned I = 1,
N =
MI.getNumOperands();
I !=
N;
I += 2) {
335 TRI->getRegClassForOperandReg(MRI,
MI.getOperand(
I));
337 "Expected SGPR REG_SEQUENCE to only have SGPR inputs");
349 unsigned Opc = NewSrcRC == &AMDGPU::AGPR_32RegClass ?
350 AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::COPY;
357 MI.getOperand(
I).setReg(TmpReg);
369 if (Copy->getOpcode() != AMDGPU::COPY)
372 if (!MoveImm->isMoveImmediate())
376 TII->getNamedOperand(*MoveImm, AMDGPU::OpName::src0);
381 if (Copy->getOperand(1).getSubReg())
384 switch (MoveImm->getOpcode()) {
387 case AMDGPU::V_MOV_B32_e32:
388 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
389 SMovOp = AMDGPU::S_MOV_B32;
391 case AMDGPU::V_MOV_B64_e32:
392 case AMDGPU::V_MOV_B64_PSEUDO:
393 SMovOp = AMDGPU::S_MOV_B64_IMM_PSEUDO;
400template <
class UnaryPredicate>
410 while (!Worklist.
empty()) {
450 while (
I !=
MBB->end() &&
TII->isBasicBlockPrologue(*
I))
466 using InitListMap = std::map<unsigned, std::list<MachineInstr *>>;
477 for (
auto &MO :
MI.operands()) {
478 if ((MO.isReg() && ((MO.isDef() && MO.getReg() !=
Reg) || !MO.isDef())) ||
479 (!MO.isImm() && !MO.isReg()) || (MO.isImm() && Imm)) {
487 Inits[Imm->getImm()].push_front(&
MI);
492 for (
auto &
Init : Inits) {
493 auto &Defs =
Init.second;
495 for (
auto I1 = Defs.begin(),
E = Defs.end(); I1 !=
E; ) {
498 for (
auto I2 = std::next(I1); I2 !=
E; ) {
507 auto interferes = [&MDT, From, To](
MachineInstr* &Clobber) ->
bool {
510 bool MayClobberFrom =
isReachable(Clobber, &*From, MBBTo, MDT);
511 bool MayClobberTo =
isReachable(Clobber, &*To, MBBTo, MDT);
512 if (!MayClobberFrom && !MayClobberTo)
514 if ((MayClobberFrom && !MayClobberTo) ||
515 (!MayClobberFrom && MayClobberTo))
521 return !((MBBFrom == MBBTo &&
529 return C.first !=
Init.first &&
535 if (!interferes(MI2, MI1)) {
545 if (!interferes(MI1, MI2)) {
563 if (!interferes(MI1,
I) && !interferes(MI2,
I)) {
567 <<
"and moving from "
584 for (
auto &
Init : Inits) {
585 auto &Defs =
Init.second;
586 auto I = Defs.begin();
587 while (
I != Defs.end()) {
588 if (MergedInstrs.
count(*
I)) {
589 (*I)->eraseFromParent();
597 for (
auto &
Init : Inits) {
598 auto &Defs =
Init.second;
599 for (
auto *
MI : Defs) {
600 auto *
MBB =
MI->getParent();
605 if (!
TII->isBasicBlockPrologue(*
B))
608 auto R = std::next(
MI->getReverseIterator());
609 const unsigned Threshold = 50;
611 for (
unsigned I = 0; R !=
B &&
I < Threshold; ++R, ++
I)
612 if (R->readsRegister(
Reg,
TRI) || R->definesRegister(
Reg,
TRI) ||
613 TII->isSchedulingBoundary(*R,
MBB, *
MBB->getParent()))
635 TRI =
ST.getRegisterInfo();
636 TII =
ST.getInstrInfo();
639 SmallVector<MachineInstr *, 8> Relegalize;
641 for (MachineBasicBlock &
MBB : MF) {
644 MachineInstr &
MI = *
I;
646 switch (
MI.getOpcode()) {
650 if (
TII->isWMMA(
MI) &&
655 const TargetRegisterClass *SrcRC, *DstRC;
671 if (lowerSpecialCase(
MI,
I))
674 analyzeVGPRToSGPRCopy(&
MI);
679 case AMDGPU::STRICT_WQM:
680 case AMDGPU::SOFT_WQM:
681 case AMDGPU::STRICT_WWM:
682 case AMDGPU::INSERT_SUBREG:
684 case AMDGPU::REG_SEQUENCE: {
685 if (
TRI->isSGPRClass(
TII->getOpRegClass(
MI, 0))) {
686 for (MachineOperand &MO :
MI.operands()) {
687 if (!MO.isReg() || !MO.getReg().isVirtual())
689 const TargetRegisterClass *SrcRC = MRI->getRegClass(MO.getReg());
690 if (SrcRC == &AMDGPU::VReg_1RegClass)
693 if (
TRI->hasVectorRegisters(SrcRC)) {
694 const TargetRegisterClass *DestRC =
695 TRI->getEquivalentSGPRClass(SrcRC);
696 Register NewDst = MRI->createVirtualRegister(DestRC);
697 MachineBasicBlock *BlockToInsertCopy =
698 MI.isPHI() ?
MI.getOperand(MO.getOperandNo() + 1).getMBB()
704 if (!tryMoveVGPRConstToSGPR(MO, NewDst, BlockToInsertCopy,
705 PointToInsertCopy,
DL)) {
706 MachineInstr *NewCopy =
707 BuildMI(*BlockToInsertCopy, PointToInsertCopy,
DL,
708 TII->get(AMDGPU::COPY), NewDst)
711 analyzeVGPRToSGPRCopy(NewCopy);
712 PHISources.
insert(NewCopy);
720 else if (
MI.isRegSequence())
725 case AMDGPU::V_WRITELANE_B32: {
728 if (
ST.getConstantBusLimit(
MI.getOpcode()) != 1)
738 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
740 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src1);
741 MachineOperand &Src0 =
MI.getOperand(Src0Idx);
742 MachineOperand &Src1 =
MI.getOperand(Src1Idx);
746 Src0.
getReg() != AMDGPU::M0) &&
748 Src1.
getReg() != AMDGPU::M0)) {
755 for (MachineOperand *MO : {&Src0, &Src1}) {
756 if (MO->getReg().isVirtual()) {
757 MachineInstr *
DefMI = MRI->getVRegDef(MO->getReg());
761 MO->getReg() ==
Def.getReg() &&
762 MO->getSubReg() ==
Def.getSubReg()) {
764 if (Copied.
isImm() &&
765 TII->isInlineConstant(APInt(64, Copied.
getImm(),
true))) {
766 MO->ChangeToImmediate(Copied.
getImm());
779 TII->get(AMDGPU::COPY), AMDGPU::M0)
790 lowerVGPR2SGPRCopies(MF);
793 for (
auto *
MI : S2VCopies) {
796 const TargetRegisterClass *SrcRC, *DstRC;
802 for (
auto *
MI : RegSequences) {
804 if (
MI->isRegSequence())
807 for (
auto *
MI : PHINodes) {
810 while (!Relegalize.
empty())
813 if (MF.getTarget().getOptLevel() > CodeGenOptLevel::None &&
EnableM0Merge)
816 SiblingPenalty.clear();
819 RegSequences.clear();
827void SIFixSGPRCopies::processPHINode(MachineInstr &
MI) {
828 bool AllAGPRUses =
true;
829 SetVector<const MachineInstr *> worklist;
830 SmallPtrSet<const MachineInstr *, 4> Visited;
831 SetVector<MachineInstr *> PHIOperands;
835 bool HasUses =
false;
836 while (!worklist.
empty()) {
839 for (
const auto &Use : MRI->use_operands(
Reg)) {
841 const MachineInstr *
UseMI =
Use.getParent();
844 TRI->isAGPR(*MRI,
Use.getReg());
855 const TargetRegisterClass *RC0 = MRI->getRegClass(PHIRes);
856 if (HasUses && AllAGPRUses && !
TRI->isAGPRClass(RC0)) {
858 MRI->setRegClass(PHIRes,
TRI->getEquivalentAGPRClass(RC0));
859 for (
unsigned I = 1,
N =
MI.getNumOperands();
I !=
N;
I += 2) {
860 MachineInstr *
DefMI = MRI->getVRegDef(
MI.getOperand(
I).getReg());
866 if (
TRI->hasVectorRegisters(MRI->getRegClass(PHIRes)) ||
867 RC0 == &AMDGPU::VReg_1RegClass) {
869 TII->legalizeOperands(
MI, MDT);
873 while (!PHIOperands.
empty()) {
878bool SIFixSGPRCopies::tryMoveVGPRConstToSGPR(
879 MachineOperand &MaybeVGPRConstMO,
Register DstReg,
880 MachineBasicBlock *BlockToInsertTo,
883 MachineInstr *
DefMI = MRI->getVRegDef(MaybeVGPRConstMO.
getReg());
887 MachineOperand *SrcConst =
TII->getNamedOperand(*
DefMI, AMDGPU::OpName::src0);
888 if (SrcConst->
isReg())
891 const TargetRegisterClass *SrcRC =
892 MRI->getRegClass(MaybeVGPRConstMO.
getReg());
893 unsigned MoveSize =
TRI->getRegSizeInBits(*SrcRC);
895 MoveSize == 64 ? AMDGPU::S_MOV_B64_IMM_PSEUDO : AMDGPU::S_MOV_B32;
896 BuildMI(*BlockToInsertTo, PointToInsertTo,
DL,
TII->get(MoveOp), DstReg)
898 if (MRI->hasOneUse(MaybeVGPRConstMO.
getReg()))
900 MaybeVGPRConstMO.
setReg(DstReg);
904bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &
MI,
913 const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);
914 if (DstReg == AMDGPU::M0 &&
TRI->hasVectorRegisters(SrcRC)) {
916 MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
918 const MCInstrDesc &ReadFirstLaneDesc =
919 TII->get(AMDGPU::V_READFIRSTLANE_B32);
920 BuildMI(*
MI.getParent(),
MI,
MI.getDebugLoc(), ReadFirstLaneDesc, TmpReg)
921 .
add(
MI.getOperand(1));
923 unsigned SubReg =
MI.getOperand(1).getSubReg();
924 MI.getOperand(1).setReg(TmpReg);
925 MI.getOperand(1).setSubReg(AMDGPU::NoSubRegister);
927 const TargetRegisterClass *OpRC =
TII->getRegClass(ReadFirstLaneDesc, 1);
928 const TargetRegisterClass *ConstrainRC =
929 SubReg == AMDGPU::NoSubRegister
931 :
TRI->getMatchingSuperRegClass(SrcRC, OpRC, SubReg);
933 if (!MRI->constrainRegClass(SrcReg, ConstrainRC))
938 if (tryMoveVGPRConstToSGPR(
MI.getOperand(1), DstReg,
MI.getParent(),
MI,
940 I =
MI.eraseFromParent();
948 SIInstrWorklist worklist;
950 TII->moveToVALU(worklist, MDT);
959 MI.getOperand(1).ChangeToImmediate(Imm);
960 MI.addImplicitDefUseOperands(*
MI.getMF());
961 MI.setDesc(
TII->get(SMovOp));
967void SIFixSGPRCopies::analyzeVGPRToSGPRCopy(MachineInstr*
MI) {
971 const TargetRegisterClass *DstRC =
TRI->getRegClassForReg(*MRI, DstReg);
973 V2SCopyInfo
Info(getNextVGPRToSGPRCopyId(),
MI,
974 TRI->getRegSizeInBits(*DstRC));
975 SmallVector<MachineInstr *, 8> AnalysisWorklist;
978 DenseSet<MachineInstr *> Visited;
980 while (!AnalysisWorklist.
empty()) {
984 if (!Visited.
insert(Inst).second)
995 const TargetRegisterClass *SrcRC, *DstRC;
1004 SiblingPenalty[Inst].insert(
Info.ID);
1006 SmallVector<MachineInstr *, 4>
Users;
1012 !
I->findRegisterDefOperand(AMDGPU::SCC,
nullptr)) {
1013 if (
I->readsRegister(AMDGPU::SCC,
nullptr))
1019 !
TII->isVALU(*Inst,
true)) {
1020 for (
auto &U : MRI->use_instructions(
Reg))
1021 Users.push_back(&U);
1024 for (
auto *U :
Users) {
1025 if (
TII->isSALU(*U))
1026 Info.SChain.insert(U);
1030 V2SCopies[
Info.ID] = std::move(Info);
1035bool SIFixSGPRCopies::needToBeConvertedToVALU(V2SCopyInfo *Info) {
1036 if (
Info->SChain.empty()) {
1041 Info->SChain, [&](MachineInstr *
A, MachineInstr *
B) ->
bool {
1042 return SiblingPenalty[A].size() < SiblingPenalty[B].size();
1044 Info->Siblings.remove_if([&](
unsigned ID) {
return ID ==
Info->ID; });
1050 SmallSet<std::pair<Register, unsigned>, 4> SrcRegs;
1051 for (
auto J :
Info->Siblings) {
1052 auto *InfoIt = V2SCopies.find(J);
1053 if (InfoIt != V2SCopies.end()) {
1054 MachineInstr *SiblingCopy = InfoIt->second.Copy;
1063 Info->SiblingPenalty = SrcRegs.
size();
1066 Info->NumSVCopies +
Info->SiblingPenalty +
Info->NumReadfirstlanes;
1067 unsigned Profit =
Info->SChain.size();
1068 Info->Score = Penalty > Profit ? 0 : Profit - Penalty;
1069 Info->NeedToBeConvertedToVALU =
Info->Score < 3;
1070 return Info->NeedToBeConvertedToVALU;
1073void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) {
1075 SmallVector<unsigned, 8> LoweringWorklist;
1076 for (
auto &
C : V2SCopies) {
1077 if (needToBeConvertedToVALU(&
C.second))
1085 while (!LoweringWorklist.
empty()) {
1087 auto *CurInfoIt = V2SCopies.find(CurID);
1088 if (CurInfoIt != V2SCopies.end()) {
1089 const V2SCopyInfo &
C = CurInfoIt->second;
1091 for (
auto S :
C.Siblings) {
1092 auto *SibInfoIt = V2SCopies.find(S);
1093 if (SibInfoIt != V2SCopies.end()) {
1094 V2SCopyInfo &
SI = SibInfoIt->second;
1096 if (!
SI.NeedToBeConvertedToVALU) {
1097 SI.SChain.set_subtract(
C.SChain);
1098 if (needToBeConvertedToVALU(&SI))
1101 SI.Siblings.remove_if([&](
unsigned ID) {
return ID ==
C.ID; });
1105 <<
" is being turned to VALU\n");
1109 V2SCopies.erase(
C.ID);
1117 for (
auto C : V2SCopies) {
1118 MachineInstr *
MI =
C.second.Copy;
1119 MachineBasicBlock *
MBB =
MI->getParent();
1123 <<
" is being turned to v_readfirstlane_b32"
1124 <<
" Score: " <<
C.second.Score <<
"\n");
1125 Register DstReg =
MI->getOperand(0).getReg();
1126 MRI->constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass);
1128 Register SrcReg =
MI->getOperand(1).getReg();
1129 unsigned SubReg =
MI->getOperand(1).getSubReg();
1130 const TargetRegisterClass *SrcRC =
1131 TRI->getRegClassForOperandReg(*MRI,
MI->getOperand(1));
1132 size_t SrcSize =
TRI->getRegSizeInBits(*SrcRC);
1133 if (SrcSize == 16) {
1135 "We do not expect to see 16-bit copies from VGPR to SGPR unless "
1136 "we have 16-bit VGPRs");
1137 assert(MRI->getRegClass(DstReg) == &AMDGPU::SReg_32RegClass ||
1138 MRI->getRegClass(DstReg) == &AMDGPU::SReg_32_XM0RegClass);
1140 MRI->setRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass);
1141 Register VReg32 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1143 Register Undef = MRI->createVirtualRegister(&AMDGPU::VGPR_16RegClass);
1146 .
addReg(SrcReg, {}, SubReg)
1147 .addImm(AMDGPU::lo16)
1152 }
else if (SrcSize == 32) {
1153 const MCInstrDesc &ReadFirstLaneDesc =
1154 TII->get(AMDGPU::V_READFIRSTLANE_B32);
1155 const TargetRegisterClass *OpRC =
TII->getRegClass(ReadFirstLaneDesc, 1);
1157 .
addReg(SrcReg, {}, SubReg);
1159 const TargetRegisterClass *ConstrainRC =
1160 SubReg == AMDGPU::NoSubRegister
1162 :
TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), OpRC,
1165 if (!MRI->constrainRegClass(SrcReg, ConstrainRC))
1169 TII->get(AMDGPU::REG_SEQUENCE), DstReg);
1170 int N =
TRI->getRegSizeInBits(*SrcRC) / 32;
1171 for (
int i = 0; i <
N; i++) {
1173 Result, *MRI,
MI->getOperand(1), SrcRC,
1174 TRI->getSubRegFromChannel(i), &AMDGPU::VGPR_32RegClass);
1176 MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1178 TII->get(AMDGPU::V_READFIRSTLANE_B32), PartialDst)
1180 Result.addReg(PartialDst).addImm(
TRI->getSubRegFromChannel(i));
1183 MI->eraseFromParent();
1187void SIFixSGPRCopies::fixSCCCopies(MachineFunction &MF) {
1188 const AMDGPU::LaneMaskConstants &LMC =
1190 for (MachineBasicBlock &
MBB : MF) {
1193 MachineInstr &
MI = *
I;
1199 if (SrcReg == AMDGPU::SCC) {
1201 MRI->createVirtualRegister(
TRI->getWaveMaskRegClass());
1206 I =
BuildMI(*
MI.getParent(), std::next(
I),
I->getDebugLoc(),
1207 TII->get(AMDGPU::COPY), DstReg)
1209 MI.eraseFromParent();
1212 if (DstReg == AMDGPU::SCC) {
1213 Register Tmp = MRI->createVirtualRegister(
TRI->getBoolRC());
1219 MI.eraseFromParent();
1229 SIFixSGPRCopies Impl(&MDT);
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
iv Induction Variable Users
Register const TargetRegisterInfo * TRI
Promote Memory to Register
MachineInstr unsigned OpIdx
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static std::pair< const TargetRegisterClass *, const TargetRegisterClass * > getCopyRegClasses(const MachineInstr &Copy, const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI)
static cl::opt< bool > EnableM0Merge("amdgpu-enable-merge-m0", cl::desc("Merge and hoist M0 initializations"), cl::init(true))
static bool hoistAndMergeSGPRInits(unsigned Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo *TRI, MachineDominatorTree &MDT, const TargetInstrInfo *TII)
static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, const SIRegisterInfo *TRI, const SIInstrInfo *TII, MachineRegisterInfo &MRI)
bool searchPredecessors(const MachineBasicBlock *MBB, const MachineBasicBlock *CutOff, UnaryPredicate Predicate)
static bool isReachable(const MachineInstr *From, const MachineInstr *To, const MachineBasicBlock *CutOff, MachineDominatorTree &MDT)
static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC, const TargetRegisterClass *DstRC, const SIRegisterInfo &TRI)
static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI, const SIRegisterInfo *TRI, const SIInstrInfo *TII)
static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC, const TargetRegisterClass *DstRC, const SIRegisterInfo &TRI)
static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy, const MachineInstr *MoveImm, const SIInstrInfo *TII, unsigned &SMovOp, int64_t &Imm)
static MachineBasicBlock::iterator getFirstNonPrologue(MachineBasicBlock *MBB, const TargetInstrInfo *TII)
const unsigned CSelectOpc
static const LaneMaskConstants & get(const GCNSubtarget &ST)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Implements a dense probed hash-table based set.
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
FunctionPass class - This class is used to implement most global optimizations.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineInstr *A, const MachineInstr *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
bool isImplicitDef() const
const MachineBasicBlock * getParent() const
bool isCompare(QueryType Type=IgnoreBundle) const
Return true if this instruction is a comparison.
bool isRegSequence() const
LLVM_ABI unsigned getNumExplicitDefs() const
Returns the number of non-implicit definitions.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI void clearKillFlags(Register Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
iterator_range< def_instr_iterator > def_instructions(Register Reg) const
use_instr_iterator use_instr_begin(Register RegNo) const
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
iterator_range< reg_nodbg_iterator > reg_nodbg_operands(Register Reg) const
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
A vector that has set insertion semantics.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
value_type pop_back_val()
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
@ Resolved
Queried, materialization begun.
NodeAddr< DefNode * > Def
NodeAddr< InstrNode * > Instr
NodeAddr< UseNode * > Use
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr RegState getDefRegState(bool B)
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
char & SIFixSGPRCopiesLegacyID
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
FunctionPass * createSIFixSGPRCopiesLegacyPass()
void insert(MachineInstr *MI)