35#define DEBUG_TYPE "gcn-vopd-utils"
41 unsigned Opc =
MI.getOpcode();
42 if (
Opc != AMDGPU::V_DOT2_F32_F16 &&
Opc != AMDGPU::V_DOT2_F32_BF16)
45 int16_t Src0ModsIdx = getNamedOperandIdx(
Opc, AMDGPU::OpName::src0_modifiers);
48 int16_t Src1ModsIdx = getNamedOperandIdx(
Opc, AMDGPU::OpName::src1_modifiers);
51 int16_t Src1Idx = getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
52 if (!
MI.getOperand(Src1Idx).isReg())
54 int16_t Src2ModsIdx = getNamedOperandIdx(
Opc, AMDGPU::OpName::src2_modifiers);
57 int16_t Src2Idx = getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
58 if (!
MI.getOperand(Src2Idx).isReg())
60 int16_t ClampIdx = getNamedOperandIdx(
Opc, AMDGPU::OpName::clamp);
61 if (
MI.getOperand(ClampIdx).getImm() != 0)
63 int16_t VdstIdx = getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
64 return MI.getOperand(VdstIdx).getReg() ==
MI.getOperand(Src2Idx).getReg();
75 if (IsVOPD3 && !ST.hasVOPD3())
80 if (
TII.isDPP(MIX) ||
TII.isDPP(MIY))
88 for (
auto &
Literal : UniqueLiterals) {
92 UniqueLiterals.push_back(&
Op);
97 for (
const auto &
Use : MIY.
uses())
101 auto getVRegIdx = [&](
unsigned OpcodeIdx,
unsigned OperandIdx) {
104 if (Operand.
isReg() &&
TRI->isVectorRegister(MRI, Operand.
getReg()))
111 for (
auto CompIdx : VOPD::COMPONENTS) {
116 if (!
TRI->isVectorRegister(MRI, Src0.
getReg())) {
120 }
else if (!
TII.isInlineConstant(Src0)) {
126 if (InstInfo[CompIdx].hasMandatoryLiteral()) {
130 auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex();
131 addLiteral(
MI.getOperand(CompOprIdx));
133 if (
MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC))
134 UniqueScalarRegs.
push_back(AMDGPU::VCC_LO);
137 for (
auto OpName : {AMDGPU::OpName::src1, AMDGPU::OpName::src2}) {
141 if (
OpName == AMDGPU::OpName::src2) {
144 if (
MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64) {
145 UniqueScalarRegs.
push_back(Src->getReg());
149 if (!Src->isReg() || !
TRI->isVGPR(MRI, Src->getReg()))
153 for (
auto OpName : {AMDGPU::OpName::clamp, AMDGPU::OpName::omod,
154 AMDGPU::OpName::op_sel}) {
162 {AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
163 AMDGPU::OpName::src2_modifiers}) {
171 if (UniqueLiterals.
size() > 1)
173 if ((UniqueLiterals.
size() + UniqueScalarRegs.
size()) > 2)
178 bool SkipSrc = (ST.hasGFX11_7Insts() || ST.hasGFX12Insts()) &&
179 MIX.
getOpcode() == AMDGPU::V_MOV_B32_e32 &&
180 MIY.
getOpcode() == AMDGPU::V_MOV_B32_e32;
181 bool AllowSameVGPR = ST.hasGFX1250Insts();
183 if (InstInfo.hasInvalidOperand(getVRegIdx, *
TRI, SkipSrc, AllowSameVGPR,
192 *
TII.getNamedOperand(MIX, AMDGPU::OpName::src2);
198 *
TII.getNamedOperand(MIY, AMDGPU::OpName::src2);
205 <<
"\n\tY: " << MIY <<
"\n");
221 const auto checkVOPD = [&](
bool VOPD3) ->
bool {
226 return SecondCanBeVOPD.Y || SecondCanBeVOPD.X;
231 if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) ||
232 (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)))
235#ifdef EXPENSIVE_CHECKS
238 MII != FirstMI->
getParent()->instr_end(); ++MII) {
239 if (&*MII == &SecondMI)
243 }() &&
"Expected FirstMI to precede SecondMI");
249 return checkVOPD(
false) || (ST.hasVOPD3() && checkVOPD(
true));
257struct VOPDPairingMutation : ScheduleDAGMutation {
264 void apply(ScheduleDAGInstrs *DAG)
override {
265 const TargetInstrInfo &
TII = *DAG->
TII;
268 LLVM_DEBUG(
dbgs() <<
"Target does not support VOPDPairingMutation\n");
272 std::vector<SUnit>::iterator ISUI, JSUI;
273 for (ISUI = DAG->
SUnits.begin(); ISUI != DAG->
SUnits.end(); ++ISUI) {
274 const MachineInstr *IMI = ISUI->getInstr();
280 for (JSUI = ISUI + 1; JSUI != DAG->
SUnits.end(); ++JSUI) {
281 if (JSUI->isBoundaryNode())
283 const MachineInstr *JMI = JSUI->getInstr();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, const MachineInstr &SecondMI)
Check if the instr pair, FirstMI and SecondMI, should be fused together.
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
AMD GCN specific subclass of TargetSubtarget.
static bool canMapVOP3PToVOPD(const MachineInstr &MI)
static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, const MachineInstr &SecondMI)
Check if the instr pair, FirstMI and SecondMI, should be scheduled together.
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Interface definition for SIInstrInfo.
This file defines the SmallVector class.
MachineInstrBundleIterator< const MachineInstr > const_iterator
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
mop_range uses()
Returns all operands which may be register uses.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const GCNSubtarget & getSubtarget() const
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
MachineFunction & MF
Machine function.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
TargetSubtargetInfo - Generic base class for all target subtargets.
A Use represents the edge between a Value definition and its users.
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
bool hasVOPD(const MCSubtargetInfo &STI)
void apply(Opt *O, const Mod &M, const Mods &... Ms)
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
std::unique_ptr< ScheduleDAGMutation > createVOPDPairingMutation()
bool checkVOPDRegConstraints(const SIInstrInfo &TII, const MachineInstr &FirstMI, const MachineInstr &SecondMI, bool IsVOPD3)
LLVM_ABI bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, SUnit &SecondSU)
Create an artificial edge between FirstSU and SecondSU.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
DWARFExpression::Operation Op
bool(*)(const TargetInstrInfo &TII, const TargetSubtargetInfo &STI, const MachineInstr *FirstMI, const MachineInstr &SecondMI) MacroFusionPredTy
Check if the instr pair, FirstMI and SecondMI, should be fused together.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit)
Checks if the number of cluster edges between SU and its predecessors is less than FuseLimit.