LLVM 23.0.0git
GCNVOPDUtils.cpp
Go to the documentation of this file.
1//===- GCNVOPDUtils.cpp - GCN VOPD Utils ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file contains the AMDGPU DAG scheduling
10/// mutation to pair VOPD instructions back to back. It also contains
11/// subroutines useful in the creation of VOPD instructions
12//
13//===----------------------------------------------------------------------===//
14
15#include "GCNVOPDUtils.h"
16#include "AMDGPUSubtarget.h"
17#include "GCNSubtarget.h"
19#include "SIInstrInfo.h"
21#include "llvm/ADT/STLExtras.h"
31#include "llvm/MC/MCInst.h"
32
33using namespace llvm;
34
35#define DEBUG_TYPE "gcn-vopd-utils"
36
37// Check if MI is a VOP3P instruction with operands that satisfy the constraints
38// for mapping it to a VOP2/VOPD opcode: no modifiers, no clamp, src1 and src2
39// are registers (src0 can be register or literal), and src2 is same as dst.
40static bool canMapVOP3PToVOPD(const MachineInstr &MI) {
41 unsigned Opc = MI.getOpcode();
42 if (Opc != AMDGPU::V_DOT2_F32_F16 && Opc != AMDGPU::V_DOT2_F32_BF16)
43 return false;
44 // src0 can be register or literal
45 int16_t Src0ModsIdx = getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
46 if (MI.getOperand(Src0ModsIdx).getImm() != SISrcMods::OP_SEL_1)
47 return false;
48 int16_t Src1ModsIdx = getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
49 if (MI.getOperand(Src1ModsIdx).getImm() != SISrcMods::OP_SEL_1)
50 return false;
51 int16_t Src1Idx = getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
52 if (!MI.getOperand(Src1Idx).isReg())
53 return false;
54 int16_t Src2ModsIdx = getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
55 if (MI.getOperand(Src2ModsIdx).getImm() != SISrcMods::OP_SEL_1)
56 return false;
57 int16_t Src2Idx = getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
58 if (!MI.getOperand(Src2Idx).isReg())
59 return false;
60 int16_t ClampIdx = getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
61 if (MI.getOperand(ClampIdx).getImm() != 0)
62 return false;
63 int16_t VdstIdx = getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
64 return MI.getOperand(VdstIdx).getReg() == MI.getOperand(Src2Idx).getReg();
65}
66
68 const MachineInstr &MIX,
69 const MachineInstr &MIY, bool IsVOPD3) {
70 namespace VOPD = AMDGPU::VOPD;
71
72 const MachineFunction *MF = MIX.getMF();
73 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
74
75 if (IsVOPD3 && !ST.hasVOPD3())
76 return false;
77 if (!IsVOPD3 && ((TII.isVOP3(MIX) && !canMapVOP3PToVOPD(MIX)) ||
78 (TII.isVOP3(MIY) && !canMapVOP3PToVOPD(MIY))))
79 return false;
80 if (TII.isDPP(MIX) || TII.isDPP(MIY))
81 return false;
82
83 const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo());
84 const MachineRegisterInfo &MRI = MF->getRegInfo();
85 // Literals also count against scalar bus limit
87 auto addLiteral = [&](const MachineOperand &Op) {
88 for (auto &Literal : UniqueLiterals) {
89 if (Literal->isIdenticalTo(Op))
90 return;
91 }
92 UniqueLiterals.push_back(&Op);
93 };
94 SmallVector<Register> UniqueScalarRegs;
95
96 // MIX must not modify any registers used by MIY.
97 for (const auto &Use : MIY.uses())
98 if (Use.isReg() && MIX.modifiesRegister(Use.getReg(), TRI))
99 return false;
100
101 auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) {
102 const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? MIX : MIY;
103 const MachineOperand &Operand = MI.getOperand(OperandIdx);
104 if (Operand.isReg() && TRI->isVectorRegister(MRI, Operand.getReg()))
105 return Operand.getReg();
106 return Register();
107 };
108
109 auto InstInfo = AMDGPU::getVOPDInstInfo(MIX.getDesc(), MIY.getDesc());
110
111 for (auto CompIdx : VOPD::COMPONENTS) {
112 const MachineInstr &MI = (CompIdx == VOPD::X) ? MIX : MIY;
113
114 const MachineOperand &Src0 = *TII.getNamedOperand(MI, AMDGPU::OpName::src0);
115 if (Src0.isReg()) {
116 if (!TRI->isVectorRegister(MRI, Src0.getReg())) {
117 if (!is_contained(UniqueScalarRegs, Src0.getReg()))
118 UniqueScalarRegs.push_back(Src0.getReg());
119 }
120 } else if (!TII.isInlineConstant(Src0)) {
121 if (IsVOPD3)
122 return false;
123 addLiteral(Src0);
124 }
125
126 if (InstInfo[CompIdx].hasMandatoryLiteral()) {
127 if (IsVOPD3)
128 return false;
129
130 auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex();
131 addLiteral(MI.getOperand(CompOprIdx));
132 }
133 if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC))
134 UniqueScalarRegs.push_back(AMDGPU::VCC_LO);
135
136 if (IsVOPD3) {
137 for (auto OpName : {AMDGPU::OpName::src1, AMDGPU::OpName::src2}) {
138 const MachineOperand *Src = TII.getNamedOperand(MI, OpName);
139 if (!Src)
140 continue;
141 if (OpName == AMDGPU::OpName::src2) {
142 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::bitop3))
143 continue;
144 if (MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64) {
145 UniqueScalarRegs.push_back(Src->getReg());
146 continue;
147 }
148 }
149 if (!Src->isReg() || !TRI->isVGPR(MRI, Src->getReg()))
150 return false;
151 }
152
153 for (auto OpName : {AMDGPU::OpName::clamp, AMDGPU::OpName::omod,
154 AMDGPU::OpName::op_sel}) {
155 if (TII.hasModifiersSet(MI, OpName))
156 return false;
157 }
158
159 // Neg is allowed, other modifiers are not. NB: even though sext has the
160 // same value as neg, there are no combinable instructions with sext.
161 for (auto OpName :
162 {AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
163 AMDGPU::OpName::src2_modifiers}) {
164 const MachineOperand *Mods = TII.getNamedOperand(MI, OpName);
165 if (Mods && (Mods->getImm() & ~SISrcMods::NEG))
166 return false;
167 }
168 }
169 }
170
171 if (UniqueLiterals.size() > 1)
172 return false;
173 if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)
174 return false;
175
176 // On GFX1170+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
177 // source-cache.
178 bool SkipSrc = (ST.hasGFX11_7Insts() || ST.hasGFX12Insts()) &&
179 MIX.getOpcode() == AMDGPU::V_MOV_B32_e32 &&
180 MIY.getOpcode() == AMDGPU::V_MOV_B32_e32;
181 bool AllowSameVGPR = ST.hasGFX1250Insts();
182
183 if (InstInfo.hasInvalidOperand(getVRegIdx, *TRI, SkipSrc, AllowSameVGPR,
184 IsVOPD3))
185 return false;
186
187 if (IsVOPD3) {
188 // BITOP3 can be converted to DUAL_BITOP2 only if src2 is zero.
189 // MIX check is only relevant to scheduling?
190 if (AMDGPU::hasNamedOperand(MIX.getOpcode(), AMDGPU::OpName::bitop3)) {
191 const MachineOperand &Src2 =
192 *TII.getNamedOperand(MIX, AMDGPU::OpName::src2);
193 if (!Src2.isImm() || Src2.getImm())
194 return false;
195 }
196 if (AMDGPU::hasNamedOperand(MIY.getOpcode(), AMDGPU::OpName::bitop3)) {
197 const MachineOperand &Src2 =
198 *TII.getNamedOperand(MIY, AMDGPU::OpName::src2);
199 if (!Src2.isImm() || Src2.getImm())
200 return false;
201 }
202 }
203
204 LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << MIX
205 << "\n\tY: " << MIY << "\n");
206 return true;
207}
208
209/// Check if the instr pair, FirstMI and SecondMI, should be scheduled
210/// together. Given SecondMI, when FirstMI is unspecified, then check if
211/// SecondMI may be part of a fused pair at all.
213 const TargetSubtargetInfo &TSI,
214 const MachineInstr *FirstMI,
215 const MachineInstr &SecondMI) {
216 const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);
217 const GCNSubtarget &ST = STII.getSubtarget();
218 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(ST);
219 unsigned Opc2 = SecondMI.getOpcode();
220
221 const auto checkVOPD = [&](bool VOPD3) -> bool {
222 auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2, EncodingFamily, VOPD3);
223
224 // One instruction case
225 if (!FirstMI)
226 return SecondCanBeVOPD.Y || SecondCanBeVOPD.X;
227
228 unsigned Opc = FirstMI->getOpcode();
229 auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc, EncodingFamily, VOPD3);
230
231 if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) ||
232 (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)))
233 return false;
234
235#ifdef EXPENSIVE_CHECKS
236 assert([&]() -> bool {
237 for (auto MII = MachineBasicBlock::const_iterator(FirstMI);
238 MII != FirstMI->getParent()->instr_end(); ++MII) {
239 if (&*MII == &SecondMI)
240 return true;
241 }
242 return false;
243 }() && "Expected FirstMI to precede SecondMI");
244#endif
245
246 return checkVOPDRegConstraints(STII, *FirstMI, SecondMI, VOPD3);
247 };
248
249 return checkVOPD(false) || (ST.hasVOPD3() && checkVOPD(true));
250}
251
252namespace {
253/// Adapts design from MacroFusion
254/// Puts valid candidate instructions back-to-back so they can easily
255/// be turned into VOPD instructions
256/// Greedily pairs instruction candidates. O(n^2) algorithm.
257struct VOPDPairingMutation : ScheduleDAGMutation {
258 MacroFusionPredTy shouldScheduleAdjacent; // NOLINT: function pointer
259
260 VOPDPairingMutation(
261 MacroFusionPredTy shouldScheduleAdjacent) // NOLINT: function pointer
263
264 void apply(ScheduleDAGInstrs *DAG) override {
265 const TargetInstrInfo &TII = *DAG->TII;
266 const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
267 if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) {
268 LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");
269 return;
270 }
271
272 std::vector<SUnit>::iterator ISUI, JSUI;
273 for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) {
274 const MachineInstr *IMI = ISUI->getInstr();
275 if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI))
276 continue;
277 if (!hasLessThanNumFused(*ISUI, 2))
278 continue;
279
280 for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) {
281 if (JSUI->isBoundaryNode())
282 continue;
283 const MachineInstr *JMI = JSUI->getInstr();
284 if (!hasLessThanNumFused(*JSUI, 2) ||
285 !shouldScheduleAdjacent(TII, ST, IMI, *JMI))
286 continue;
287 if (fuseInstructionPair(*DAG, *ISUI, *JSUI))
288 break;
289 }
290 }
291 LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");
292 }
293};
294} // namespace
295
296std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() {
297 return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent);
298}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, const MachineInstr &SecondMI)
Check if the instr pair, FirstMI and SecondMI, should be fused together.
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
AMD GCN specific subclass of TargetSubtarget.
static bool canMapVOP3PToVOPD(const MachineInstr &MI)
static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, const MachineInstr &SecondMI)
Check if the instr pair, FirstMI and SecondMI, should be scheduled together.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
Interface definition for SIInstrInfo.
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
MachineInstrBundleIterator< const MachineInstr > const_iterator
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
mop_range uses()
Returns all operands which may be register uses.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const GCNSubtarget & getSubtarget() const
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
MachineFunction & MF
Machine function.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
TargetSubtargetInfo - Generic base class for all target subtargets.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
bool hasVOPD(const MCSubtargetInfo &STI)
void apply(Opt *O, const Mod &M, const Mods &... Ms)
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
std::unique_ptr< ScheduleDAGMutation > createVOPDPairingMutation()
bool checkVOPDRegConstraints(const SIInstrInfo &TII, const MachineInstr &FirstMI, const MachineInstr &SecondMI, bool IsVOPD3)
LLVM_ABI bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, SUnit &SecondSU)
Create an artificial edge between FirstSU and SecondSU.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
DWARFExpression::Operation Op
bool(*)(const TargetInstrInfo &TII, const TargetSubtargetInfo &STI, const MachineInstr *FirstMI, const MachineInstr &SecondMI) MacroFusionPredTy
Check if the instr pair, FirstMI and SecondMI, should be fused together.
Definition MacroFusion.h:33
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
LLVM_ABI bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit)
Checks if the number of cluster edges between SU and its predecessors is less than FuseLimit.