28 case AMDGPU::S_WAITCNT:
29 case AMDGPU::S_WAITCNT_soft:
30 case AMDGPU::S_WAITCNT_EXPCNT:
31 case AMDGPU::S_WAITCNT_LGKMCNT:
32 case AMDGPU::S_WAITCNT_VMCNT:
33 case AMDGPU::S_WAITCNT_VSCNT:
34 case AMDGPU::S_WAITCNT_VSCNT_soft:
35 case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
36 case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
37 case AMDGPU::S_WAITCNT_VMCNT_gfx10:
38 case AMDGPU::S_WAITCNT_VSCNT_gfx10:
39 case AMDGPU::S_WAITCNT_gfx10:
40 case AMDGPU::S_WAITCNT_gfx6_gfx7:
41 case AMDGPU::S_WAITCNT_vi:
42 return processWaitCnt(Inst, MCI);
48void AMDGPUInstrPostProcess::processWaitCnt(
Instruction &Inst,
50 for (
int Idx = 0,
N = MCI.
size(); Idx <
N; Idx++) {
55 }
else if (MCOp.
isImm()) {
67 generateWaitCntInfo();
82 case AMDGPU::S_WAITCNT:
83 case AMDGPU::S_WAITCNT_soft:
84 case AMDGPU::S_WAITCNT_EXPCNT:
85 case AMDGPU::S_WAITCNT_LGKMCNT:
86 case AMDGPU::S_WAITCNT_VMCNT:
87 case AMDGPU::S_WAITCNT_VSCNT:
88 case AMDGPU::S_WAITCNT_VSCNT_soft:
89 case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
90 case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
91 case AMDGPU::S_WAITCNT_VMCNT_gfx10:
92 case AMDGPU::S_WAITCNT_VSCNT_gfx10:
93 case AMDGPU::S_WAITCNT_gfx10:
94 case AMDGPU::S_WAITCNT_gfx6_gfx7:
95 case AMDGPU::S_WAITCNT_vi:
100 return handleWaitCnt(IssuedInst,
IR);
113 unsigned Lgkmcnt = 31;
115 unsigned CurrVmcnt = 0;
116 unsigned CurrExpcnt = 0;
117 unsigned CurrLgkmcnt = 0;
118 unsigned CurrVscnt = 0;
119 unsigned CyclesToWaitVm = ~0U;
120 unsigned CyclesToWaitExp = ~0U;
121 unsigned CyclesToWaitLgkm = ~0U;
122 unsigned CyclesToWaitVs = ~0U;
124 computeWaitCnt(
IR, Vmcnt, Expcnt, Lgkmcnt, Vscnt);
128 for (
const InstRef &PrevIR : IssuedInst) {
129 const Instruction &PrevInst = *PrevIR.getInstruction();
130 const unsigned PrevInstIndex = PrevIR.getSourceIndex() %
SrcMgr.size();
131 const WaitCntInfo &PrevInstWaitInfo = InstrWaitCntInfo[PrevInstIndex];
134 "We should know how many cycles are left for this instruction");
135 if (PrevInstWaitInfo.
VmCnt) {
137 if ((
unsigned)CyclesLeft < CyclesToWaitVm)
138 CyclesToWaitVm = CyclesLeft;
140 if (PrevInstWaitInfo.
ExpCnt) {
142 if ((
unsigned)CyclesLeft < CyclesToWaitExp)
143 CyclesToWaitExp = CyclesLeft;
145 if (PrevInstWaitInfo.
LgkmCnt) {
147 if ((
unsigned)CyclesLeft < CyclesToWaitLgkm)
148 CyclesToWaitLgkm = CyclesLeft;
150 if (PrevInstWaitInfo.
VsCnt) {
152 if ((
unsigned)CyclesLeft < CyclesToWaitVs)
153 CyclesToWaitVs = CyclesLeft;
157 unsigned CyclesToWait = ~0
U;
158 if (CurrVmcnt > Vmcnt && CyclesToWaitVm < CyclesToWait)
159 CyclesToWait = CyclesToWaitVm;
160 if (CurrExpcnt > Expcnt && CyclesToWaitExp < CyclesToWait)
161 CyclesToWait = CyclesToWaitExp;
162 if (CurrLgkmcnt > Lgkmcnt && CyclesToWaitLgkm < CyclesToWait)
163 CyclesToWait = CyclesToWaitLgkm;
164 if (CurrVscnt > Vscnt && CyclesToWaitVs < CyclesToWait)
165 CyclesToWait = CyclesToWaitVs;
173 if (CyclesToWait == ~0U)
178void AMDGPUCustomBehaviour::computeWaitCnt(
const InstRef &
IR,
unsigned &Vmcnt,
179 unsigned &Expcnt,
unsigned &Lgkmcnt,
186 case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
187 case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
188 case AMDGPU::S_WAITCNT_VMCNT_gfx10:
189 case AMDGPU::S_WAITCNT_VSCNT_gfx10: {
195 assert(OpReg && OpReg->
isReg() &&
"First operand should be a register.");
196 assert(OpImm && OpImm->
isImm() &&
"Second operand should be an immediate.");
197 if (OpReg->
getReg() != AMDGPU::SGPR_NULL) {
202 <<
MCII.getName(Opcode) <<
" will be completely "
203 <<
"ignored. So the wait may not be accurate.\n";
209 case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
212 case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
213 Lgkmcnt = OpImm->
getImm();
215 case AMDGPU::S_WAITCNT_VMCNT_gfx10:
218 case AMDGPU::S_WAITCNT_VSCNT_gfx10:
224 case AMDGPU::S_WAITCNT_gfx10:
225 case AMDGPU::S_WAITCNT_gfx6_gfx7:
226 case AMDGPU::S_WAITCNT_vi:
227 unsigned WaitCnt = Inst.
getOperand(0)->getImm();
233void AMDGPUCustomBehaviour::generateWaitCntInfo() {
245 InstrWaitCntInfo.resize(
SrcMgr.size());
248 const std::unique_ptr<Instruction> &Inst = EN.value();
249 unsigned Index = EN.index();
250 unsigned Opcode = Inst->getOpcode();
251 const MCInstrDesc &MCID =
MCII.get(Opcode);
253 InstrWaitCntInfo[
Index].LgkmCnt =
true;
254 if (isAlwaysGDS(Opcode) || hasModifiersSet(Inst, AMDGPU::OpName::gds))
255 InstrWaitCntInfo[
Index].ExpCnt =
true;
261 InstrWaitCntInfo[
Index].LgkmCnt =
true;
262 if (!
STI.hasFeature(AMDGPU::FeatureVscnt))
263 InstrWaitCntInfo[
Index].VmCnt =
true;
265 InstrWaitCntInfo[
Index].VmCnt =
true;
267 InstrWaitCntInfo[
Index].VsCnt =
true;
270 if (!
STI.hasFeature(AMDGPU::FeatureVscnt))
271 InstrWaitCntInfo[
Index].VmCnt =
true;
275 InstrWaitCntInfo[
Index].VmCnt =
true;
277 InstrWaitCntInfo[
Index].VsCnt =
true;
284 InstrWaitCntInfo[
Index].ExpCnt =
true;
286 InstrWaitCntInfo[
Index].LgkmCnt =
true;
288 InstrWaitCntInfo[
Index].ExpCnt =
true;
291 case AMDGPU::S_SENDMSG:
292 case AMDGPU::S_SENDMSGHALT:
293 case AMDGPU::S_MEMTIME:
294 case AMDGPU::S_MEMREALTIME:
295 InstrWaitCntInfo[
Index].LgkmCnt =
true;
303bool AMDGPUCustomBehaviour::hasModifiersSet(
304 const std::unique_ptr<Instruction> &Inst, AMDGPU::OpName OpName)
const {
305 int Idx = AMDGPU::getNamedOperandIdx(Inst->getOpcode(), OpName);
309 const MCAOperand *
Op = Inst->getOperand(Idx);
310 if (
Op ==
nullptr || !
Op->isImm() || !
Op->getImm())
317bool AMDGPUCustomBehaviour::isAlwaysGDS(uint32_t Opcode)
const {
318 return Opcode == AMDGPU::DS_ORDERED_COUNT ||
319 Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
320 Opcode == AMDGPU::DS_SUB_GS_REG_RTN ||
329static CustomBehaviour *
336static InstrPostProcess *
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static CustomBehaviour * createAMDGPUCustomBehaviour(const MCSubtargetInfo &STI, const mca::SourceMgr &SrcMgr, const MCInstrInfo &MCII)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTargetMCA()
Extern function to initialize the targets for the AMDGPU backend.
static InstrPostProcess * createAMDGPUInstrPostProcess(const MCSubtargetInfo &STI, const MCInstrInfo &MCII)
This file defines the AMDGPUCustomBehaviour class which inherits from CustomBehaviour.
Provides AMDGPU specific target descriptions.
#define LLVM_EXTERNAL_VISIBILITY
Legalize the Machine IR a function's Machine IR
static const uint32_t IV[8]
Represent a constant reference to an array (0 or more elements consecutively in memory),...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Instances of this class represent a single low-level machine instruction.
unsigned getOpcode() const
const MCOperand & getOperand(unsigned i) const
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool mayLoad() const
Return true if this instruction could possibly read memory.
Interface to description of machine instruction set.
Instances of this class represent operands of the MCInst class.
MCRegister getReg() const
Returns the register number.
Generic base class for all target subtargets.
Value * getOperand(unsigned i) const
static LLVM_ABI raw_ostream & warning()
Convenience method for printing "warning: " to stderr.
unsigned checkCustomHazard(ArrayRef< InstRef > IssuedInst, const InstRef &IR) override
This method is used to determine if an instruction should be allowed to be dispatched.
AMDGPUCustomBehaviour(const MCSubtargetInfo &STI, const mca::SourceMgr &SrcMgr, const MCInstrInfo &MCII)
void postProcessInstruction(Instruction &Inst, const MCInst &MCI) override
This method can be overridden by targets to modify the mca::Instruction object after it has been lower...
const mca::SourceMgr & SrcMgr
const MCSubtargetInfo & STI
CustomBehaviour(const MCSubtargetInfo &STI, const mca::SourceMgr &SrcMgr, const MCInstrInfo &MCII)
An InstRef contains both a SourceMgr index and Instruction pair.
unsigned getOpcode() const
void addOperand(const MCAOperand Op)
An instruction propagated through the simulated instruction pipeline.
int getCyclesLeft() const
A representation of an mca::Instruction operand for use in mca::CustomBehaviour.
unsigned getReg() const
Returns the register number.
static MCAOperand createImm(int64_t Val)
static MCAOperand createReg(unsigned Reg)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded)
bool getMUBUFIsBufferInv(unsigned Opc)
constexpr bool isAtomicRet(const T &...O)
constexpr bool isFLAT(const T &...O)
constexpr bool isAtomicNoRet(const T &...O)
constexpr bool isSMRD(const T &...O)
constexpr bool isMIMG(const T &...O)
constexpr bool isVMEM(const T &...O)
constexpr bool isGWS(const T &...O)
constexpr bool isEXP(const T &...O)
constexpr bool usesLGKM_CNT(const T &...O)
constexpr bool isDS(const T &...O)
constexpr int UNKNOWN_CYCLES
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Target & getTheR600Target()
The target for R600 GPUs.
LLVM_ABI SourceMgr SrcMgr
Target & getTheGCNTarget()
The target for GCN GPUs.
DWARFExpression::Operation Op
static void RegisterInstrPostProcess(Target &T, Target::InstrPostProcessCtorTy Fn)
RegisterInstrPostProcess - Register an InstrPostProcess implementation for the given target.
static void RegisterCustomBehaviour(Target &T, Target::CustomBehaviourCtorTy Fn)
RegisterCustomBehaviour - Register a CustomBehaviour implementation for the given target.
Abstracting the input code sequence (a sequence of MCInst) and assigning unique identifiers to every ...