LLVM 23.0.0git
AMDGPUHWEvents.cpp
Go to the documentation of this file.
1//===- AMDGPUHWEvents.cpp ---------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDGPUHWEvents.h"
10#include "GCNSubtarget.h"
11#include "SIInstrInfo.h"
13#include "llvm/Support/Debug.h"
15
16namespace llvm {
17namespace AMDGPU {
18
19#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug helper: streams this HWEvents value to dbgs(), followed by a newline.
20LLVM_DUMP_METHOD void HWEvents::dump() const { dbgs() << *this << "\n"; }
21#endif
22
24 const SIInstrInfo &TII) {
25 if (TII.isVALU(Inst, /*AllowLDSDMA=*/true) && !SIInstrInfo::isLDSDMA(Inst)) {
26 // Core/Side-, DP-, XDL- and TRANS-MACC VALU instructions complete
27 // out-of-order with respect to each other, so each of these classes
28 // has its own event.
29
30 if (TII.isXDL(Inst))
31 return HWEvents::VGPR_XDL_WRITE;
32
33 if (TII.isTRANS(Inst))
34 return HWEvents::VGPR_TRANS_WRITE;
35
37 return HWEvents::VGPR_DPMACC_WRITE;
38
39 return HWEvents::VGPR_CSMACC_WRITE;
40 }
41
42 // FLAT and LDS instructions may read their VGPR sources out-of-order
43 // with respect to each other and all other VMEM instructions, so
44 // each of these also has a separate event.
45
46 if (TII.isFLAT(Inst))
47 return HWEvents::VGPR_FLAT_READ;
48
49 if (TII.isDS(Inst))
50 return HWEvents::VGPR_LDS_READ;
51
52 if (TII.isVMEM(Inst) || TII.isVIMAGE(Inst) || TII.isVSAMPLE(Inst))
53 return HWEvents::VGPR_VMEM_READ;
54
55 // Otherwise, no hazard.
56 return HWEvents::NONE;
57}
58
60 const SIInstrInfo &TII) {
61 switch (Inst.getOpcode()) {
62 // FIXME: GLOBAL_INV needs to be tracked with xcnt too.
63 case AMDGPU::GLOBAL_INV:
64 return HWEvents::GLOBAL_INV_ACCESS; // tracked using loadcnt, but doesn't
65 // write VGPRs
66 case AMDGPU::GLOBAL_WB:
67 case AMDGPU::GLOBAL_WBINV:
68 return HWEvents::VMEM_WRITE_ACCESS; // tracked using storecnt
69 default:
70 break;
71 }
72
74 // LDS DMA loads are also stores, but on the LDS side. On the VMEM side
75 // these should use VM_CNT.
77 return HWEvents::VMEM_READ_ACCESS;
78
79 if (Inst.mayStore() &&
80 (!Inst.mayLoad() || SIInstrInfo::isAtomicNoRet(Inst))) {
81 if (TII.mayAccessScratch(Inst))
82 return HWEvents::SCRATCH_WRITE_ACCESS;
83 return HWEvents::VMEM_WRITE_ACCESS;
84 }
85
86 if (SIInstrInfo::isFLAT(Inst))
87 return HWEvents::VMEM_READ_ACCESS;
88
89 if (SIInstrInfo::isImage(Inst)) {
91 const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
92 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
93
94 if (BaseInfo->BVH)
95 return HWEvents::VMEM_BVH_READ_ACCESS;
96
97 // We have to make an additional check for isVSAMPLE here since some
98 // instructions don't have a sampler, but are still classified as sampler
99 // instructions for the purposes of e.g. waitcnt.
100 if (BaseInfo->Sampler || BaseInfo->MSAA || SIInstrInfo::isVSAMPLE(Inst))
101 return HWEvents::VMEM_SAMPLER_READ_ACCESS;
102 }
103
104 return HWEvents::VMEM_READ_ACCESS;
105}
106
108 const GCNSubtarget &ST,
109 const SIInstrInfo &TII) {
110 if (TII.isDS(Inst) && TII.usesLGKM_CNT(Inst)) {
111 if (TII.isAlwaysGDS(Inst.getOpcode()) ||
112 TII.hasModifiersSet(Inst, AMDGPU::OpName::gds))
113 return HWEvents::GDS_ACCESS | HWEvents::GDS_GPR_LOCK;
114
115 return HWEvents::LDS_ACCESS;
116 }
117
118 if (TII.isFLAT(Inst)) {
120 return getSimplifiedVMEMEventsFor(Inst, TII);
121
122 assert(Inst.mayLoadOrStore());
123 HWEvents E = HWEvents::NONE;
124 if (TII.mayAccessVMEMThroughFlat(Inst)) {
125 if (ST.hasWaitXcnt())
126 E |= HWEvents::VMEM_GROUP;
128 }
129
130 if (TII.mayAccessLDSThroughFlat(Inst))
131 E |= HWEvents::LDS_ACCESS;
132
134 E |= HWEvents::ASYNC_ACCESS;
135
136 return E;
137 }
138
140 return HWEvents::TENSOR_ACCESS;
141
142 if (SIInstrInfo::isVMEM(Inst) &&
144 Inst.getOpcode() == AMDGPU::BUFFER_WBL2)) {
145 // BUFFER_WBL2 is included here because, unlike invalidates, it has to be
146 // followed by "S_WAITCNT vmcnt(0)" to ensure the writeback has
147 // completed.
149 if (ST.hasWaitXcnt())
150 E |= HWEvents::VMEM_GROUP;
151 if (ST.vmemWriteNeedsExpWaitcnt() &&
152 (Inst.mayStore() || SIInstrInfo::isAtomicRet(Inst)))
153 E |= HWEvents::VMW_GPR_LOCK;
154
155 return E;
156 }
157
158 if (TII.isSMRD(Inst)) {
159 if (ST.hasWaitXcnt())
160 return HWEvents::SMEM_GROUP | HWEvents::SMEM_ACCESS;
161 return HWEvents::SMEM_ACCESS;
162 }
163
164 if (SIInstrInfo::isLDSDIR(Inst)) {
165 return HWEvents::EXP_LDS_ACCESS;
166 }
167
168 if (SIInstrInfo::isEXP(Inst)) {
169 unsigned Imm = TII.getNamedOperand(Inst, AMDGPU::OpName::tgt)->getImm();
171 return HWEvents::EXP_PARAM_ACCESS;
173 return HWEvents::EXP_POS_ACCESS;
174 return HWEvents::EXP_GPR_LOCK;
175 }
176
178 return HWEvents::SCC_WRITE;
179 }
180
181 switch (Inst.getOpcode()) {
182 case AMDGPU::S_SENDMSG:
183 case AMDGPU::S_SENDMSG_RTN_B32:
184 case AMDGPU::S_SENDMSG_RTN_B64:
185 case AMDGPU::S_SENDMSGHALT:
186 return HWEvents::SQ_MESSAGE;
187 case AMDGPU::S_MEMTIME:
188 case AMDGPU::S_MEMREALTIME:
189 case AMDGPU::S_GET_BARRIER_STATE_M0:
190 case AMDGPU::S_GET_BARRIER_STATE_IMM:
191 return HWEvents::SMEM_ACCESS;
192 }
193
194 return HWEvents::NONE;
195}
196
198 bool IsExpertMode) {
199 const SIInstrInfo &TII = *ST.getInstrInfo();
200
201 if (IsExpertMode)
202 return getEventsForImpl(Inst, ST, TII) |
204 return getEventsForImpl(Inst, ST, TII);
205}
206} // namespace AMDGPU
207
209 ListSeparator LS(" | ");
210#define AMDGPU_HW_EVENT(E, V) \
211 if (Events & AMDGPU::HWEvents::E) \
212 OS << LS << #E << " ";
213#include "AMDGPUHWEvents.def"
214 return OS;
215}
216
217} // namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:663
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
Interface definition for SIInstrInfo.
This file contains some functions that are useful when dealing with strings.
Bit mask of hardware events.
A helper class to return the specified delimiter string after the first invocation of operator String...
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
static bool isVMEM(const MachineInstr &MI)
static bool isEXP(const MachineInstr &MI)
static bool mayWriteLDSThroughDMA(const MachineInstr &MI)
static bool usesTENSOR_CNT(const MachineInstr &MI)
static bool isLDSDIR(const MachineInstr &MI)
static bool isVSAMPLE(const MachineInstr &MI)
static bool isAtomicRet(const MachineInstr &MI)
static bool isImage(const MachineInstr &MI)
static bool isGFX12CacheInvOrWBInst(unsigned Opc)
static bool isSBarrierSCCWrite(unsigned Opcode)
static bool usesASYNC_CNT(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isLDSDMA(const MachineInstr &MI)
static bool isAtomicNoRet(const MachineInstr &MI)
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isDPMACCInstruction(unsigned Opc)
static HWEvents getExpertSchedulingEventType(const MachineInstr &Inst, const SIInstrInfo &TII)
HWEvents getSimplifiedVMEMEventsFor(const MachineInstr &Inst, const SIInstrInfo &TII)
static HWEvents getEventsForImpl(const MachineInstr &Inst, const GCNSubtarget &ST, const SIInstrInfo &TII)
bool getMUBUFIsBufferInv(unsigned Opc)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
HWEvents getEventsFor(const MachineInstr &Inst, const GCNSubtarget &ST, bool IsExpertMode)
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)