19#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
31 return HWEvents::VGPR_XDL_WRITE;
33 if (
TII.isTRANS(Inst))
34 return HWEvents::VGPR_TRANS_WRITE;
37 return HWEvents::VGPR_DPMACC_WRITE;
39 return HWEvents::VGPR_CSMACC_WRITE;
47 return HWEvents::VGPR_FLAT_READ;
50 return HWEvents::VGPR_LDS_READ;
52 if (
TII.isVMEM(Inst) ||
TII.isVIMAGE(Inst) ||
TII.isVSAMPLE(Inst))
53 return HWEvents::VGPR_VMEM_READ;
56 return HWEvents::NONE;
63 case AMDGPU::GLOBAL_INV:
64 return HWEvents::GLOBAL_INV_ACCESS;
66 case AMDGPU::GLOBAL_WB:
67 case AMDGPU::GLOBAL_WBINV:
68 return HWEvents::VMEM_WRITE_ACCESS;
77 return HWEvents::VMEM_READ_ACCESS;
81 if (
TII.mayAccessScratch(Inst))
82 return HWEvents::SCRATCH_WRITE_ACCESS;
83 return HWEvents::VMEM_WRITE_ACCESS;
87 return HWEvents::VMEM_READ_ACCESS;
95 return HWEvents::VMEM_BVH_READ_ACCESS;
101 return HWEvents::VMEM_SAMPLER_READ_ACCESS;
104 return HWEvents::VMEM_READ_ACCESS;
110 if (
TII.isDS(Inst) &&
TII.usesLGKM_CNT(Inst)) {
112 TII.hasModifiersSet(Inst, AMDGPU::OpName::gds))
113 return HWEvents::GDS_ACCESS | HWEvents::GDS_GPR_LOCK;
115 return HWEvents::LDS_ACCESS;
118 if (
TII.isFLAT(Inst)) {
124 if (
TII.mayAccessVMEMThroughFlat(Inst)) {
125 if (ST.hasWaitXcnt())
126 E |= HWEvents::VMEM_GROUP;
130 if (
TII.mayAccessLDSThroughFlat(Inst))
131 E |= HWEvents::LDS_ACCESS;
134 E |= HWEvents::ASYNC_ACCESS;
140 return HWEvents::TENSOR_ACCESS;
144 Inst.
getOpcode() == AMDGPU::BUFFER_WBL2)) {
149 if (ST.hasWaitXcnt())
150 E |= HWEvents::VMEM_GROUP;
151 if (ST.vmemWriteNeedsExpWaitcnt() &&
153 E |= HWEvents::VMW_GPR_LOCK;
158 if (
TII.isSMRD(Inst)) {
159 if (ST.hasWaitXcnt())
160 return HWEvents::SMEM_GROUP | HWEvents::SMEM_ACCESS;
161 return HWEvents::SMEM_ACCESS;
165 return HWEvents::EXP_LDS_ACCESS;
169 unsigned Imm =
TII.getNamedOperand(Inst, AMDGPU::OpName::tgt)->getImm();
171 return HWEvents::EXP_PARAM_ACCESS;
173 return HWEvents::EXP_POS_ACCESS;
174 return HWEvents::EXP_GPR_LOCK;
178 return HWEvents::SCC_WRITE;
182 case AMDGPU::S_SENDMSG:
183 case AMDGPU::S_SENDMSG_RTN_B32:
184 case AMDGPU::S_SENDMSG_RTN_B64:
185 case AMDGPU::S_SENDMSGHALT:
186 return HWEvents::SQ_MESSAGE;
187 case AMDGPU::S_MEMTIME:
188 case AMDGPU::S_MEMREALTIME:
189 case AMDGPU::S_GET_BARRIER_STATE_M0:
190 case AMDGPU::S_GET_BARRIER_STATE_IMM:
191 return HWEvents::SMEM_ACCESS;
194 return HWEvents::NONE;
210#define AMDGPU_HW_EVENT(E, V) \
211 if (Events & AMDGPU::HWEvents::E) \
212 OS << LS << #E << " ";
213#include "AMDGPUHWEvents.def"
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
Interface definition for SIInstrInfo.
Bit mask of hardware events.
A helper class to return the specified delimiter string after the first invocation of operator StringRef().
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
static bool isVMEM(const MachineInstr &MI)
static bool isEXP(const MachineInstr &MI)
static bool mayWriteLDSThroughDMA(const MachineInstr &MI)
static bool usesTENSOR_CNT(const MachineInstr &MI)
static bool isLDSDIR(const MachineInstr &MI)
static bool isVSAMPLE(const MachineInstr &MI)
static bool isAtomicRet(const MachineInstr &MI)
static bool isImage(const MachineInstr &MI)
static bool isGFX12CacheInvOrWBInst(unsigned Opc)
static bool isSBarrierSCCWrite(unsigned Opcode)
static bool usesASYNC_CNT(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isLDSDMA(const MachineInstr &MI)
static bool isAtomicNoRet(const MachineInstr &MI)
This class implements an extremely fast bulk output stream that can only output to a stream.
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isDPMACCInstruction(unsigned Opc)
static HWEvents getExpertSchedulingEventType(const MachineInstr &Inst, const SIInstrInfo &TII)
HWEvents getSimplifiedVMEMEventsFor(const MachineInstr &Inst, const SIInstrInfo &TII)
static HWEvents getEventsForImpl(const MachineInstr &Inst, const GCNSubtarget &ST, const SIInstrInfo &TII)
bool getMUBUFIsBufferInv(unsigned Opc)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
HWEvents getEventsFor(const MachineInstr &Inst, const GCNSubtarget &ST, bool IsExpertMode)
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)