LLVM 23.0.0git
AMDGPUInsertDelayAlu.cpp
Go to the documentation of this file.
1//===- AMDGPUInsertDelayAlu.cpp - Insert s_delay_alu instructions ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Insert s_delay_alu instructions to avoid stalls on GFX11+.
11//
12//===----------------------------------------------------------------------===//
13
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/SetVector.h"
#include <optional>
20
21using namespace llvm;
22
23#define DEBUG_TYPE "amdgpu-insert-delay-alu"
24
25namespace {
26
27class AMDGPUInsertDelayAlu {
28public:
29 const GCNSubtarget *ST;
30 const SIInstrInfo *SII;
32
33 const TargetSchedModel *SchedModel;
34
35 // Return true if MI waits for all outstanding VALU instructions to complete.
36 static bool instructionWaitsForVALU(const MachineInstr &MI) {
37 // These instruction types wait for VA_VDST==0 before issuing.
41 return true;
42 if (MI.getOpcode() == AMDGPU::S_SENDMSG_RTN_B32 ||
43 MI.getOpcode() == AMDGPU::S_SENDMSG_RTN_B64)
44 return true;
45 if (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
46 AMDGPU::DepCtr::decodeFieldVaVdst(MI.getOperand(0).getImm()) == 0)
47 return true;
48 return false;
49 }
50
51 static bool instructionWaitsForSGPRWrites(const MachineInstr &MI) {
52 // These instruction types wait for VA_SDST==0 before issuing.
54 return true;
55
57 for (auto &Op : MI.operands()) {
58 if (Op.isReg())
59 return true;
60 }
61 }
62 return false;
63 }
64
// Types of delay that can be encoded in an s_delay_alu instruction.
enum DelayType {
  VALU,  // Written by a (non-TRANS) VALU instruction.
  TRANS, // Written by a TRANS instruction (or one treated like TRANS).
  SALU,  // Written by an SALU instruction.
  OTHER  // Anything else; no delay is tracked for it.
};
67
68 // Get the delay type for a MachineInstr.
69 DelayType getDelayType(const MachineInstr &MI) {
70 // Non-F64 TRANS instructions use a separate delay type.
72 !AMDGPU::isDPMACCInstruction(MI.getOpcode()))
73 return TRANS;
74 // WMMA XDL ops are treated the same as TRANS.
75 if (ST->hasGFX1250Insts() && SII->isXDLWMMA(MI))
76 return TRANS;
77 if (SIInstrInfo::isVALU(MI, /*AllowLDSDMA=*/true))
78 return VALU;
80 return SALU;
81 return OTHER;
82 }
83
84 // Information about the last instruction(s) that wrote to a particular
85 // regunit. In straight-line code there will only be one such instruction, but
86 // when control flow converges we merge the delay information from each path
87 // to represent the union of the worst-case delays of each type.
88 struct DelayInfo {
89 // One larger than the maximum number of (non-TRANS) VALU instructions we
90 // can encode in an s_delay_alu instruction.
91 static constexpr unsigned VALU_MAX = 5;
92
93 // One larger than the maximum number of TRANS instructions we can encode in
94 // an s_delay_alu instruction.
95 static constexpr unsigned TRANS_MAX = 4;
96
97 // One larger than the maximum number of SALU cycles we can encode in an
98 // s_delay_alu instruction.
99 static constexpr unsigned SALU_CYCLES_MAX = 4;
100
101 // If it was written by a (non-TRANS) VALU, remember how many clock cycles
102 // are left until it completes, and how many other (non-TRANS) VALU we have
103 // seen since it was issued.
104 uint8_t VALUCycles = 0;
105 uint8_t VALUNum = VALU_MAX;
106
107 // If it was written by a TRANS, remember how many clock cycles are left
108 // until it completes, and how many other TRANS we have seen since it was
109 // issued.
110 uint8_t TRANSCycles = 0;
111 uint8_t TRANSNum = TRANS_MAX;
112 // Also remember how many other (non-TRANS) VALU we have seen since it was
113 // issued. When an instruction depends on both a prior TRANS and a prior
114 // non-TRANS VALU, this is used to decide whether to encode a wait for just
115 // one or both of them.
116 uint8_t TRANSNumVALU = VALU_MAX;
117
118 // If it was written by an SALU, remember how many clock cycles are left
119 // until it completes.
120 uint8_t SALUCycles = 0;
121
122 DelayInfo() = default;
123
124 DelayInfo(DelayType Type, unsigned Cycles) {
125 switch (Type) {
126 default:
127 llvm_unreachable("unexpected type");
128 case VALU:
129 VALUCycles = Cycles;
130 VALUNum = 0;
131 break;
132 case TRANS:
133 TRANSCycles = Cycles;
134 TRANSNum = 0;
135 TRANSNumVALU = 0;
136 break;
137 case SALU:
138 // Guard against pseudo-instructions like SI_CALL which are marked as
139 // SALU but with a very high latency.
140 SALUCycles = std::min(Cycles, SALU_CYCLES_MAX);
141 break;
142 }
143 }
144
145 bool operator==(const DelayInfo &RHS) const {
146 return VALUCycles == RHS.VALUCycles && VALUNum == RHS.VALUNum &&
147 TRANSCycles == RHS.TRANSCycles && TRANSNum == RHS.TRANSNum &&
148 TRANSNumVALU == RHS.TRANSNumVALU && SALUCycles == RHS.SALUCycles;
149 }
150
151 bool operator!=(const DelayInfo &RHS) const { return !(*this == RHS); }
152
153 // Merge another DelayInfo into this one, to represent the union of the
154 // worst-case delays of each type.
155 void merge(const DelayInfo &RHS) {
156 VALUCycles = std::max(VALUCycles, RHS.VALUCycles);
157 VALUNum = std::min(VALUNum, RHS.VALUNum);
158 TRANSCycles = std::max(TRANSCycles, RHS.TRANSCycles);
159 TRANSNum = std::min(TRANSNum, RHS.TRANSNum);
160 TRANSNumVALU = std::min(TRANSNumVALU, RHS.TRANSNumVALU);
161 SALUCycles = std::max(SALUCycles, RHS.SALUCycles);
162 }
163
164 // Update this DelayInfo after issuing an instruction of the specified type.
165 // Cycles is the number of cycles it takes to issue the instruction. Return
166 // true if there is no longer any useful delay info.
167 bool advance(DelayType Type, unsigned Cycles) {
168 bool Erase = true;
169
170 VALUNum += (Type == VALU);
171 if (VALUNum >= VALU_MAX || VALUCycles <= Cycles) {
172 // Forget about the VALU instruction. It was too far back or has
173 // definitely completed by now.
174 VALUNum = VALU_MAX;
175 VALUCycles = 0;
176 } else {
177 VALUCycles -= Cycles;
178 Erase = false;
179 }
180
181 TRANSNum += (Type == TRANS);
182 TRANSNumVALU += (Type == VALU);
183 if (TRANSNum >= TRANS_MAX || TRANSCycles <= Cycles) {
184 // Forget about any TRANS instruction. It was too far back or has
185 // definitely completed by now.
186 TRANSNum = TRANS_MAX;
187 TRANSNumVALU = VALU_MAX;
188 TRANSCycles = 0;
189 } else {
190 TRANSCycles -= Cycles;
191 Erase = false;
192 }
193
194 if (SALUCycles <= Cycles) {
195 // Forget about any SALU instruction. It has definitely completed by
196 // now.
197 SALUCycles = 0;
198 } else {
199 SALUCycles -= Cycles;
200 Erase = false;
201 }
202
203 return Erase;
204 }
205
206#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
207 void dump() const {
208 if (VALUCycles)
209 dbgs() << " VALUCycles=" << (int)VALUCycles;
210 if (VALUNum < VALU_MAX)
211 dbgs() << " VALUNum=" << (int)VALUNum;
212 if (TRANSCycles)
213 dbgs() << " TRANSCycles=" << (int)TRANSCycles;
214 if (TRANSNum < TRANS_MAX)
215 dbgs() << " TRANSNum=" << (int)TRANSNum;
216 if (TRANSNumVALU < VALU_MAX)
217 dbgs() << " TRANSNumVALU=" << (int)TRANSNumVALU;
218 if (SALUCycles)
219 dbgs() << " SALUCycles=" << (int)SALUCycles;
220 }
221#endif
222 };
223
224 // A map from regunits to the delay info for that regunit.
225 struct DelayState : DenseMap<MCRegUnit, DelayInfo> {
226 // Merge another DelayState into this one by merging the delay info for each
227 // regunit.
228 void merge(const DelayState &RHS) {
229 for (const auto &KV : RHS) {
230 iterator It;
231 bool Inserted;
232 std::tie(It, Inserted) = insert(KV);
233 if (!Inserted)
234 It->second.merge(KV.second);
235 }
236 }
237
238 // Advance the delay info for each regunit, erasing any that are no longer
239 // useful.
240 void advance(DelayType Type, unsigned Cycles) {
241 remove_if([&](auto &P) { return P.second.advance(Type, Cycles); });
242 }
243
244 void advanceByVALUNum(unsigned VALUNum) {
245 remove_if([&](auto &P) {
246 return P.second.VALUNum >= VALUNum && P.second.VALUCycles > 0;
247 });
248 }
249
250#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
251 void dump(const TargetRegisterInfo *TRI) const {
252 if (empty()) {
253 dbgs() << " empty\n";
254 return;
255 }
256
257 // Dump DelayInfo for each RegUnit in numerical order.
259 Order.reserve(size());
260 for (const_iterator I = begin(), E = end(); I != E; ++I)
261 Order.push_back(I);
262 llvm::sort(Order, [](const const_iterator &A, const const_iterator &B) {
263 return A->first < B->first;
264 });
265 for (const_iterator I : Order) {
266 dbgs() << " " << printRegUnit(I->first, TRI);
267 I->second.dump();
268 dbgs() << "\n";
269 }
270 }
271#endif
272 };
273
274 // The saved delay state at the end of each basic block.
276
277 // Emit an s_delay_alu instruction if necessary before MI.
278 MachineInstr *emitDelayAlu(MachineInstr &MI, DelayInfo Delay,
279 MachineInstr *LastDelayAlu) {
280 unsigned Imm = 0;
281
282 // Wait for a TRANS instruction.
283 if (Delay.TRANSNum < DelayInfo::TRANS_MAX)
284 Imm |= 4 + Delay.TRANSNum;
285
286 // Wait for a VALU instruction (if it's more recent than any TRANS
287 // instruction that we're also waiting for).
288 if (Delay.VALUNum < DelayInfo::VALU_MAX &&
289 Delay.VALUNum <= Delay.TRANSNumVALU) {
290 if (Imm & 0xf)
291 Imm |= Delay.VALUNum << 7;
292 else
293 Imm |= Delay.VALUNum;
294 }
295
296 // Wait for an SALU instruction.
297 if (Delay.SALUCycles) {
298 assert(Delay.SALUCycles < DelayInfo::SALU_CYCLES_MAX);
299 if (Imm & 0x780) {
300 // We have already encoded a VALU and a TRANS delay. There's no room in
301 // the encoding for an SALU delay as well, so just drop it.
302 } else if (Imm & 0xf) {
303 Imm |= (Delay.SALUCycles + 8) << 7;
304 } else {
305 Imm |= Delay.SALUCycles + 8;
306 }
307 }
308
309 // Don't emit the s_delay_alu instruction if there's nothing to wait for.
310 if (!Imm)
311 return LastDelayAlu;
312
313 // If we only need to wait for one instruction, try encoding it in the last
314 // s_delay_alu that we emitted.
315 if (!(Imm & 0x780) && LastDelayAlu) {
316 unsigned Skip = 0;
317 for (auto I = MachineBasicBlock::instr_iterator(LastDelayAlu),
319 ++I != E;) {
320 if (I->getOpcode() == AMDGPU::S_SET_VGPR_MSB) {
321 // It is not deterministic whether the skip count counts
322 // S_SET_VGPR_MSB instructions or not, so do not include them in a
323 // skip region.
324 Skip = 6;
325 break;
326 }
327 if (!I->isBundle() && !I->isMetaInstruction())
328 ++Skip;
329 }
330 if (Skip < 6) {
331 MachineOperand &Op = LastDelayAlu->getOperand(0);
332 unsigned LastImm = Op.getImm();
333 assert((LastImm & ~0xf) == 0 &&
334 "Remembered an s_delay_alu with no room for another delay!");
335 LastImm |= Imm << 7 | Skip << 4;
336 Op.setImm(LastImm);
337 return nullptr;
338 }
339 }
340
341 auto &MBB = *MI.getParent();
342 MachineInstr *DelayAlu =
343 BuildMI(MBB, MI, DebugLoc(), SII->get(AMDGPU::S_DELAY_ALU)).addImm(Imm);
344 // Remember the s_delay_alu for next time if there is still room in it to
345 // encode another delay.
346 return (Imm & 0x780) ? nullptr : DelayAlu;
347 }
348
349 bool runOnMachineBasicBlock(MachineBasicBlock &MBB, bool Emit) {
350 DelayState State;
351 for (auto *Pred : MBB.predecessors())
352 State.merge(BlockState[Pred]);
353
354 LLVM_DEBUG(dbgs() << " State at start of " << printMBBReference(MBB)
355 << "\n";
356 State.dump(TRI););
357
358 bool Changed = false;
359 MachineInstr *LastDelayAlu = nullptr;
360
361 // FIXME: 0 is a valid register unit.
362 MCRegUnit LastSGPRFromVALU = static_cast<MCRegUnit>(0);
363 // Iterate over the contents of bundles, but don't emit any instructions
364 // inside a bundle.
365 for (auto &MI : MBB.instrs()) {
366 if (MI.isBundle() || MI.isMetaInstruction())
367 continue;
368
369 // Ignore some more instructions that do not generate any code.
370 switch (MI.getOpcode()) {
371 case AMDGPU::SI_RETURN_TO_EPILOG:
372 continue;
373 }
374
375 DelayType Type = getDelayType(MI);
376
377 if (instructionWaitsForSGPRWrites(MI)) {
378 auto It = State.find(LastSGPRFromVALU);
379 if (It != State.end()) {
380 DelayInfo Info = It->getSecond();
381 State.advanceByVALUNum(Info.VALUNum);
382 // FIXME: 0 is a valid register unit.
383 LastSGPRFromVALU = static_cast<MCRegUnit>(0);
384 }
385 }
386
387 if (instructionWaitsForVALU(MI)) {
388 // Forget about all outstanding VALU delays.
389 // TODO: This is overkill since it also forgets about SALU delays.
390 State = DelayState();
391 } else if (Type != OTHER) {
392 DelayInfo Delay;
393 // TODO: Scan implicit uses too?
394 for (const auto &Op : MI.explicit_uses()) {
395 if (Op.isReg()) {
396 // One of the operands of the writelane is also the output operand.
397 // This creates the insertion of redundant delays. Hence, we have to
398 // ignore this operand.
399 if (MI.getOpcode() == AMDGPU::V_WRITELANE_B32 && Op.isTied())
400 continue;
401 for (MCRegUnit Unit : TRI->regunits(Op.getReg())) {
402 auto It = State.find(Unit);
403 if (It != State.end()) {
404 Delay.merge(It->second);
405 State.erase(Unit);
406 }
407 }
408 }
409 }
410
411 if (SII->isVALU(MI.getOpcode(), /*AllowLDSDMA=*/true)) {
412 for (const auto &Op : MI.defs()) {
413 Register Reg = Op.getReg();
414 if (AMDGPU::isSGPR(Reg, TRI)) {
415 LastSGPRFromVALU = *TRI->regunits(Reg).begin();
416 break;
417 }
418 }
419 }
420
421 if (Emit && !MI.isBundledWithPred()) {
422 // TODO: For VALU->SALU delays should we use s_delay_alu or s_nop or
423 // just ignore them?
424 LastDelayAlu = emitDelayAlu(MI, Delay, LastDelayAlu);
425 }
426 }
427
428 if (Type != OTHER) {
429 // TODO: Scan implicit defs too?
430 for (const auto &Op : MI.defs()) {
431 unsigned Latency = SchedModel->computeOperandLatency(
432 &MI, Op.getOperandNo(), nullptr, 0);
433 for (MCRegUnit Unit : TRI->regunits(Op.getReg()))
434 State[Unit] = DelayInfo(Type, Latency);
435 }
436 }
437
438 // Advance by the number of cycles it takes to issue this instruction.
439 // TODO: Use a more advanced model that accounts for instructions that
440 // take multiple cycles to issue on a particular pipeline.
441 unsigned Cycles = SIInstrInfo::getNumWaitStates(MI);
442 // TODO: In wave64 mode, double the number of cycles for VALU and VMEM
443 // instructions on the assumption that they will usually have to be issued
444 // twice?
445 State.advance(Type, Cycles);
446
447 LLVM_DEBUG(dbgs() << " State after " << MI; State.dump(TRI););
448 }
449
450 if (Emit) {
451 assert(State == BlockState[&MBB] &&
452 "Basic block state should not have changed on final pass!");
453 } else if (DelayState &BS = BlockState[&MBB]; State != BS) {
454 BS = std::move(State);
455 Changed = true;
456 }
457 return Changed;
458 }
459
460 bool run(MachineFunction &MF) {
461 LLVM_DEBUG(dbgs() << "AMDGPUInsertDelayAlu running on " << MF.getName()
462 << "\n");
463
464 ST = &MF.getSubtarget<GCNSubtarget>();
465 if (!ST->hasDelayAlu())
466 return false;
467
469
470 if (MFI.getMaxWavesPerEU() == 1)
471 return false;
472
473 SII = ST->getInstrInfo();
474 TRI = ST->getRegisterInfo();
475 SchedModel = &SII->getSchedModel();
476
477 // Calculate the delay state for each basic block, iterating until we reach
478 // a fixed point.
480 for (auto &MBB : reverse(MF))
481 WorkList.insert(&MBB);
482 while (!WorkList.empty()) {
483 auto &MBB = *WorkList.pop_back_val();
484 bool Changed = runOnMachineBasicBlock(MBB, false);
485 if (Changed)
486 WorkList.insert_range(MBB.successors());
487 }
488
489 LLVM_DEBUG(dbgs() << "Final pass over all BBs\n");
490
491 // Make one last pass over all basic blocks to emit s_delay_alu
492 // instructions.
493 bool Changed = false;
494 for (auto &MBB : MF)
495 Changed |= runOnMachineBasicBlock(MBB, true);
496 return Changed;
497 }
498};
499
500class AMDGPUInsertDelayAluLegacy : public MachineFunctionPass {
501public:
502 static char ID;
503
504 AMDGPUInsertDelayAluLegacy() : MachineFunctionPass(ID) {}
505
506 void getAnalysisUsage(AnalysisUsage &AU) const override {
507 AU.setPreservesCFG();
509 }
510
511 bool runOnMachineFunction(MachineFunction &MF) override {
512 if (skipFunction(MF.getFunction()))
513 return false;
514 AMDGPUInsertDelayAlu Impl;
515 return Impl.run(MF);
516 }
517};
518} // namespace
519
523 if (!AMDGPUInsertDelayAlu().run(MF))
524 return PreservedAnalyses::all();
526 PA.preserveSet<CFGAnalyses>();
527 return PA;
528} // end namespace llvm
529
530char AMDGPUInsertDelayAluLegacy::ID = 0;
531
532char &llvm::AMDGPUInsertDelayAluID = AMDGPUInsertDelayAluLegacy::ID;
533
534INITIALIZE_PASS(AMDGPUInsertDelayAluLegacy, DEBUG_TYPE,
535 "AMDGPU Insert Delay ALU", false, false)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
MachineBasicBlock & MBB
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
#define DEBUG_TYPE
IRTranslator LLVM IR MI
static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
Interface definition for SIInstrInfo.
This file implements a set that has insertion order iteration characteristics.
#define LLVM_DEBUG(...)
Definition Debug.h:119
Value * RHS
Represent the analysis usage information of a pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:275
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
Instructions::iterator instr_iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
Wrapper class representing virtual and physical registers.
Definition Register.h:20
bool isXDLWMMA(const MachineInstr &MI) const
static bool isSALU(const MachineInstr &MI)
const TargetSchedModel & getSchedModel() const
static bool isVALU(const MachineInstr &MI, bool AllowLDSDMA)
static bool isTRANS(const MachineInstr &MI)
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
A vector that has set insertion semantics.
Definition SetVector.h:57
void insert_range(Range &&R)
Definition SetVector.h:176
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:100
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
value_type pop_back_val()
Definition SetVector.h:279
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
LLVM_ABI unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned decodeFieldVaVdst(unsigned Encoded)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
bool isDPMACCInstruction(unsigned Opc)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
constexpr bool isFLAT(const T &...O)
Definition SIDefines.h:276
constexpr bool isBuffer(const T &...O)
Definition SIDefines.h:258
constexpr bool isSMRD(const T &...O)
Definition SIDefines.h:261
constexpr bool isMIMG(const T &...O)
Definition SIDefines.h:264
constexpr bool isEXP(const T &...O)
Definition SIDefines.h:273
constexpr bool isDS(const T &...O)
Definition SIDefines.h:279
constexpr bool isSALU(const T &...O)
Definition SIDefines.h:204
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool operator!=(uint64_t V1, const APInt &V2)
Definition APInt.h:2142
LLVM_ABI Printable printRegUnit(MCRegUnit Unit, const TargetRegisterInfo *TRI)
Create Printable object to print register units on a raw_ostream.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
char & AMDGPUInsertDelayAluID
auto remove_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1784
DWARFExpression::Operation Op
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
PreservedAnalyses run(MachineFunction &F, MachineFunctionAnalysisManager &MFAM)