LLVM 23.0.0git
AMDGPULowerVGPREncoding.cpp
Go to the documentation of this file.
1//===- AMDGPULowerVGPREncoding.cpp - lower VGPRs above v255 ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Lower VGPRs above first 256 on gfx1250.
11///
12/// The pass scans used VGPRs and inserts S_SET_VGPR_MSB instructions to switch
13/// VGPR addressing mode. The mode change is effective until the next change.
14/// This instruction provides high bits of a VGPR address for four of the
15/// operands: vdst, src0, src1, and src2, or other 4 operands depending on the
16/// instruction encoding. If bits are set they are added as MSB to the
17/// corresponding operand VGPR number.
18///
19/// There is no need to replace actual register operands because encoding of the
20/// high and low VGPRs is the same. I.e. v0 has the encoding 0x100, so does
21/// v256. v1 has the encoding 0x101 and v257 has the same encoding. So high
22/// VGPRs will survive until actual encoding and will result in the same actual
23/// bit encoding.
24///
25/// As a result the pass only inserts S_SET_VGPR_MSB to provide an actual offset
26/// to a VGPR address of the subsequent instructions. The InstPrinter will take
27/// care of printing a low VGPR instead of a high one. In principle this
28/// shall be viable to print actual high VGPR numbers, but that would disagree
29/// with a disasm printing and create a situation where asm text is not
30/// deterministic.
31///
32/// This pass creates a convention where non-fall through basic blocks shall
33/// start with all 4 MSBs zero. Otherwise a disassembly would not be readable.
34/// An optimization here is possible but deemed not desirable because of the
35/// readability concerns.
36///
37/// Consequentially the ABI is set to expect all 4 MSBs to be zero on entry.
38/// The pass must run very late in the pipeline to make sure no changes to VGPR
39/// operands will be made after it.
40//
41//===----------------------------------------------------------------------===//
42
44#include "AMDGPU.h"
45#include "GCNSubtarget.h"
46#include "SIDefines.h"
47#include "SIInstrInfo.h"
48#include "llvm/ADT/bit.h"
49#include "llvm/Support/Debug.h"
51
52using namespace llvm;
53
54#define DEBUG_TYPE "amdgpu-lower-vgpr-encoding"
55
56namespace {
57
58class AMDGPULowerVGPREncoding {
59 static constexpr unsigned OpNum = 4;
60 static constexpr unsigned BitsPerField = 2;
61 static constexpr unsigned NumFields = 4;
62 static constexpr unsigned ModeWidth = NumFields * BitsPerField;
63 static constexpr unsigned ModeMask = (1 << ModeWidth) - 1;
64 static constexpr unsigned VGPRMSBShift =
66
struct OpMode {
  // An empty optional means the operand does not care which MSBs are active.
  std::optional<unsigned> MSBits;

  /// Merge the requirement \p New into this operand mode.
  ///
  /// An unset \p New is ignored. Otherwise MSBits takes the new value.
  /// \p Rewritten is raised (never cleared) when a value that was already set
  /// had to change.
  /// \returns true if the new value differs from the old one, where an unset
  /// old value counts as 0.
  bool update(const OpMode &New, bool &Rewritten) {
    if (!New.MSBits)
      return false;

    const bool Differs = *New.MSBits != MSBits.value_or(0);
    if (Differs && MSBits.has_value())
      Rewritten = true;

    MSBits = New.MSBits;
    return Differs;
  }
};
83
84 struct ModeTy {
85 OpMode Ops[OpNum];
86
87 bool update(const ModeTy &New, bool &Rewritten) {
88 bool Updated = false;
89 for (unsigned I : seq(OpNum))
90 Updated |= Ops[I].update(New.Ops[I], Rewritten);
91 return Updated;
92 }
93
94 unsigned encode() const {
95 // Layout: [src0 msb, src1 msb, src2 msb, dst msb].
96 unsigned V = 0;
97 for (const auto &[I, Op] : enumerate(Ops))
98 V |= Op.MSBits.value_or(0) << (I * 2);
99 return V;
100 }
101
102 void print(raw_ostream &OS) const {
103 static const char *FieldNames[] = {"src0", "src1", "src2", "dst"};
104 OS << '{';
105 for (const auto &[I, Op] : enumerate(Ops)) {
106 if (I)
107 OS << ", ";
108 OS << FieldNames[I] << '=';
109 if (Op.MSBits)
110 OS << *Op.MSBits;
111 else
112 OS << '?';
113 }
114 OS << '}';
115 }
116
117 // Check if this mode is compatible with required \p NewMode without
118 // modification.
119 bool isCompatible(const ModeTy NewMode) const {
120 for (unsigned I : seq(OpNum)) {
121 if (!NewMode.Ops[I].MSBits.has_value())
122 continue;
123 if (Ops[I].MSBits.value_or(0) != NewMode.Ops[I].MSBits.value_or(0))
124 return false;
125 }
126 return true;
127 }
128 };
129
130public:
131 bool run(MachineFunction &MF);
132
133private:
134 const SIInstrInfo *TII;
135 const SIRegisterInfo *TRI;
136
137 // Current basic block.
139
140 /// Most recent s_set_* instruction.
141 MachineInstr *MostRecentModeSet;
142
143 /// Current mode bits.
144 ModeTy CurrentMode;
145
146 /// Number of current hard clause instructions.
147 unsigned ClauseLen;
148
149 /// Number of hard clause instructions remaining.
150 unsigned ClauseRemaining;
151
152 /// Clause group breaks.
153 unsigned ClauseBreaks;
154
155 /// Last hard clause instruction.
157
158 // Remember whether XCNT is known to be zero because of an S_SET_VGPR_MSB
159 // instruction that we inserted, which implicitly waits for XCNT==0.
160 bool XCntIsZero;
161
162 /// Insert mode change before \p I. \returns true if mode was changed.
163 bool setMode(ModeTy NewMode, MachineBasicBlock::instr_iterator I);
164
165 /// Reset mode to default.
166 void resetMode(MachineBasicBlock::instr_iterator I) {
167 ModeTy Mode;
168 for (OpMode &Op : Mode.Ops)
169 Op.MSBits = 0;
170 setMode(Mode, I);
171 }
172
173 /// If \p MO references VGPRs, return the MSBs. Otherwise, return nullopt.
174 std::optional<unsigned> getMSBs(const MachineOperand &MO) const;
175
176 /// Handle single \p MI. \return true if changed.
177 bool runOnMachineInstr(MachineInstr &MI);
178
179 /// Compute the mode for a single \p MI given \p Ops operands
180 /// bit mapping. Optionally takes second array \p Ops2 for VOPD.
181 /// If provided and an operand from \p Ops is not a VGPR, then \p Ops2
182 /// is checked.
183 void computeMode(ModeTy &NewMode, const MachineInstr &MI,
184 const AMDGPU::OpName Ops[OpNum],
185 const AMDGPU::OpName *Ops2 = nullptr);
186
187 /// Check if an instruction \p I is within a clause and returns a suitable
188 /// iterator to insert mode change. It may also modify the S_CLAUSE
189 /// instruction to extend it or drop the clause if it cannot be adjusted.
192
193 /// Check if an instruction \p I is immediately after another program state
194 /// instruction which it cannot coissue with. If so, insert before that
195 /// instruction to encourage more coissuing.
198
199 /// S_SET_VGPR_MSB immediately after S_SETREG_IMM32_B32 targeting MODE is
200 /// silently dropped on GFX1250. When set, the next S_SET_VGPR_MSB insertion
201 /// must be preceded by S_NOP to avoid the hazard.
202 bool needNopBeforeSetVGPRMSB(MachineBasicBlock::instr_iterator I);
203
204 /// Handle S_SETREG_IMM32_B32 targeting MODE register. On certain hardware,
205 /// this instruction clobbers VGPR MSB bits[12:19], so we need to restore
206 /// the current mode. \returns true if the instruction was modified or a
207 /// new one was inserted.
208 bool handleSetregMode(MachineInstr &MI);
209
210 /// Update bits[12:19] of the imm operand in S_SETREG_IMM32_B32 to contain
211 /// the VGPR MSB mode value. \returns true if the immediate was changed.
212 bool updateSetregModeImm(MachineInstr &MI, int64_t ModeValue);
213};
214
/// Make \p NewMode the active VGPR MSB mode for the instruction at \c I.
///
/// \p NewMode is first merged into CurrentMode. If that changes nothing, the
/// code is left untouched and false is returned. If a mode-setting instruction
/// is tracked in MostRecentModeSet and no operand field that already had a
/// value was changed, that instruction's immediate is patched in place.
/// Otherwise a new S_SET_VGPR_MSB is built at an insertion point adjusted by
/// handleClause() and handleCoissue(). Both of the latter cases return true.
// NOTE(review): the rest of this signature (the iterator parameter I and the
// opening brace) is not visible in this listing.
215bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode,
217 LLVM_DEBUG({
218 dbgs() << " setMode: NewMode=";
219 NewMode.print(dbgs());
220 dbgs() << " CurrentMode=";
221 CurrentMode.print(dbgs());
222 dbgs() << " MostRecentModeSet=" << (MostRecentModeSet ? "yes" : "null");
223 if (I != MBB->instr_end())
224 dbgs() << " before: " << *I;
225 else
226 dbgs() << " at end\n";
227 });
228
229 // Record previous mode into high 8 bits of the immediate.
// This must be captured before CurrentMode.update() below modifies the mode.
230 int64_t OldModeBits = CurrentMode.encode() << ModeWidth;
231
232 bool Rewritten = false;
233 if (!CurrentMode.update(NewMode, Rewritten)) {
234 LLVM_DEBUG(dbgs() << " -> no change needed\n");
235 return false;
236 }
237
238 LLVM_DEBUG(dbgs() << " Rewritten=" << Rewritten << " after update\n");
239
// Piggyback path: only fields that had no value so far were filled in, so the
// tracked instruction can carry the merged mode without affecting earlier
// users.
240 if (MostRecentModeSet && !Rewritten) {
241 // Update MostRecentModeSet with the new mode. It can be either
242 // S_SET_VGPR_MSB or S_SETREG_IMM32_B32 (with Size <= 12).
243 if (MostRecentModeSet->getOpcode() == AMDGPU::S_SET_VGPR_MSB) {
244 MachineOperand &Op = MostRecentModeSet->getOperand(0);
245 // Carry old mode bits from the existing instruction.
// Note: this local shadows the outer OldModeBits on purpose; the high byte is
// taken from the instruction being patched, not from CurrentMode.
246 int64_t OldModeBits = Op.getImm() & (ModeMask << ModeWidth);
247 Op.setImm(CurrentMode.encode() | OldModeBits);
248 LLVM_DEBUG(dbgs() << " -> piggybacked onto S_SET_VGPR_MSB: "
249 << *MostRecentModeSet);
250 } else {
251 assert(MostRecentModeSet->getOpcode() == AMDGPU::S_SETREG_IMM32_B32 &&
252 "unexpected MostRecentModeSet opcode");
253 updateSetregModeImm(*MostRecentModeSet, CurrentMode.encode());
254 LLVM_DEBUG(dbgs() << " -> piggybacked onto S_SETREG_IMM32_B32: "
255 << *MostRecentModeSet);
256 }
257
258 return true;
259 }
260
261 MachineBasicBlock::instr_iterator InsertPt = handleClause(I);
262 InsertPt = handleCoissue(InsertPt);
263 // Case 2 match in handleSetregMode: the setreg's imm[12:19] matched
264 // current MSBs, but the next VALU needs different MSBs, so this
265 // S_SET_VGPR_MSB would land right after the setreg. Insert S_NOP to
266 // prevent it from being silently dropped.
// NOTE(review): the hazard check walks back from I, but the new instruction
// is built at InsertPt, which handleClause()/handleCoissue() may have moved
// earlier than I. Confirm that a setreg directly ahead of InsertPt cannot be
// missed when InsertPt != I.
267 if (needNopBeforeSetVGPRMSB(I))
268 BuildMI(*MBB, InsertPt, {}, TII->get(AMDGPU::S_NOP)).addImm(0);
269 MostRecentModeSet =
270 BuildMI(*MBB, InsertPt, {}, TII->get(AMDGPU::S_SET_VGPR_MSB))
271 .addImm(NewMode.encode() | OldModeBits);
272 LLVM_DEBUG(dbgs() << " -> inserted new S_SET_VGPR_MSB: "
273 << *MostRecentModeSet);
274
275 // If we inserted S_SET_VGPR_MSB early then XCNT should remain zero from the
276 // insertion point to the current instruction. Remove any redundant
277 // S_WAIT_XCNT instructions in that range.
// NOTE(review): one line of this loop body is not visible in this listing.
278 for (MachineInstr &MI : make_early_inc_range(make_range(InsertPt, I))) {
280 if (MI.getOpcode() == AMDGPU::S_WAIT_XCNT)
281 MI.eraseFromBundle();
282 }
283 XCntIsZero = true;
284
// Adopt NewMode wholesale: operands it leaves unset become "no requirement"
// again, matching the instruction just built, which encodes them as 0.
285 CurrentMode = NewMode;
286 return true;
287}
288
289std::optional<unsigned>
290AMDGPULowerVGPREncoding::getMSBs(const MachineOperand &MO) const {
291 if (!MO.isReg())
292 return std::nullopt;
293
294 MCRegister Reg = MO.getReg();
295 const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
296 if (!RC || !TRI->isVGPRClass(RC))
297 return std::nullopt;
298
299 unsigned Idx = TRI->getHWRegIndex(Reg);
300 return Idx >> 8;
301}
302
/// Fill \p NewMode with the MSBs that \p MI requires for each operand slot.
///
/// Slot I is looked up via the operand name Ops[I]. When \p Ops2 is given and
/// the operand named by Ops[I] is missing or yields no MSBs, the operand
/// named by Ops2[I] is used instead. Slots for which neither yields MSBs are
/// left unset in \p NewMode.
303void AMDGPULowerVGPREncoding::computeMode(ModeTy &NewMode,
304 const MachineInstr &MI,
305 const AMDGPU::OpName Ops[OpNum],
306 const AMDGPU::OpName *Ops2) {
// Start from a mode with no requirements at all.
307 NewMode = {};
308
309 for (unsigned I = 0; I < OpNum; ++I) {
310 const MachineOperand *Op = TII->getNamedOperand(MI, Ops[I]);
311
312 std::optional<unsigned> MSBits;
313 if (Op)
314 MSBits = getMSBs(*Op);
315
// Assertion-enabled builds only: when both tables yield MSBs for this slot,
// the two values must agree.
316#if !defined(NDEBUG)
317 if (MSBits.has_value() && Ops2) {
318 const MachineOperand *Op2 = TII->getNamedOperand(MI, Ops2[I]);
319 if (Op2) {
320 std::optional<unsigned> MSBits2;
321 MSBits2 = getMSBs(*Op2);
322 if (MSBits2.has_value() && MSBits != MSBits2)
323 llvm_unreachable("Invalid VOPD pair was created");
324 }
325 }
326#endif
327
// Fall back to the second table when the first gave nothing. Op is rebound so
// that the tied-operand check below inspects the operand that supplied MSBits.
328 if (!MSBits.has_value() && Ops2) {
329 Op = TII->getNamedOperand(MI, Ops2[I]);
330 if (Op)
331 MSBits = getMSBs(*Op);
332 }
333
334 if (!MSBits.has_value())
335 continue;
336
337 // Skip tied uses of src2 of VOP2, these will be handled along with defs and
338 // only vdst bit affects these operands. We cannot skip tied uses of VOP3,
339 // these uses are real even if must match the vdst.
// NOTE(review): part of this condition is not visible in this listing.
340 if (Ops[I] == AMDGPU::OpName::src2 && !Op->isDef() && Op->isTied() &&
343 TII->hasVALU32BitEncoding(MI.getOpcode()))))
344 continue;
345
346 NewMode.Ops[I].MSBits = MSBits.value();
347 }
348}
349
/// Make sure the mode required by \p MI is in effect before it.
///
/// If the current mode is not compatible and \p MI is commutable, the commuted
/// form is tried first; it is kept when it fits the current mode, otherwise
/// the commute is undone and setMode() is called with the original
/// requirement. \returns true if \p MI or the surrounding code was changed.
// NOTE(review): the line defining `Ops` is not visible in this listing;
// presumably it is the pair of operand tables for MI's opcode — confirm.
350bool AMDGPULowerVGPREncoding::runOnMachineInstr(MachineInstr &MI) {
352 if (Ops.first) {
353 ModeTy NewMode;
354 computeMode(NewMode, MI, Ops.first, Ops.second);
355 LLVM_DEBUG({
356 dbgs() << " runOnMachineInstr: ";
357 MI.print(dbgs());
358 dbgs() << " computed NewMode=";
359 NewMode.print(dbgs());
360 dbgs() << " compatible=" << CurrentMode.isCompatible(NewMode) << '\n';
361 });
// The commute is performed for real here; it is reverted below if the
// commuted form does not fit the current mode either.
362 if (!CurrentMode.isCompatible(NewMode) && MI.isCommutable() &&
363 TII->commuteInstruction(MI)) {
364 ModeTy NewModeCommuted;
365 computeMode(NewModeCommuted, MI, Ops.first, Ops.second);
366 LLVM_DEBUG({
367 dbgs() << " commuted NewMode=";
368 NewModeCommuted.print(dbgs());
369 dbgs() << " compatible=" << CurrentMode.isCompatible(NewModeCommuted)
370 << '\n';
371 });
372 if (CurrentMode.isCompatible(NewModeCommuted)) {
373 // Update CurrentMode with mode bits the commuted instruction relies on.
374 // This prevents later instructions from piggybacking and corrupting
375 // those bits (e.g., a nullopt src treated as 0 could be overwritten).
376 bool Unused = false;
377 CurrentMode.update(NewModeCommuted, Unused);
378 // MI was modified by the commute above.
379 return true;
380 }
381 // Commute back.
382 if (!TII->commuteInstruction(MI))
383 llvm_unreachable("Failed to restore commuted instruction.");
384 }
385 return setMode(NewMode, MI.getIterator());
386 }
// No operand table: the instruction must not have VGPR uses unless it is a
// meta or pseudo instruction.
387 assert(!TII->hasVGPRUses(MI) || MI.isMetaInstruction() || MI.isPseudo());
388 return false;
389}
390
// Pick the insertion point for a mode change requested before \p I, taking
// the currently tracked S_CLAUSE into account. Outside a clause \p I is
// returned unchanged. Inside one, the S_CLAUSE instruction may be lengthened
// or erased as a side effect.
// NOTE(review): the return-type line of this definition is not visible in
// this listing.
392AMDGPULowerVGPREncoding::handleClause(MachineBasicBlock::instr_iterator I) {
393 if (!ClauseRemaining)
394 return I;
395
396 // A clause cannot start with a special instruction, place it right before
397 // the clause.
// ClauseRemaining == ClauseLen means no instruction of the clause has been
// processed yet, so \p I is the clause's first instruction.
398 if (ClauseRemaining == ClauseLen) {
399 I = Clause->getPrevNode()->getIterator();
400 assert(I->isBundle());
401 return I;
402 }
403
404 // If a clause defines breaks each group cannot start with a mode change.
405 // just drop the clause.
406 if (ClauseBreaks) {
407 Clause->eraseFromBundle();
408 ClauseRemaining = 0;
409 return I;
410 }
411
412 // Otherwise adjust a number of instructions in the clause if it fits.
413 // If it does not clause will just become shorter. Since the length
414 // recorded in the clause is one less, increment the length after the
415 // update. Note that SIMM16[5:0] must be 1-62, not 0 or 63.
// ClauseBreaks is known to be zero here, so the shifted term adds nothing.
416 if (ClauseLen < 63)
417 Clause->getOperand(0).setImm(ClauseLen | (ClauseBreaks << 8));
418
419 ++ClauseLen;
420
421 return I;
422}
423
// Move the insertion point \p I backwards over any run of barrier, waitcnt and
// S_DELAY_ALU instructions that immediately precedes it in the current block,
// and return the adjusted iterator. Stops at the first other instruction or at
// the start of the block.
// NOTE(review): the return-type line of this definition is not visible in
// this listing.
425AMDGPULowerVGPREncoding::handleCoissue(MachineBasicBlock::instr_iterator I) {
426 // "Program State instructions" are instructions which are used to control
427 // operation of the GPU rather than performing arithmetic. Such instructions
428 // have different coissuing rules w.r.t s_set_vgpr_msb.
429 auto isProgramStateInstr = [this](MachineInstr *MI) {
430 unsigned Opc = MI->getOpcode();
431 return TII->isBarrier(Opc) || TII->isWaitcnt(Opc) ||
432 Opc == AMDGPU::S_DELAY_ALU;
433 };
434
435 while (I != MBB->begin()) {
436 auto Prev = std::prev(I);
437 if (!isProgramStateInstr(&*Prev))
438 return I;
439 I = Prev;
440 }
441
// Reached the start of the block: everything before the original position was
// a program state instruction.
442 return I;
443}
444
// Walk backwards from \c I within the current block, skipping meta
// instructions. \returns true if the first non-meta instruction found is an
// S_SETREG_IMM32_B32 whose hardware register id is ID_MODE, false otherwise
// (including when the start of the block is reached).
// NOTE(review): the parameter line of this signature and the call that decodes
// the simm16 operand are not visible in this listing.
445bool AMDGPULowerVGPREncoding::needNopBeforeSetVGPRMSB(
447 while (I != MBB->begin()) {
448 I = std::prev(I);
449 if (I->getOpcode() == AMDGPU::S_SETREG_IMM32_B32) {
450 MachineOperand *SIMM16Op =
451 TII->getNamedOperand(*I, AMDGPU::OpName::simm16);
452 auto [HwRegId, Offset, Size] =
454 if (HwRegId == AMDGPU::Hwreg::ID_MODE)
455 return true;
456 }
// Anything that is not a meta instruction ends the search.
457 if (!I->isMetaInstruction())
458 return false;
459 }
460 // FIXME: Return true if the previous MBB falls through and ends with
461 // S_SETREG_IMM32_B32.
462 return false;
463}
464
465/// Convert mode value from S_SET_VGPR_MSB format to MODE register format.
466/// S_SET_VGPR_MSB uses: (src0[0-1], src1[2-3], src2[4-5], dst[6-7])
467/// MODE register uses: (dst[0-1], src0[2-3], src1[4-5], src2[6-7])
468/// This is a left rotation by 2 bits on an 8-bit value.
469static int64_t convertModeToSetregFormat(int64_t Mode) {
470 assert(isUInt<8>(Mode) && "Mode expected to be 8-bit");
471 return llvm::rotl<uint8_t>(static_cast<uint8_t>(Mode), /*R=*/2);
472}
473
474bool AMDGPULowerVGPREncoding::updateSetregModeImm(MachineInstr &MI,
475 int64_t ModeValue) {
476 assert(MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32);
477
478 // Convert from S_SET_VGPR_MSB format to MODE register format
479 int64_t SetregMode = convertModeToSetregFormat(ModeValue);
480
481 MachineOperand *ImmOp = TII->getNamedOperand(MI, AMDGPU::OpName::imm);
482 int64_t OldImm = ImmOp->getImm();
483 int64_t NewImm =
484 (OldImm & ~AMDGPU::Hwreg::VGPR_MSB_MASK) | (SetregMode << VGPRMSBShift);
485 ImmOp->setImm(NewImm);
486 return NewImm != OldImm;
487}
488
489bool AMDGPULowerVGPREncoding::handleSetregMode(MachineInstr &MI) {
490 using namespace AMDGPU::Hwreg;
491
492 assert(MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 &&
493 "only S_SETREG_IMM32_B32 needs to be handled");
494
495 LLVM_DEBUG(dbgs() << " handleSetregMode: " << MI);
496
497 MachineOperand *SIMM16Op = TII->getNamedOperand(MI, AMDGPU::OpName::simm16);
498 assert(SIMM16Op && "SIMM16Op must be present");
499
500 auto [HwRegId, Offset, Size] = HwregEncoding::decode(SIMM16Op->getImm());
501 (void)Offset;
502 LLVM_DEBUG(dbgs() << " HwRegId=" << HwRegId << " Offset=" << Offset
503 << " Size=" << Size << '\n');
504 if (HwRegId != ID_MODE) {
505 LLVM_DEBUG(dbgs() << " -> not ID_MODE, skipping\n");
506 return false;
507 }
508
509 int64_t ModeValue = CurrentMode.encode();
510 LLVM_DEBUG({
511 dbgs() << " CurrentMode=";
512 CurrentMode.print(dbgs());
513 dbgs() << " encoded=0x" << Twine::utohexstr(ModeValue)
514 << " VGPRMSBShift=" << VGPRMSBShift << '\n';
515 });
516
517 // Case 1: Size <= 12 - the original instruction uses imm32[0:Size-1], so
518 // imm32[12:19] is unused. Safe to set imm32[12:19] to the correct VGPR
519 // MSBs.
520 if (Size <= VGPRMSBShift) {
521 LLVM_DEBUG(dbgs() << " Case 1: Size(" << Size << ") <= VGPRMSBShift("
522 << VGPRMSBShift
523 << "), treating as mode scope boundary\n");
524 // This instruction is at the boundary of the old mode's control range.
525 // Reset CurrentMode so that the next setMode call can freely piggyback
526 // the required mode into bits[12:19] without triggering Rewritten.
527 MostRecentModeSet = &MI;
528 CurrentMode = {};
529 bool Changed = updateSetregModeImm(MI, 0);
530 LLVM_DEBUG(dbgs() << " -> reset CurrentMode, cleared bits[12:19]: "
531 << MI);
532 return Changed;
533 }
534
535 // Case 2: Size > 12 - the original instruction uses bits beyond 11, so we
536 // cannot arbitrarily modify imm32[12:19]. Check if it already matches VGPR
537 // MSBs. Note: imm32[12:19] is in MODE register format, while ModeValue is
538 // in S_SET_VGPR_MSB format, so we need to convert before comparing.
539 MachineOperand *ImmOp = TII->getNamedOperand(MI, AMDGPU::OpName::imm);
540 assert(ImmOp && "ImmOp must be present");
541 int64_t ImmBits12To19 = (ImmOp->getImm() & VGPR_MSB_MASK) >> VGPRMSBShift;
542 int64_t SetregModeValue = convertModeToSetregFormat(ModeValue);
543 LLVM_DEBUG(dbgs() << " Case 2: Size(" << Size << ") > VGPRMSBShift, "
544 << "ImmBits12To19=0x" << Twine::utohexstr(ImmBits12To19)
545 << " SetregModeValue=0x"
546 << Twine::utohexstr(SetregModeValue) << '\n');
547 if (ImmBits12To19 == SetregModeValue) {
548 // Already correct, but we must invalidate MostRecentModeSet because this
549 // instruction will overwrite mode[12:19]. We can't update this instruction
550 // via piggybacking (bits[12:19] are meaningful), so if CurrentMode changes,
551 // a new s_set_vgpr_msb will be inserted after this instruction.
552 MostRecentModeSet = nullptr;
553 LLVM_DEBUG(dbgs() << " -> bits[12:19] already correct, "
554 "invalidated MostRecentModeSet\n");
555 return false;
556 }
557
558 // imm32[12:19] doesn't match VGPR MSBs - insert s_set_vgpr_msb after
559 // the original instruction to restore the correct value. Insert S_NOP
560 // to avoid the GFX1250 hazard where S_SET_VGPR_MSB immediately after
561 // S_SETREG_IMM32_B32(MODE) is silently dropped.
562 MachineBasicBlock::iterator InsertPt = std::next(MI.getIterator());
563 BuildMI(*MBB, InsertPt, MI.getDebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
564 MostRecentModeSet = BuildMI(*MBB, InsertPt, MI.getDebugLoc(),
565 TII->get(AMDGPU::S_SET_VGPR_MSB))
566 .addImm(ModeValue);
567 LLVM_DEBUG(dbgs() << " -> inserted S_SET_VGPR_MSB after setreg: "
568 << *MostRecentModeSet);
569 return true;
570}
571
/// Entry point of the lowering for one machine function.
///
/// Does nothing on subtargets without 1024 addressable VGPRs. Otherwise walks
/// every instruction of every block in order, dispatching on the kind of
/// instruction, and resets the mode at the end of each block.
/// \returns the accumulated results of handleSetregMode(), runOnMachineInstr()
/// and S_WAIT_XCNT removal.
572bool AMDGPULowerVGPREncoding::run(MachineFunction &MF) {
573 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
574 if (!ST.has1024AddressableVGPRs())
575 return false;
576
577 TII = ST.getInstrInfo();
578 TRI = ST.getRegisterInfo();
579
580 LLVM_DEBUG(dbgs() << "*** AMDGPULowerVGPREncoding on " << MF.getName()
581 << " ***\n");
582
583 bool Changed = false;
584 ClauseLen = ClauseRemaining = 0;
585 CurrentMode = {};
586 for (auto &MBB : MF) {
// MostRecentModeSet and XCntIsZero are tracked per block; CurrentMode carries
// over, but is reset to all zeros at the end of every block below.
587 MostRecentModeSet = nullptr;
588 XCntIsZero = false;
589 this->MBB = &MBB;
590
591 LLVM_DEBUG(dbgs() << "BB#" << MBB.getNumber() << ' ' << MBB.getName()
592 << ":\n");
593
// Early-increment iteration: the handlers below may insert or erase
// instructions around MI.
594 for (auto &MI : llvm::make_early_inc_range(MBB.instrs())) {
595 if (MI.isMetaInstruction())
596 continue;
597
// Terminators and calls get the default mode in front of them, except
// S_ENDPGM / S_ENDPGM_SAVED, where the tracked mode is simply forgotten.
598 if (MI.isTerminator() || MI.isCall()) {
599 LLVM_DEBUG(dbgs() << " terminator/call: " << MI);
600 if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
601 MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED)
602 CurrentMode = {};
603 else
604 resetMode(MI.getIterator());
605 continue;
606 }
607
// Inline asm with VGPR uses is run under the default mode.
608 if (MI.isInlineAsm()) {
609 LLVM_DEBUG(dbgs() << " inline asm: " << MI);
610 if (TII->hasVGPRUses(MI))
611 resetMode(MI.getIterator());
612 continue;
613 }
614
// S_CLAUSE immediate: bits [5:0] hold the length minus one, bits [11:8] the
// break count. Both counters start at the decoded length.
615 if (MI.getOpcode() == AMDGPU::S_CLAUSE) {
616 assert(!ClauseRemaining && "Nested clauses are not supported");
617 ClauseLen = MI.getOperand(0).getImm();
618 ClauseBreaks = (ClauseLen >> 8) & 15;
619 ClauseLen = ClauseRemaining = (ClauseLen & 63) + 1;
620 Clause = &MI;
621 LLVM_DEBUG(dbgs() << " clause: len=" << ClauseLen
622 << " breaks=" << ClauseBreaks << '\n');
623 continue;
624 }
625
626 if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 &&
627 ST.hasSetregVGPRMSBFixup()) {
628 Changed |= handleSetregMode(MI);
629 continue;
630 }
631
632 // If XCNT is known to be zero then any S_WAIT_XCNT instruction is
633 // redundant and can be removed.
634 if (MI.getOpcode() == AMDGPU::S_WAIT_XCNT && XCntIsZero) {
635 MI.eraseFromBundle();
636 Changed = true;
637 continue;
638 }
639
640 Changed |= runOnMachineInstr(MI);
641
642 // Any VMEM or SMEM instruction may increment XCNT.
// NOTE(review): the condition guarding the next assignment is not visible in
// this listing; as shown it looks unconditional.
644 XCntIsZero = false;
645
// One more instruction of the active clause has been processed.
646 if (ClauseRemaining)
647 --ClauseRemaining;
648 }
649
650 // Reset the mode if we are falling through.
651 LLVM_DEBUG(dbgs() << " end of BB, resetting mode\n");
652 resetMode(MBB.instr_end());
653 }
654
655 return Changed;
656}
657
/// Legacy pass manager wrapper: a MachineFunctionPass that runs a fresh
/// AMDGPULowerVGPREncoding instance on each machine function.
658class AMDGPULowerVGPREncodingLegacy : public MachineFunctionPass {
659public:
// Pass identification; its address is what identifies the pass.
660 static char ID;
661
662 AMDGPULowerVGPREncodingLegacy() : MachineFunctionPass(ID) {}
663
// Forwards the "changed" result of the lowering.
664 bool runOnMachineFunction(MachineFunction &MF) override {
665 return AMDGPULowerVGPREncoding().run(MF);
666 }
667
// The pass declares that it preserves the CFG.
// NOTE(review): one line of this body is not visible in this listing.
668 void getAnalysisUsage(AnalysisUsage &AU) const override {
669 AU.setPreservesCFG();
671 }
672};
673
674} // namespace
675
676char AMDGPULowerVGPREncodingLegacy::ID = 0;
677
678char &llvm::AMDGPULowerVGPREncodingLegacyID = AMDGPULowerVGPREncodingLegacy::ID;
679
680INITIALIZE_PASS(AMDGPULowerVGPREncodingLegacy, DEBUG_TYPE,
681 "AMDGPU Lower VGPR Encoding", false, false)
682
686 if (!AMDGPULowerVGPREncoding().run(MF))
687 return PreservedAnalyses::all();
688
690}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
AMD GCN specific subclass of TargetSubtarget.
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Interface definition for SIInstrInfo.
#define LLVM_DEBUG(...)
Definition Debug.h:114
This file implements the C++20 <bit> header.
Represent the analysis usage information of a pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
Instructions::iterator instr_iterator
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
static bool isVMEM(const MachineInstr &MI)
static bool isSMRD(const MachineInstr &MI)
static bool isVOP2(const MachineInstr &MI)
static bool isVOP3(const MCInstrDesc &Desc)
static Twine utohexstr(uint64_t Val)
Definition Twine.h:385
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
std::pair< const AMDGPU::OpName *, const AMDGPU::OpName * > getVGPRLoweringOperandTables(const MCInstrDesc &Desc)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
Definition Alignment.h:206
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
DWARFExpression::Operation Op
constexpr int countr_zero_constexpr(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:188
char & AMDGPULowerVGPREncodingLegacyID
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
constexpr T rotl(T V, int R)
Definition bit.h:384
static std::tuple< typename Fields::ValueType... > decode(uint64_t Encoded)