54#define DEBUG_TYPE "amdgpu-lower-vgpr-encoding"
58class AMDGPULowerVGPREncoding {
59 static constexpr unsigned OpNum = 4;
60 static constexpr unsigned BitsPerField = 2;
61 static constexpr unsigned NumFields = 4;
62 static constexpr unsigned ModeWidth = NumFields * BitsPerField;
63 static constexpr unsigned ModeMask = (1 << ModeWidth) - 1;
64 static constexpr unsigned VGPRMSBShift =
69 std::optional<unsigned> MSBits;
71 bool update(
const OpMode &New,
bool &Rewritten) {
74 if (*New.MSBits != MSBits.value_or(0)) {
76 Rewritten |= MSBits.has_value();
87 bool update(
const ModeTy &New,
bool &Rewritten) {
89 for (
unsigned I :
seq(OpNum))
90 Updated |=
Ops[
I].update(New.Ops[
I], Rewritten);
98 V |=
Op.MSBits.value_or(0) << (
I * 2);
103 static const char *FieldNames[] = {
"src0",
"src1",
"src2",
"dst"};
108 OS << FieldNames[
I] <<
'=';
119 bool isCompatible(
const ModeTy NewMode)
const {
120 for (
unsigned I :
seq(OpNum)) {
121 if (!NewMode.Ops[
I].MSBits.has_value())
123 if (
Ops[
I].MSBits.value_or(0) != NewMode.Ops[
I].MSBits.value_or(0))
150 unsigned ClauseRemaining;
153 unsigned ClauseBreaks;
168 for (OpMode &
Op :
Mode.Ops)
184 const AMDGPU::OpName
Ops[OpNum],
185 const AMDGPU::OpName *Ops2 =
nullptr);
215bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode,
218 dbgs() <<
" setMode: NewMode=";
219 NewMode.print(
dbgs());
220 dbgs() <<
" CurrentMode=";
221 CurrentMode.print(
dbgs());
222 dbgs() <<
" MostRecentModeSet=" << (MostRecentModeSet ?
"yes" :
"null");
223 if (
I !=
MBB->instr_end())
224 dbgs() <<
" before: " << *
I;
226 dbgs() <<
" at end\n";
230 int64_t OldModeBits = CurrentMode.encode() << ModeWidth;
232 bool Rewritten =
false;
233 if (!CurrentMode.update(NewMode, Rewritten)) {
238 LLVM_DEBUG(
dbgs() <<
" Rewritten=" << Rewritten <<
" after update\n");
240 if (MostRecentModeSet && !Rewritten) {
243 if (MostRecentModeSet->
getOpcode() == AMDGPU::S_SET_VGPR_MSB) {
246 int64_t OldModeBits =
Op.getImm() & (ModeMask << ModeWidth);
247 Op.setImm(CurrentMode.encode() | OldModeBits);
249 << *MostRecentModeSet);
252 "unexpected MostRecentModeSet opcode");
253 updateSetregModeImm(*MostRecentModeSet, CurrentMode.encode());
255 << *MostRecentModeSet);
262 InsertPt = handleCoissue(InsertPt);
267 if (needNopBeforeSetVGPRMSB(
I))
270 BuildMI(*
MBB, InsertPt, {},
TII->get(AMDGPU::S_SET_VGPR_MSB))
271 .
addImm(NewMode.encode() | OldModeBits);
273 << *MostRecentModeSet);
280 if (
MI.getOpcode() == AMDGPU::S_WAIT_XCNT)
281 MI.eraseFromBundle();
285 CurrentMode = NewMode;
289std::optional<unsigned>
296 if (!RC || !
TRI->isVGPRClass(RC))
299 unsigned Idx =
TRI->getHWRegIndex(
Reg);
303void AMDGPULowerVGPREncoding::computeMode(ModeTy &NewMode,
305 const AMDGPU::OpName
Ops[OpNum],
306 const AMDGPU::OpName *Ops2) {
309 for (
unsigned I = 0;
I < OpNum; ++
I) {
312 std::optional<unsigned> MSBits;
314 MSBits = getMSBs(*
Op);
317 if (MSBits.has_value() && Ops2) {
320 std::optional<unsigned> MSBits2;
321 MSBits2 = getMSBs(*Op2);
322 if (MSBits2.has_value() && MSBits != MSBits2)
328 if (!MSBits.has_value() && Ops2) {
329 Op =
TII->getNamedOperand(
MI, Ops2[
I]);
331 MSBits = getMSBs(*
Op);
334 if (!MSBits.has_value())
340 if (
Ops[
I] == AMDGPU::OpName::src2 && !
Op->isDef() &&
Op->isTied() &&
343 TII->hasVALU32BitEncoding(
MI.getOpcode()))))
346 NewMode.Ops[
I].MSBits = MSBits.value();
350bool AMDGPULowerVGPREncoding::runOnMachineInstr(
MachineInstr &
MI) {
354 computeMode(NewMode,
MI,
Ops.first,
Ops.second);
356 dbgs() <<
" runOnMachineInstr: ";
358 dbgs() <<
" computed NewMode=";
359 NewMode.print(
dbgs());
360 dbgs() <<
" compatible=" << CurrentMode.isCompatible(NewMode) <<
'\n';
362 if (!CurrentMode.isCompatible(NewMode) &&
MI.isCommutable() &&
363 TII->commuteInstruction(
MI)) {
364 ModeTy NewModeCommuted;
365 computeMode(NewModeCommuted,
MI,
Ops.first,
Ops.second);
367 dbgs() <<
" commuted NewMode=";
368 NewModeCommuted.print(
dbgs());
369 dbgs() <<
" compatible=" << CurrentMode.isCompatible(NewModeCommuted)
372 if (CurrentMode.isCompatible(NewModeCommuted)) {
377 CurrentMode.update(NewModeCommuted, Unused);
382 if (!
TII->commuteInstruction(
MI))
385 return setMode(NewMode,
MI.getIterator());
387 assert(!
TII->hasVGPRUses(
MI) ||
MI.isMetaInstruction() ||
MI.isPseudo());
393 if (!ClauseRemaining)
398 if (ClauseRemaining == ClauseLen) {
399 I =
Clause->getPrevNode()->getIterator();
407 Clause->eraseFromBundle();
417 Clause->getOperand(0).setImm(ClauseLen | (ClauseBreaks << 8));
430 unsigned Opc =
MI->getOpcode();
432 Opc == AMDGPU::S_DELAY_ALU;
435 while (
I !=
MBB->begin()) {
436 auto Prev = std::prev(
I);
437 if (!isProgramStateInstr(&*Prev))
445bool AMDGPULowerVGPREncoding::needNopBeforeSetVGPRMSB(
447 while (
I !=
MBB->begin()) {
449 if (
I->getOpcode() == AMDGPU::S_SETREG_IMM32_B32) {
451 TII->getNamedOperand(*
I, AMDGPU::OpName::simm16);
457 if (!
I->isMetaInstruction())
469static int64_t convertModeToSetregFormat(int64_t
Mode) {
474bool AMDGPULowerVGPREncoding::updateSetregModeImm(
MachineInstr &
MI,
476 assert(
MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32);
479 int64_t SetregMode = convertModeToSetregFormat(ModeValue);
482 int64_t OldImm = ImmOp->
getImm();
484 (OldImm &
~AMDGPU::Hwreg::VGPR_MSB_MASK) | (SetregMode << VGPRMSBShift);
486 return NewImm != OldImm;
492 assert(
MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 &&
493 "only S_SETREG_IMM32_B32 needs to be handled");
498 assert(SIMM16Op &&
"SIMM16Op must be present");
503 <<
" Size=" <<
Size <<
'\n');
504 if (HwRegId != ID_MODE) {
509 int64_t ModeValue = CurrentMode.encode();
511 dbgs() <<
" CurrentMode=";
512 CurrentMode.print(
dbgs());
514 <<
" VGPRMSBShift=" << VGPRMSBShift <<
'\n';
520 if (
Size <= VGPRMSBShift) {
523 <<
"), treating as mode scope boundary\n");
527 MostRecentModeSet = &
MI;
529 bool Changed = updateSetregModeImm(
MI, 0);
530 LLVM_DEBUG(
dbgs() <<
" -> reset CurrentMode, cleared bits[12:19]: "
540 assert(ImmOp &&
"ImmOp must be present");
541 int64_t ImmBits12To19 = (ImmOp->
getImm() & VGPR_MSB_MASK) >> VGPRMSBShift;
542 int64_t SetregModeValue = convertModeToSetregFormat(ModeValue);
545 <<
" SetregModeValue=0x"
547 if (ImmBits12To19 == SetregModeValue) {
552 MostRecentModeSet =
nullptr;
554 "invalidated MostRecentModeSet\n");
564 MostRecentModeSet =
BuildMI(*
MBB, InsertPt,
MI.getDebugLoc(),
565 TII->get(AMDGPU::S_SET_VGPR_MSB))
568 << *MostRecentModeSet);
574 if (!ST.has1024AddressableVGPRs())
577 TII = ST.getInstrInfo();
578 TRI = ST.getRegisterInfo();
584 ClauseLen = ClauseRemaining = 0;
586 for (
auto &
MBB : MF) {
587 MostRecentModeSet =
nullptr;
595 if (
MI.isMetaInstruction())
598 if (
MI.isTerminator() ||
MI.isCall()) {
600 if (
MI.getOpcode() == AMDGPU::S_ENDPGM ||
601 MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED)
604 resetMode(
MI.getIterator());
608 if (
MI.isInlineAsm()) {
610 if (
TII->hasVGPRUses(
MI))
611 resetMode(
MI.getIterator());
615 if (
MI.getOpcode() == AMDGPU::S_CLAUSE) {
616 assert(!ClauseRemaining &&
"Nested clauses are not supported");
617 ClauseLen =
MI.getOperand(0).getImm();
618 ClauseBreaks = (ClauseLen >> 8) & 15;
619 ClauseLen = ClauseRemaining = (ClauseLen & 63) + 1;
622 <<
" breaks=" << ClauseBreaks <<
'\n');
626 if (
MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 &&
627 ST.hasSetregVGPRMSBFixup()) {
634 if (
MI.getOpcode() == AMDGPU::S_WAIT_XCNT && XCntIsZero) {
635 MI.eraseFromBundle();
652 resetMode(
MBB.instr_end());
665 return AMDGPULowerVGPREncoding().run(MF);
676char AMDGPULowerVGPREncodingLegacy::ID = 0;
681 "AMDGPU Lower VGPR Encoding",
false,
false)
686 if (!AMDGPULowerVGPREncoding().run(MF))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Interface definition for SIInstrInfo.
This file implements the C++20 <bit> header.
Represent the analysis usage information of a pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Represents analyses that only rely on functions' control flow.
Wrapper class representing physical registers. Should be passed by value.
Instructions::iterator instr_iterator
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
static bool isVMEM(const MachineInstr &MI)
static bool isSMRD(const MachineInstr &MI)
static bool isVOP2(const MachineInstr &MI)
static bool isVOP3(const MCInstrDesc &Desc)
static Twine utohexstr(uint64_t Val)
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
std::pair< const AMDGPU::OpName *, const AMDGPU::OpName * > getVGPRLoweringOperandTables(const MCInstrDesc &Desc)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
DWARFExpression::Operation Op
constexpr int countr_zero_constexpr(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
char & AMDGPULowerVGPREncodingLegacyID
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
constexpr T rotl(T V, int R)
static std::tuple< typename Fields::ValueType... > decode(uint64_t Encoded)