doxygen/AMDGPULowerVGPREncoding_8cpp_source.html

//===- AMDGPULowerVGPREncoding.cpp - lower VGPRs above v255 ---------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

/// \file

/// Lower VGPRs above first 256 on gfx1250.

///

/// The pass scans used VGPRs and inserts S_SET_VGPR_MSB instructions to switch

/// VGPR addressing mode. The mode change is effective until the next change.

/// This instruction provides high bits of a VGPR address for four of the

/// operands: vdst, src0, src1, and src2, or other 4 operands depending on the

/// instruction encoding. If bits are set they are added as MSB to the

/// corresponding operand VGPR number.

///

/// There is no need to replace actual register operands because encoding of the

/// high and low VGPRs is the same. I.e. v0 has the encoding 0x100, so does

/// v256. v1 has the encoding 0x101 and v257 has the same encoding. So high

/// VGPRs will survive until actual encoding and will result in the same actual

/// bit encoding.

///

/// As a result the pass only inserts S_SET_VGPR_MSB to provide an actual offset

/// to a VGPR address of the subsequent instructions. The InstPrinter will take

/// care of printing a low VGPR instead of a high one. In principle this

/// shall be viable to print actual high VGPR numbers, but that would disagree

/// with a disasm printing and create a situation where asm text is not

/// deterministic.

///

/// This pass creates a convention where non-fall through basic blocks shall

/// start with all 4 MSBs zero. Otherwise a disassembly would not be readable.

/// An optimization here is possible but deemed not desirable because of the

/// readability concerns.

///

/// Consequently the ABI is set to expect all 4 MSBs to be zero on entry.

/// The pass must run very late in the pipeline to make sure no changes to VGPR

/// operands will be made after it.

//

//===----------------------------------------------------------------------===//


#include "AMDGPULowerVGPREncoding.h"

#include "AMDGPU.h"

#include "GCNSubtarget.h"

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#include "SIInstrInfo.h"

#include "llvm/ADT/PackedVector.h"


using namespace llvm;


#define DEBUG_TYPE "amdgpu-lower-vgpr-encoding"


namespace {


/// Implementation of the VGPR encoding lowering shared by the legacy and the
/// new pass manager wrappers. It walks a machine function and keeps track of
/// the VGPR MSB mode, inserting or updating S_SET_VGPR_MSB where needed.
class AMDGPULowerVGPREncoding {
  // The mode is NumFields fields of BitsPerField bits each; OpNum is the
  // number of operand slots inspected per instruction.
  static constexpr unsigned OpNum = 4;
  static constexpr unsigned BitsPerField = 2;
  static constexpr unsigned NumFields = 4;
  static constexpr unsigned FieldMask = (1 << BitsPerField) - 1;
  static constexpr unsigned ModeWidth = NumFields * BitsPerField;
  static constexpr unsigned ModeMask = (1 << ModeWidth) - 1;

  using ModeType =
      PackedVector<unsigned, BitsPerField, std::bitset<ModeWidth>>;

  /// A packed mode (or mode mask) value with a few convenience helpers.
  class ModeTy : public ModeType {
  public:
    // The underlying bitset is constructed with every bit cleared.
    ModeTy() : ModeType(0) {}

    /// The packed bits viewed as a plain integer.
    operator int64_t() const { return raw_bits().to_ulong(); }

    /// \returns a value with every bit of every field set.
    static ModeTy fullMask() {
      ModeTy AllOnes;
      AllOnes.raw_bits().flip();
      return AllOnes;
    }
  };

public:
  /// Process \p MF. \returns true if the function was changed.
  bool run(MachineFunction &MF);

private:
  const SIInstrInfo *TII;
  const SIRegisterInfo *TRI;

  // Basic block currently being processed.
  MachineBasicBlock *MBB;

  /// The latest s_set_* instruction, if any.
  MachineInstr *MostRecentModeSet;

  /// Mode bits currently in effect.
  ModeTy CurrentMode;

  /// Mode bits that instructions seen since MostRecentModeSet depend on.
  ModeTy CurrentMask;

  /// Number of instructions in the current hard clause.
  unsigned ClauseLen;

  /// Number of instructions of the hard clause still to be visited.
  unsigned ClauseRemaining;

  /// Clause group breaks.
  unsigned ClauseBreaks;

  /// The most recent hard clause instruction.
  MachineInstr *Clause;

  /// Insert mode change before \p I. \returns true if mode was changed.
  bool setMode(ModeTy NewMode, ModeTy Mask,
               MachineBasicBlock::instr_iterator I);

  /// Go back to the default (all zero) mode before \p I.
  void resetMode(MachineBasicBlock::instr_iterator I) {
    const ModeTy DefaultMode;
    setMode(DefaultMode, ModeTy::fullMask(), I);
  }

  /// \returns the MSBs of \p MO if it references VGPRs, nullopt otherwise.
  std::optional<unsigned> getMSBs(const MachineOperand &MO) const;

  /// Process a single \p MI. \returns true if anything was changed.
  bool runOnMachineInstr(MachineInstr &MI);

  /// Derive the mode and the mode mask of a single \p MI from the operand to
  /// bit mapping \p Ops. A second table \p Ops2 may be supplied for VOPD; it
  /// is consulted whenever the operand named by \p Ops is not a VGPR.
  void computeMode(ModeTy &NewMode, ModeTy &Mask, MachineInstr &MI,
                   const AMDGPU::OpName Ops[OpNum],
                   const AMDGPU::OpName *Ops2 = nullptr);

  /// Determine whether \p I sits inside a clause and return an iterator that
  /// is suitable for inserting a mode change. The S_CLAUSE instruction may be
  /// modified to extend the clause, or the clause may be dropped if it cannot
  /// be adjusted.
  MachineBasicBlock::instr_iterator
  handleClause(MachineBasicBlock::instr_iterator I);

  /// Determine whether \p I immediately follows another program state
  /// instruction which it cannot coissue with. If so, insert before that
  /// instruction to encourage more coissuing.
  MachineBasicBlock::instr_iterator
  handleCoissue(MachineBasicBlock::instr_iterator I);
};


/// Make the bits of \p NewMode selected by \p Mask take effect at \p I.
///
/// Nothing is emitted when those bits already match the current mode. When
/// they differ only in bits nobody has relied on since the most recent mode
/// set, that instruction's immediate is updated in place. Otherwise a new
/// S_SET_VGPR_MSB is built at a position derived from \p I.
///
/// \returns true if an instruction was inserted or updated.
bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode, ModeTy Mask,
                                      MachineBasicBlock::instr_iterator I) {
  // The requested mode must not carry bits outside of its own mask.
  assert((NewMode.raw_bits() & ~Mask.raw_bits()).none());

  const auto Diff = NewMode.raw_bits() ^ CurrentMode.raw_bits();

  // All bits of interest already have the right value: just record that they
  // are now depended upon.
  const bool MaskedBitsDiffer = (Diff & Mask.raw_bits()).any();
  if (!MaskedBitsDiffer) {
    CurrentMask |= Mask;
    return false;
  }

  // The differing bits are not in CurrentMask, so the existing mode set can
  // simply be widened.
  const bool CanReuseModeSet =
      MostRecentModeSet && (Diff & CurrentMask.raw_bits()).none();
  if (CanReuseModeSet) {
    CurrentMode |= NewMode;
    CurrentMask |= Mask;

    MachineOperand &ImmOp = MostRecentModeSet->getOperand(0);

    // Keep the old mode bits already stored in the existing instruction.
    const int64_t SavedOldBits = ImmOp.getImm() & (ModeMask << ModeWidth);

    ImmOp.setImm(CurrentMode | SavedOldBits);
    return true;
  }

  // The previous mode goes into the high 8 bits of the immediate.
  const int64_t PrevModeBits = CurrentMode << ModeWidth;

  // Pick the insertion point: first account for clauses, then for coissue.
  I = handleCoissue(handleClause(I));
  MostRecentModeSet = BuildMI(*MBB, I, {}, TII->get(AMDGPU::S_SET_VGPR_MSB))
                          .addImm(NewMode | PrevModeBits);

  CurrentMode = NewMode;
  CurrentMask = Mask;
  return true;
}


std::optional<unsigned>

AMDGPULowerVGPREncoding::getMSBs(const MachineOperand &MO) const {

  if (!MO.isReg())

    return std::nullopt;


  MCRegister Reg = MO.getReg();

  const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);

  if (!RC || !TRI->isVGPRClass(RC))

    return std::nullopt;


  unsigned Idx = TRI->getHWRegIndex(Reg);

  return Idx >> 8;

}


/// Fill \p NewMode and \p Mask for \p MI.
///
/// Slot N of the result corresponds to the operand named Ops[N]. If that
/// operand does not yield MSBs and \p Ops2 is given, the operand named Ops2[N]
/// is tried instead. A slot without MSBs stays zero in both outputs.
void AMDGPULowerVGPREncoding::computeMode(ModeTy &NewMode, ModeTy &Mask,
                                          MachineInstr &MI,
                                          const AMDGPU::OpName Ops[OpNum],
                                          const AMDGPU::OpName *Ops2) {
  NewMode = ModeTy();
  Mask = ModeTy();

  for (unsigned Slot = 0; Slot != OpNum; ++Slot) {
    MachineOperand *Op = TII->getNamedOperand(MI, Ops[Slot]);

    std::optional<unsigned> MSBits;
    if (Op)
      MSBits = getMSBs(*Op);

#if !defined(NDEBUG)
    // When both tables name a VGPR for this slot the two must agree.
    if (MSBits.has_value() && Ops2) {
      if (auto AltOp = TII->getNamedOperand(MI, Ops2[Slot])) {
        std::optional<unsigned> AltMSBits = getMSBs(*AltOp);
        if (AltMSBits.has_value() && *MSBits != *AltMSBits)
          llvm_unreachable("Invalid VOPD pair was created");
      }
    }
#endif

    // Fall back to the second table if the first one gave nothing.
    if (!MSBits.has_value() && Ops2) {
      Op = TII->getNamedOperand(MI, Ops2[Slot]);
      if (Op)
        MSBits = getMSBs(*Op);
    }

    if (!MSBits.has_value())
      continue;

    // A tied src2 use is skipped for VOP2, and for VOP3 opcodes which have a
    // 32-bit VALU encoding: such operands are handled along with the defs and
    // only the vdst bit affects them.
    const bool IsTiedSrc2Use =
        Ops[Slot] == AMDGPU::OpName::src2 && !Op->isDef() && Op->isTied();
    if (IsTiedSrc2Use &&
        (SIInstrInfo::isVOP2(MI) ||
         (SIInstrInfo::isVOP3(MI) &&
          TII->hasVALU32BitEncoding(MI.getOpcode()))))
      continue;

    NewMode[Slot] = *MSBits;
    Mask[Slot] = FieldMask;
  }
}


/// Compute the mode required by \p MI and make it current.
///
/// Instructions without a VGPR lowering operand table are left alone.
/// \returns true if a mode change was inserted or updated.
bool AMDGPULowerVGPREncoding::runOnMachineInstr(MachineInstr &MI) {
  const auto Tables = AMDGPU::getVGPRLoweringOperandTables(MI.getDesc());

  if (!Tables.first) {
    // Without a table only meta or pseudo instructions may use VGPRs.
    assert(!TII->hasVGPRUses(MI) || MI.isMetaInstruction() || MI.isPseudo());
    return false;
  }

  ModeTy RequiredMode, RequiredMask;
  computeMode(RequiredMode, RequiredMask, MI, Tables.first, Tables.second);
  return setMode(RequiredMode, RequiredMask, MI.getIterator());
}


/// Adjust the insertion point \p I of a mode change with respect to the hard
/// clause currently being tracked, if any.
///
/// \returns \p I itself, or the node preceding the S_CLAUSE when none of the
/// clause's instructions has been counted yet.
MachineBasicBlock::instr_iterator
AMDGPULowerVGPREncoding::handleClause(MachineBasicBlock::instr_iterator I) {
  // Not inside a clause: nothing to adjust.
  if (ClauseRemaining == 0)
    return I;

  // A special instruction must not be the first one of a clause, so it goes
  // right in front of the clause.
  if (ClauseRemaining == ClauseLen) {
    MachineInstr *BeforeClause = Clause->getPrevNode();
    I = BeforeClause->getIterator();
    assert(I->isBundle());
    return I;
  }

  // With breaks defined, no group may start with a mode change. Simply give
  // up on the clause.
  if (ClauseBreaks != 0) {
    Clause->eraseFromBundle();
    ClauseRemaining = 0;
    return I;
  }

  // Otherwise grow the clause by one instruction if that still fits; if it
  // does not, the clause just ends up shorter. The recorded length is one
  // less than the real one, hence the increment happens after the update.
  // Note that SIMM16[5:0] must be 1-62, not 0 or 63.
  if (ClauseLen < 63) {
    MachineOperand &LenOp = Clause->getOperand(0);
    LenOp.setImm(ClauseLen | (ClauseBreaks << 8));
  }

  ++ClauseLen;

  return I;
}


/// Move the insertion point \p I of a mode change in front of a run of
/// program state instructions which directly precedes it.
///
/// An instruction belongs to such a run if it is a barrier, a waitcnt, or a
/// program state SALU instruction other than S_SET_VGPR_MSB.
///
/// \returns \p I unchanged if it is the end iterator, the first instruction
/// of its block, or not directly preceded by such an instruction; otherwise
/// the iterator at which the backwards walk over the run stopped.
MachineBasicBlock::instr_iterator
AMDGPULowerVGPREncoding::handleCoissue(MachineBasicBlock::instr_iterator I) {
  if (I.isEnd())
    return I;

  if (I == I->getParent()->begin())
    return I;

  MachineBasicBlock::instr_iterator Prev = std::prev(I);

  // Each opcode query takes the opcode itself; the program state SALU test is
  // a separate alternative and must not be part of the isWaitcnt argument.
  auto isProgramStateSALU = [this](MachineInstr *MI) {
    return TII->isBarrier(MI->getOpcode()) ||
           TII->isWaitcnt(MI->getOpcode()) ||
           (SIInstrInfo::isProgramStateSALU(*MI) &&
            MI->getOpcode() != AMDGPU::S_SET_VGPR_MSB);
  };

  if (!isProgramStateSALU(&*Prev))
    return I;

  // Walk back over the whole run of such instructions.
  // NOTE(review): unless the block start is reached, the walk stops *on* the
  // first instruction that is not part of the run and that iterator is
  // returned, so the caller inserts before that instruction rather than
  // directly in front of the run - confirm this placement is intended.
  while (!Prev.isEnd() && (Prev != Prev->getParent()->begin()) &&
         isProgramStateSALU(&*Prev)) {
    --Prev;
  }
  return Prev;
}


/// Run the lowering over \p MF.
///
/// Does nothing unless the subtarget reports 1024 addressable VGPRs. The
/// mode is reset before calls, before terminators other than S_ENDPGM and
/// S_ENDPGM_SAVED, before inline asm with VGPR uses, and at the end of every
/// basic block.
///
/// \returns true if any processed instruction caused a mode change.
bool AMDGPULowerVGPREncoding::run(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.has1024AddressableVGPRs())
    return false;

  TII = ST.getInstrInfo();
  TRI = ST.getRegisterInfo();

  // Start from a clean state: no clause and all mode bits zero.
  ClauseLen = ClauseRemaining = 0;
  CurrentMode.reset();
  CurrentMask.reset();

  bool Changed = false;
  for (MachineBasicBlock &Block : MF) {
    MostRecentModeSet = nullptr;
    MBB = &Block;

    for (MachineInstr &MI : llvm::make_early_inc_range(Block.instrs())) {
      if (MI.isMetaInstruction())
        continue;

      const unsigned Opc = MI.getOpcode();

      if (MI.isTerminator() || MI.isCall()) {
        // At the end of the program only the tracked mode is cleared, no
        // instruction is emitted.
        const bool EndsProgram =
            Opc == AMDGPU::S_ENDPGM || Opc == AMDGPU::S_ENDPGM_SAVED;
        if (EndsProgram)
          CurrentMode.reset();
        else
          resetMode(MI.getIterator());
        continue;
      }

      if (MI.isInlineAsm()) {
        if (TII->hasVGPRUses(MI))
          resetMode(MI.getIterator());
        continue;
      }

      if (Opc == AMDGPU::S_CLAUSE) {
        assert(!ClauseRemaining && "Nested clauses are not supported");
        // Breaks are taken from bits [11:8]; the low 6 bits hold the length
        // minus one.
        const unsigned ClauseImm = MI.getOperand(0).getImm();
        ClauseBreaks = (ClauseImm >> 8) & 15;
        ClauseLen = ClauseRemaining = (ClauseImm & 63) + 1;
        Clause = &MI;
        continue;
      }

      Changed |= runOnMachineInstr(MI);

      if (ClauseRemaining != 0)
        --ClauseRemaining;
    }

    // Reset the mode if we are falling through.
    resetMode(Block.instr_end());
  }

  return Changed;
}


/// Legacy pass manager wrapper which forwards to AMDGPULowerVGPREncoding.
class AMDGPULowerVGPREncodingLegacy : public MachineFunctionPass {
public:
  static char ID;

  AMDGPULowerVGPREncodingLegacy() : MachineFunctionPass(ID) {}

  /// Run a fresh implementation object over \p MF and report whether it
  /// changed anything.
  bool runOnMachineFunction(MachineFunction &MF) override {
    const bool Changed = AMDGPULowerVGPREncoding().run(MF);
    return Changed;
  }

  /// Declare the CFG as preserved, then defer to the base class.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};


} // namespace


// Out-of-class definition of the legacy pass's static ID member.
char AMDGPULowerVGPREncodingLegacy::ID = 0;


// Reference to the legacy pass ID, defined in the llvm namespace.
char &llvm::AMDGPULowerVGPREncodingLegacyID = AMDGPULowerVGPREncodingLegacy::ID;


// Legacy pass initialization boilerplate: pass class, argument name
// (DEBUG_TYPE), description, then the cfg and analysis flags, both false.
INITIALIZE_PASS(AMDGPULowerVGPREncodingLegacy, DEBUG_TYPE,

                "AMDGPU Lower VGPR Encoding", false, false)


/// New pass manager entry point.
///
/// \returns PreservedAnalyses::all() if \p MF was left untouched, otherwise a
/// set which preserves only the CFG analyses.
PreservedAnalyses
AMDGPULowerVGPREncodingPass::run(MachineFunction &MF,
                                 MachineFunctionAnalysisManager &MFAM) {
  const bool Changed = AMDGPULowerVGPREncoding().run(MF);
  if (Changed) {
    PreservedAnalyses Preserved;
    Preserved.preserveSet<CFGAnalyses>();
    return Preserved;
  }

  return PreservedAnalyses::all();
}


assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

AMDGPULowerVGPREncoding.h

AMDGPUMCTargetDesc.h
Provides AMDGPU specific target descriptions.

AMDGPU.h

MBB
MachineBasicBlock & MBB
Definition ARMSLSHardening.cpp:71

GCNSubtarget.h
AMD GCN specific subclass of TargetSubtarget.

DEBUG_TYPE
#define DEBUG_TYPE
Definition GenericCycleImpl.h:31

TII
const HexagonInstrInfo * TII
Definition HexagonCopyToCombine.cpp:118

MI
IRTranslator LLVM IR MI
Definition IRTranslator.cpp:110

Ops
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Definition ItaniumDemangle.h:3370

I
#define I(x, y, z)
Definition MD5.cpp:57

Reg
Register Reg
Definition MachineSink.cpp:2117

TRI
Register const TargetRegisterInfo * TRI
Definition MachineSink.cpp:2118

PackedVector.h
This file implements the PackedVector class.

INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56

SIInstrInfo.h
Interface definition for SIInstrInfo.

llvm::AMDGPULowerVGPREncodingPass
Definition AMDGPULowerVGPREncoding.h:17

llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition PassAnalysisSupport.h:48

llvm::AnalysisUsage::setPreservesCFG
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270

llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73

llvm::Clause
Definition DirectiveEmitter.h:279

llvm::GCNSubtarget
Definition GCNSubtarget.h:34

llvm::MCRegister
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41

llvm::MachineBasicBlock
Definition MachineBasicBlock.h:122

llvm::MachineBasicBlock::instr_iterator
Instructions::iterator instr_iterator
Definition MachineBasicBlock.h:336

llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition MachineFunctionPass.h:31

llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition MachineFunctionPass.cpp:184

llvm::MachineFunction
Definition MachineFunction.h:286

llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition MachineFunction.h:762

llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition MachineInstrBuilder.h:175

llvm::MachineInstr
Representation of each machine instruction.
Definition MachineInstr.h:72

llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition MachineInstr.h:606

llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition MachineOperand.h:49

llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition MachineOperand.h:331

llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition MachineOperand.h:372

llvm::PackedVector
Store a vector of values using a specific number of bits for each value.
Definition PackedVector.h:31

llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118

llvm::PreservedAnalyses::preserveSet
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151

llvm::SIInstrInfo
Definition SIInstrInfo.h:90

llvm::SIInstrInfo::isVOP2
static bool isVOP2(const MachineInstr &MI)
Definition SIInstrInfo.h:546

llvm::SIInstrInfo::isProgramStateSALU
static bool isProgramStateSALU(const MachineInstr &MI)
Definition SIInstrInfo.h:460

llvm::SIInstrInfo::isVOP3
static bool isVOP3(const MCInstrDesc &Desc)
Definition SIInstrInfo.h:554

llvm::SIRegisterInfo
Definition SIRegisterInfo.h:40

llvm::TargetRegisterClass
Definition TargetRegisterInfo.h:45

unsigned

Changed
Changed
Definition ObjCARCOpts.cpp:2369

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

llvm::AMDGPU::getVGPRLoweringOperandTables
std::pair< const AMDGPU::OpName *, const AMDGPU::OpName * > getVGPRLoweringOperandTables(const MCInstrDesc &Desc)
Definition AMDGPUBaseInfo.cpp:3448

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition AddressRanges.h:18

llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition MachineInstrBuilder.h:391

llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632

llvm::MachineFunctionAnalysisManager
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
Definition MachineFunctionAnalysisManager.h:24

llvm::Op
DWARFExpression::Operation Op
Definition DWARFExpressionPrinter.cpp:22

llvm::AMDGPULowerVGPREncodingLegacyID
char & AMDGPULowerVGPREncodingLegacyID
Definition AMDGPULowerVGPREncoding.cpp:395