34#define DEBUG_TYPE "amdgpu-regbanklegalize"
43template <
typename SrcTy>
45m_GAMDGPUReadAnyLane(
const SrcTy &Src) {
59 return "AMDGPU Register Bank Legalize";
79 "AMDGPU Register Bank Legalize",
false,
false)
86char AMDGPURegBankLegalize::
ID = 0;
91 return new AMDGPURegBankLegalize();
96 static std::mutex GlobalMutex;
99 std::lock_guard<std::mutex> Lock(GlobalMutex);
100 auto [It, Inserted] = CacheForRuleSet.
try_emplace(ST.getGeneration());
102 It->second = std::make_unique<RegBankLegalizeRules>(ST,
MRI);
104 It->second->refreshRefs(ST,
MRI);
124 : B(B), MRI(*B.getMRI()), TRI(TRI),
125 SgprRB(&RBI.getRegBank(
AMDGPU::SGPRRegBankID)),
126 VgprRB(&RBI.getRegBank(
AMDGPU::VGPRRegBankID)),
127 VccRB(&RBI.getRegBank(
AMDGPU::VCCRegBankID)) {};
130 std::pair<MachineInstr *, Register>
tryMatch(
Register Src,
unsigned Opcode);
142 if (RB && RB->
getID() == AMDGPU::VCCRegBankID)
146 return RC && TRI.isSGPRClass(RC) && MRI.getType(Reg) ==
LLT::scalar(1);
149std::pair<MachineInstr *, Register>
157std::pair<GUnmerge *, int>
160 if (ReadAnyLane->
getOpcode() != AMDGPU::G_AMDGPU_READANYLANE)
161 return {
nullptr, -1};
165 return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc,
nullptr)};
167 return {
nullptr, -1};
190 unsigned NumElts =
Merge->getNumSources();
192 if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
196 for (
unsigned i = 1; i < NumElts; ++i) {
198 if (UnmergeI != Unmerge || (
unsigned)IdxI != i)
201 return Unmerge->getSourceReg();
211 int Idx = UnMerge->findRegisterDefOperandIdx(Src,
nullptr);
213 if (!
Merge || UnMerge->getNumDefs() !=
Merge->getNumSources())
217 if (MRI.getType(Src) != MRI.getType(SrcRegIdx))
220 auto [RALEl, RALElSrc] =
tryMatch(SrcRegIdx, AMDGPU::G_AMDGPU_READANYLANE);
230 MRI.replaceRegWith(Dst, Src);
232 B.buildCopy(Dst, Src);
237 Register Dst = Copy.getOperand(0).getReg();
238 Register Src = Copy.getOperand(1).getReg();
241 if (Dst.isVirtual() ? (MRI.getRegBankOrNull(Dst) != VgprRB)
242 : !TRI.isVGPR(MRI, Dst))
246 if (!Src.isVirtual() || MRI.getRegClassOrNull(Src))
251 if (SrcMI.
getOpcode() == AMDGPU::G_BITCAST)
259 if (SrcMI.
getOpcode() != AMDGPU::G_BITCAST) {
272 auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
287 if (!Dst.isVirtual() || !Src.isVirtual())
297 if (
isLaneMask(Dst) && MRI.getRegBankOrNull(Src) == SgprRB) {
298 auto [Trunc, TruncS32Src] =
tryMatch(Src, AMDGPU::G_TRUNC);
299 assert(Trunc && MRI.getType(TruncS32Src) == S32 &&
300 "sgpr S1 must be result of G_TRUNC of sgpr S32");
304 auto One = B.buildConstant({SgprRB, S32}, 1);
305 auto BoolSrc = B.buildAnd({SgprRB, S32}, TruncS32Src, One);
306 B.buildInstr(AMDGPU::G_AMDGPU_COPY_VCC_SCC, {Dst}, {BoolSrc});
318 if (MRI.getType(Src) != S1)
321 auto [Trunc, TruncSrc] =
tryMatch(Src, AMDGPU::G_TRUNC);
325 LLT DstTy = MRI.getType(Dst);
326 LLT TruncSrcTy = MRI.getType(TruncSrc);
328 if (DstTy == TruncSrcTy) {
329 MRI.replaceRegWith(Dst, TruncSrc);
336 if (DstTy == S32 && TruncSrcTy == S64) {
337 auto Unmerge = B.buildUnmerge({SgprRB, S32}, TruncSrc);
338 MRI.replaceRegWith(Dst, Unmerge.getReg(0));
343 if (DstTy == S64 && TruncSrcTy == S32) {
344 B.buildMergeLikeInstr(
MI.getOperand(0).getReg(),
345 {TruncSrc, B.buildUndef({SgprRB, S32})});
350 if (DstTy ==
S32 && TruncSrcTy ==
S16) {
351 B.buildAnyExt(Dst, TruncSrc);
356 if (DstTy ==
S16 && TruncSrcTy ==
S32) {
357 B.buildTrunc(Dst, TruncSrc);
368 for (
unsigned i = 0; i <
MRI.getNumVirtRegs(); ++i) {
374 if (RB && RB->
getID() == AMDGPU::SGPRRegBankID) {
389 const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
390 GISelCSEAnalysisWrapper &
Wrapper =
391 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
393 GISelObserverWrapper Observer;
397 B.setCSEInfo(&CSEInfo);
398 B.setChangeObserver(Observer);
400 RAIIDelegateInstaller DelegateInstaller(MF, &Observer);
401 RAIIMFObserverInstaller MFObserverInstaller(MF, Observer);
405 const RegisterBankInfo &RBI = *
ST.getRegBankInfo();
407 getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
410 const RegBankLegalizeRules &RBLRules =
getRules(ST,
MRI);
413 RegBankLegalizeHelper RBLHelper(
B, MUI, RBI, RBLRules);
417 for (MachineBasicBlock &
MBB : MF) {
418 for (MachineInstr &
MI :
MBB) {
423 for (MachineInstr *
MI : AllInst) {
424 if (!
MI->isPreISelOpcode())
427 unsigned Opc =
MI->getOpcode();
429 if (
Opc == AMDGPU::G_PHI) {
430 RBLHelper.applyMappingPHI(*
MI);
436 if (
Opc == AMDGPU::G_BUILD_VECTOR ||
Opc == AMDGPU::G_UNMERGE_VALUES ||
437 Opc == AMDGPU::G_MERGE_VALUES ||
Opc == AMDGPU::G_BITCAST) {
438 RBLHelper.applyMappingTrivial(*
MI);
443 if (
Opc == G_FREEZE &&
445 RBLHelper.applyMappingTrivial(*
MI);
449 if ((
Opc == AMDGPU::G_CONSTANT ||
Opc == AMDGPU::G_FCONSTANT ||
450 Opc == AMDGPU::G_IMPLICIT_DEF)) {
454 assert(
MRI.getRegBank(Dst)->getID() == AMDGPU::SGPRRegBankID);
461 RBLHelper.findRuleAndApplyMapping(*
MI);
487 AMDGPURegBankLegalizeCombiner Combiner(
B, *
ST.getRegisterInfo(), RBI);
489 for (MachineBasicBlock &
MBB : MF) {
491 if (
MI.getOpcode() == AMDGPU::COPY) {
492 Combiner.tryCombineCopy(
MI);
495 if (
MI.getOpcode() == AMDGPU::G_ANYEXT) {
496 Combiner.tryCombineS1AnyExt(
MI);
503 "Registers with sgpr reg bank and S1 LLT are not legal after "
504 "AMDGPURegBankLegalize. Should lower to sgpr S32");
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
static Register getAnySgprS1(const MachineRegisterInfo &MRI)
const RegBankLegalizeRules & getRules(const GCNSubtarget &ST, MachineRegisterInfo &MRI)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Provides analysis for continuously CSEing during GISel passes.
This file implements a version of MachineIRBuilder which CSEs insts within a MachineBasicBlock.
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
Contains matchers for matching SSA Machine Instructions.
Promote Memory to Register
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Target-Independent Code Generator Pass Configuration Options pass.
std::pair< GUnmerge *, int > tryMatchRALFromUnmerge(Register Src)
void replaceRegWithOrBuildCopy(Register Dst, Register Src)
AMDGPURegBankLegalizeCombiner(MachineIRBuilder &B, const SIRegisterInfo &TRI, const RegisterBankInfo &RBI)
bool isLaneMask(Register Reg)
void tryCombineS1AnyExt(MachineInstr &MI)
std::pair< MachineInstr *, Register > tryMatch(Register Src, unsigned Opcode)
Register getReadAnyLaneSrc(Register Src)
void tryCombineCopy(MachineInstr &MI)
bool tryEliminateReadAnyLane(MachineInstr &Copy)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
FunctionPass class - This class is used to implement most global optimizations.
The actual analysis pass wrapper.
void addObserver(GISelChangeObserver *O)
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineFunctionProperties & getProperties() const
Get the function properties.
Helper class to build MachineInstr.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Holds all the information related to register banks.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e.
Target-Independent Code Generator Pass Configuration Options.
virtual std::unique_ptr< CSEConfigBase > getCSEConfig() const
Returns the CSEConfig object to use for the current optimization level.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
operand_type_match m_Reg()
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
This is an optimization pass for GlobalISel generic memory operations.
GenericUniformityInfo< MachineSSAContext > MachineUniformityInfo
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
FunctionPass * createAMDGPURegBankLegalizePass()
LLVM_ABI void eraseInstr(MachineInstr &MI, MachineRegisterInfo &MRI, LostDebugLocObserver *LocObserver=nullptr)
char & AMDGPURegBankLegalizeID