34#define DEBUG_TYPE "amdgpu-regbanklegalize"
43template <
typename SrcTy>
45m_GAMDGPUReadAnyLane(
const SrcTy &Src) {
59 return "AMDGPU Register Bank Legalize";
79 "AMDGPU Register Bank Legalize",
false,
false)
86char AMDGPURegBankLegalize::
ID = 0;
91 return new AMDGPURegBankLegalize();
96 static std::mutex GlobalMutex;
99 std::lock_guard<std::mutex> Lock(GlobalMutex);
100 auto [It, Inserted] = CacheForRuleSet.
try_emplace(ST.getGeneration());
102 It->second = std::make_unique<RegBankLegalizeRules>(ST,
MRI);
104 It->second->refreshRefs(ST,
MRI);
124 : B(B), MRI(*B.getMRI()), TRI(TRI),
125 SgprRB(&RBI.getRegBank(
AMDGPU::SGPRRegBankID)),
126 VgprRB(&RBI.getRegBank(
AMDGPU::VGPRRegBankID)),
127 VccRB(&RBI.getRegBank(
AMDGPU::VCCRegBankID)) {};
130 std::pair<MachineInstr *, Register>
tryMatch(
Register Src,
unsigned Opcode);
143 if (RB && RB->
getID() == AMDGPU::VCCRegBankID)
147 return RC && TRI.isSGPRClass(RC) && MRI.getType(Reg) ==
LLT::scalar(1);
150std::pair<MachineInstr *, Register>
163 if (!UnMerge || UnMerge->getNumDefs() != DefRegs.
size())
165 for (
unsigned I = 1;
I < DefRegs.
size(); ++
I) {
166 if (UnMerge->getReg(
I) != DefRegs[
I])
169 return UnMerge->getSourceReg();
177 for (
unsigned i = 0; i <
Merge->getNumSources(); ++i) {
180 m_GAMDGPUReadAnyLane(
m_Reg(Src))))
184 return ReadAnyLaneSrcs;
217 if (ReadAnyLaneSrcs.
empty())
225 return ReadAnyLaneSrcs;
235 int Idx = UnMerge->findRegisterDefOperandIdx(Src,
nullptr);
237 if (!
Merge || UnMerge->getNumDefs() !=
Merge->getNumSources())
241 if (MRI.getType(Src) != MRI.getType(SrcRegIdx))
244 auto [RALEl, RALElSrc] =
tryMatch(SrcRegIdx, AMDGPU::G_AMDGPU_READANYLANE);
254 MRI.replaceRegWith(Dst, Src);
256 B.buildCopy(Dst, Src);
261 Register Dst = Copy.getOperand(0).getReg();
262 Register Src = Copy.getOperand(1).getReg();
265 if (Dst.isVirtual() ? (MRI.getRegBankOrNull(Dst) != VgprRB)
266 : !TRI.isVGPR(MRI, Dst))
270 if (!Src.isVirtual() || MRI.getRegClassOrNull(Src))
275 if (SrcMI.
getOpcode() == AMDGPU::G_BITCAST)
278 B.setInstrAndDebugLoc(Copy);
280 if (ReadAnyLaneSrcRegs.
empty())
284 if (ReadAnyLaneSrcRegs.
size() == 1) {
285 ReadAnyLaneSrc = ReadAnyLaneSrcRegs[0];
288 auto Merge = B.buildMergeLikeInstr({VgprRB, MRI.getType(RALDst)},
290 ReadAnyLaneSrc =
Merge.getReg(0);
293 if (SrcMI.
getOpcode() != AMDGPU::G_BITCAST) {
306 auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, ReadAnyLaneSrc);
321 if (!Dst.isVirtual() || !Src.isVirtual())
331 if (
isLaneMask(Dst) && MRI.getRegBankOrNull(Src) == SgprRB) {
332 auto [Trunc, TruncS32Src] =
tryMatch(Src, AMDGPU::G_TRUNC);
333 assert(Trunc && MRI.getType(TruncS32Src) == S32 &&
334 "sgpr S1 must be result of G_TRUNC of sgpr S32");
338 auto One = B.buildConstant({SgprRB, S32}, 1);
339 auto BoolSrc = B.buildAnd({SgprRB, S32}, TruncS32Src, One);
340 B.buildInstr(AMDGPU::G_AMDGPU_COPY_VCC_SCC, {Dst}, {BoolSrc});
352 if (MRI.getType(Src) != S1)
355 auto [Trunc, TruncSrc] =
tryMatch(Src, AMDGPU::G_TRUNC);
359 LLT DstTy = MRI.getType(Dst);
360 LLT TruncSrcTy = MRI.getType(TruncSrc);
362 if (DstTy == TruncSrcTy) {
363 MRI.replaceRegWith(Dst, TruncSrc);
370 if (DstTy == S32 && TruncSrcTy == S64) {
371 auto Unmerge = B.buildUnmerge({SgprRB, S32}, TruncSrc);
372 MRI.replaceRegWith(Dst, Unmerge.getReg(0));
377 if (DstTy == S64 && TruncSrcTy == S32) {
378 B.buildMergeLikeInstr(
MI.getOperand(0).getReg(),
379 {TruncSrc, B.buildUndef({SgprRB, S32})});
384 if (DstTy ==
S32 && TruncSrcTy ==
S16) {
385 B.buildAnyExt(Dst, TruncSrc);
390 if (DstTy ==
S16 && TruncSrcTy ==
S32) {
391 B.buildTrunc(Dst, TruncSrc);
402 for (
unsigned i = 0; i <
MRI.getNumVirtRegs(); ++i) {
408 if (RB && RB->
getID() == AMDGPU::SGPRRegBankID) {
423 const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
424 GISelCSEAnalysisWrapper &
Wrapper =
425 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
427 GISelObserverWrapper Observer;
431 B.setCSEInfo(&CSEInfo);
432 B.setChangeObserver(Observer);
434 RAIIDelegateInstaller DelegateInstaller(MF, &Observer);
435 RAIIMFObserverInstaller MFObserverInstaller(MF, Observer);
439 const RegisterBankInfo &RBI = *
ST.getRegBankInfo();
441 getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
444 const RegBankLegalizeRules &RBLRules =
getRules(ST,
MRI);
447 RegBankLegalizeHelper RBLHelper(
B, MUI, RBI, RBLRules);
451 for (MachineBasicBlock &
MBB : MF) {
452 for (MachineInstr &
MI :
MBB) {
457 for (MachineInstr *
MI : AllInst) {
458 if (!
MI->isPreISelOpcode())
461 unsigned Opc =
MI->getOpcode();
463 if (
Opc == AMDGPU::G_PHI) {
464 if (!RBLHelper.applyMappingPHI(*
MI))
471 if (
Opc == AMDGPU::G_BUILD_VECTOR ||
Opc == AMDGPU::G_UNMERGE_VALUES ||
472 Opc == AMDGPU::G_MERGE_VALUES ||
Opc == AMDGPU::G_CONCAT_VECTORS ||
473 Opc == AMDGPU::G_BITCAST) {
474 RBLHelper.applyMappingTrivial(*
MI);
479 if (
Opc == G_FREEZE &&
481 RBLHelper.applyMappingTrivial(*
MI);
485 if ((
Opc == AMDGPU::G_CONSTANT ||
Opc == AMDGPU::G_FCONSTANT ||
486 Opc == AMDGPU::G_IMPLICIT_DEF)) {
490 assert(
MRI.getRegBank(Dst)->getID() == AMDGPU::SGPRRegBankID);
497 if (!RBLHelper.findRuleAndApplyMapping(*
MI))
524 AMDGPURegBankLegalizeCombiner Combiner(
B, *
ST.getRegisterInfo(), RBI);
526 for (MachineBasicBlock &
MBB : MF) {
528 if (
MI.getOpcode() == AMDGPU::COPY) {
529 Combiner.tryCombineCopy(
MI);
532 if (
MI.getOpcode() == AMDGPU::G_ANYEXT) {
533 Combiner.tryCombineS1AnyExt(
MI);
540 "Registers with sgpr reg bank and S1 LLT are not legal after "
541 "AMDGPURegBankLegalize. Should lower to sgpr S32");
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
static Register getAnySgprS1(const MachineRegisterInfo &MRI)
const RegBankLegalizeRules & getRules(const GCNSubtarget &ST, MachineRegisterInfo &MRI)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Provides analysis for continuously CSEing during GISel passes.
This file implements a version of MachineIRBuilder which CSEs insts within a MachineBasicBlock.
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
Contains matchers for matching SSA Machine Instructions.
Promote Memory to Register
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Target-Independent Code Generator Pass Configuration Options pass.
Register tryMatchUnmergeDefs(SmallVectorImpl< Register > &DefRegs)
void replaceRegWithOrBuildCopy(Register Dst, Register Src)
AMDGPURegBankLegalizeCombiner(MachineIRBuilder &B, const SIRegisterInfo &TRI, const RegisterBankInfo &RBI)
bool isLaneMask(Register Reg)
void tryCombineS1AnyExt(MachineInstr &MI)
std::pair< MachineInstr *, Register > tryMatch(Register Src, unsigned Opcode)
SmallVector< Register > tryMatchMergeReadAnyLane(GMergeLikeInstr *Merge)
void tryCombineCopy(MachineInstr &MI)
bool tryEliminateReadAnyLane(MachineInstr &Copy)
SmallVector< Register > getReadAnyLaneSrcs(Register Src)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
FunctionPass class - This class is used to implement most global optimizations.
The actual analysis pass wrapper.
void addObserver(GISelChangeObserver *O)
Represents G_BUILD_VECTOR, G_CONCAT_VECTORS or G_MERGE_VALUES.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineFunctionProperties & getProperties() const
Get the function properties.
Helper class to build MachineInstr.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Holds all the information related to register banks.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Target-Independent Code Generator Pass Configuration Options.
virtual std::unique_ptr< CSEConfigBase > getCSEConfig() const
Returns the CSEConfig object to use for the current optimization level.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
operand_type_match m_Reg()
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
This is an optimization pass for GlobalISel generic memory operations.
GenericUniformityInfo< MachineSSAContext > MachineUniformityInfo
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
FunctionPass * createAMDGPURegBankLegalizePass()
LLVM_ABI void eraseInstr(MachineInstr &MI, MachineRegisterInfo &MRI, LostDebugLocObserver *LocObserver=nullptr)
char & AMDGPURegBankLegalizeID