46#define DEBUG_TYPE "aarch64-a57-fp-load-balancing"
52 cl::desc(
"Always modify dest registers regardless of color"),
59 cl::desc(
"Ignore balance information, always return "
60 "(1: Even, 2: Odd)."),
68 switch (
MI->getOpcode()) {
69 case AArch64::FMULSrr:
70 case AArch64::FNMULSrr:
71 case AArch64::FMULDrr:
72 case AArch64::FNMULDrr:
81 switch (
MI->getOpcode()) {
82 case AArch64::FMSUBSrrr:
83 case AArch64::FMADDSrrr:
84 case AArch64::FNMSUBSrrr:
85 case AArch64::FNMADDSrrr:
86 case AArch64::FMSUBDrrr:
87 case AArch64::FMADDDrrr:
88 case AArch64::FNMSUBDrrr:
89 case AArch64::FNMADDDrrr:
101enum class Color { Even, Odd };
103static const char *ColorNames[2] = {
"Even",
"Odd" };
108class AArch64A57FPLoadBalancingImpl {
110 bool run(MachineFunction &MF);
113 MachineRegisterInfo *MRI;
114 const TargetRegisterInfo *TRI;
115 RegisterClassInfo RCI;
118 bool colorChainSet(std::vector<Chain *> GV, MachineBasicBlock &
MBB,
120 bool colorChain(Chain *
G, Color
C, MachineBasicBlock &
MBB);
121 int scavengeRegister(Chain *
G, Color
C, MachineBasicBlock &
MBB);
122 void scanInstruction(MachineInstr *
MI,
unsigned Idx,
123 std::map<unsigned, Chain *> &Active,
124 std::vector<std::unique_ptr<Chain>> &AllChains);
125 void maybeKillChain(MachineOperand &MO,
unsigned Idx,
126 std::map<unsigned, Chain *> &RegChains);
128 Chain *getAndEraseNext(Color PreferredColor, std::vector<Chain *> &L);
134 explicit AArch64A57FPLoadBalancingLegacy() : MachineFunctionPass(ID) {}
136 bool runOnMachineFunction(MachineFunction &MF)
override;
138 MachineFunctionProperties getRequiredProperties()
const override {
139 return MachineFunctionProperties().setNoVRegs();
142 StringRef getPassName()
const override {
143 return "A57 FP Anti-dependency breaker";
146 void getAnalysisUsage(AnalysisUsage &AU)
const override {
153char AArch64A57FPLoadBalancingLegacy::ID = 0;
156 "AArch64 A57 FP Load-Balancing",
false,
false)
218 "Chain: broken invariant. A Chain can only be killed after its last "
239 "Chain: broken invariant. A Chain can only be killed after its last "
269 unsigned OtherEnd =
Other.KillInst ?
311 if (!MF.
getSubtarget<AArch64Subtarget>().balanceFPOps())
321 for (
auto &
MBB : MF) {
328bool AArch64A57FPLoadBalancingLegacy::runOnMachineFunction(
329 MachineFunction &MF) {
332 return AArch64A57FPLoadBalancingImpl().run(MF);
338 if (AArch64A57FPLoadBalancingImpl().
run(MF)) {
349 <<
" - scanning instructions...\n");
356 std::map<unsigned, Chain*> ActiveChains;
357 std::vector<std::unique_ptr<Chain>> AllChains;
360 scanInstruction(&
MI, Idx++, ActiveChains, AllChains);
363 <<
" chains created.\n");
373 for (
auto &
I : AllChains)
376 for (
auto &
I : AllChains)
377 for (
auto &J : AllChains)
378 if (
I != J &&
I->rangeOverlapsWith(*J))
379 EC.unionSets(
I.get(), J.get());
380 LLVM_DEBUG(
dbgs() <<
"Created " << EC.getNumClasses() <<
" disjoint sets.\n");
386 std::vector<std::vector<Chain*> > V;
387 for (
const auto &E : EC) {
390 std::vector<Chain *> Cs(EC.member_begin(*E), EC.member_end());
391 if (Cs.empty())
continue;
392 V.push_back(std::move(Cs));
398 [](
const std::vector<Chain *> &
A,
const std::vector<Chain *> &
B) {
399 return A.front()->startsBefore(
B.front());
416 Changed |= colorChainSet(std::move(
I),
MBB, Parity);
421Chain *AArch64A57FPLoadBalancingImpl::getAndEraseNext(Color PreferredColor,
422 std::vector<Chain *> &L) {
434 const unsigned SizeFuzz = 1;
435 unsigned MinSize =
L.front()->size() - SizeFuzz;
436 for (
auto I =
L.begin(),
E =
L.end();
I !=
E; ++
I) {
437 if ((*I)->size() <= MinSize) {
444 if ((*I)->getPreferredColor() == PreferredColor) {
452 Chain *Ch =
L.front();
457bool AArch64A57FPLoadBalancingImpl::colorChainSet(std::vector<Chain *> GV,
458 MachineBasicBlock &
MBB,
461 LLVM_DEBUG(
dbgs() <<
"colorChainSet(): #sets=" << GV.size() <<
"\n");
472 llvm::sort(GV, [](
const Chain *G1,
const Chain *G2) {
473 if (G1->size() != G2->size())
474 return G1->size() > G2->size();
475 if (G1->requiresFixup() != G2->requiresFixup())
476 return G1->requiresFixup() > G2->requiresFixup();
478 assert((G1 == G2 || (G1->startsBefore(G2) ^ G2->startsBefore(G1))) &&
479 "Starts before not total order!");
480 return G1->startsBefore(G2);
483 Color PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
484 while (Chain *
G = getAndEraseNext(PreferredColor, GV)) {
486 Color
C = PreferredColor;
489 C =
G->getPreferredColor();
492 <<
", Color=" << ColorNames[(
int)
C] <<
"\n");
497 if (
G->requiresFixup() &&
C !=
G->getPreferredColor()) {
498 C =
G->getPreferredColor();
500 <<
" - not worthwhile changing; "
502 << ColorNames[(
int)
C] <<
"\n");
507 Parity += (
C == Color::Even) ?
G->size() : -
G->size();
508 PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
514int AArch64A57FPLoadBalancingImpl::scavengeRegister(Chain *
G, Color
C,
515 MachineBasicBlock &
MBB) {
518 LiveRegUnits Units(*
TRI);
519 Units.addLiveOuts(
MBB);
522 while (
I != ChainEnd) {
524 if (!
I->isDebugInstr())
525 Units.stepBackward(*
I);
530 assert(ChainBegin != ChainEnd &&
"Chain should contain instructions");
533 Units.accumulate(*
I);
534 }
while (
I != ChainBegin);
537 unsigned RegClassID = ChainBegin->getDesc().operands()[0].RegClass;
538 auto Ord = RCI.
getOrder(
TRI->getRegClass(RegClassID));
539 for (
auto Reg : Ord) {
540 if (!Units.available(
Reg))
542 if (
C == getColor(
Reg))
549bool AArch64A57FPLoadBalancingImpl::colorChain(Chain *
G, Color
C,
550 MachineBasicBlock &
MBB) {
553 << ColorNames[(
int)
C] <<
")\n");
557 int Reg = scavengeRegister(
G,
C,
MBB);
564 std::map<unsigned, unsigned> Substs;
565 for (MachineInstr &
I : *
G) {
566 if (!
G->contains(
I) && (&
I !=
G->getKill() ||
G->isKillImmutable()))
571 std::vector<unsigned> ToErase;
572 for (
auto &U :
I.operands()) {
573 if (
U.isReg() &&
U.isUse() && Substs.find(
U.getReg()) != Substs.end()) {
575 U.setReg(Substs[OrigReg]);
579 ToErase.push_back(OrigReg);
580 }
else if (
U.isRegMask()) {
581 for (
auto J : Substs) {
582 if (
U.clobbersPhysReg(J.first))
583 ToErase.push_back(J.first);
588 for (
auto J : ToErase)
592 if (&
I !=
G->getKill()) {
593 MachineOperand &MO =
I.getOperand(0);
596 if (
G->requiresFixup() && &
I ==
G->getLast())
607 assert(Substs.size() == 0 &&
"No substitutions should be left active!");
619void AArch64A57FPLoadBalancingImpl::scanInstruction(
620 MachineInstr *
MI,
unsigned Idx, std::map<unsigned, Chain *> &ActiveChains,
621 std::vector<std::unique_ptr<Chain>> &AllChains) {
626 for (
auto &
I :
MI->uses())
627 maybeKillChain(
I, Idx, ActiveChains);
628 for (
auto &
I :
MI->defs())
629 maybeKillChain(
I, Idx, ActiveChains);
633 Register DestReg =
MI->getOperand(0).getReg();
638 auto G = std::make_unique<Chain>(
MI, Idx, getColor(DestReg));
639 ActiveChains[DestReg] =
G.get();
640 AllChains.push_back(std::move(
G));
646 Register DestReg =
MI->getOperand(0).getReg();
647 Register AccumReg =
MI->getOperand(3).getReg();
649 maybeKillChain(
MI->getOperand(1), Idx, ActiveChains);
650 maybeKillChain(
MI->getOperand(2), Idx, ActiveChains);
651 if (DestReg != AccumReg)
652 maybeKillChain(
MI->getOperand(0), Idx, ActiveChains);
654 if (ActiveChains.find(AccumReg) != ActiveChains.end()) {
663 if (
MI->getOperand(3).isKill()) {
665 LLVM_DEBUG(
dbgs() <<
"Instruction was successfully added to chain.\n");
666 ActiveChains[AccumReg]->add(
MI, Idx, getColor(DestReg));
668 if (DestReg != AccumReg) {
669 ActiveChains[DestReg] = ActiveChains[AccumReg];
670 ActiveChains.erase(AccumReg);
676 dbgs() <<
"Cannot add to chain because accumulator operand wasn't "
677 <<
"marked <kill>!\n");
678 maybeKillChain(
MI->getOperand(3), Idx, ActiveChains);
683 auto G = std::make_unique<Chain>(
MI, Idx, getColor(DestReg));
684 ActiveChains[DestReg] =
G.get();
685 AllChains.push_back(std::move(
G));
691 for (
auto &
I :
MI->uses())
692 maybeKillChain(
I, Idx, ActiveChains);
693 for (
auto &
I :
MI->defs())
694 maybeKillChain(
I, Idx, ActiveChains);
699void AArch64A57FPLoadBalancingImpl::maybeKillChain(
700 MachineOperand &MO,
unsigned Idx,
701 std::map<unsigned, Chain *> &ActiveChains) {
709 if (MO.
isKill() && ActiveChains.find(MO.
getReg()) != ActiveChains.end()) {
714 ActiveChains.erase(MO.
getReg());
718 for (
auto I = ActiveChains.begin(),
E = ActiveChains.end();
723 I->second->setKill(
MI, Idx,
true);
724 ActiveChains.erase(
I++);
732Color AArch64A57FPLoadBalancingImpl::getColor(
unsigned Reg) {
733 if ((
TRI->getEncodingValue(
Reg) % 2) == 0)
741 return new AArch64A57FPLoadBalancingLegacy();
static bool isMul(MachineInstr *MI)
static cl::opt< unsigned > OverrideBalance("aarch64-a57-fp-load-balancing-override", cl::desc("Ignore balance information, always return " "(1: Even, 2: Odd)."), cl::init(0), cl::Hidden)
static cl::opt< bool > TransformAll("aarch64-a57-fp-load-balancing-force-all", cl::desc("Always modify dest registers regardless of color"), cl::init(false), cl::Hidden)
static bool isMla(MachineInstr *MI)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
static bool runOnBasicBlock(MachineBasicBlock *MBB, unsigned BasicBlockNum, VRegRenamer &Renamer)
Register const TargetRegisterInfo * TRI
Promote Memory to Register
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file declares the machine register scavenger class.
MachineInstr * StartInst
The important (marker) instructions.
MachineBasicBlock::iterator begin() const
bool isKillImmutable() const
Can the Kill instruction (assuming one exists) be modified?
void add(MachineInstr *MI, unsigned Idx, Color C)
Add a new instruction into the chain.
bool contains(MachineInstr &MI)
Return true if MI is a member of the chain.
Color LastColor
The "color" of LastInst.
bool requiresFixup() const
Return true if the group will require a fixup MOV at the end.
MachineInstr * getLast() const
Return the last instruction in the chain.
bool KillIsImmutable
True if KillInst cannot be modified.
bool rangeOverlapsWith(const Chain &Other) const
Return true if this chain (StartInst..KillInst) overlaps with Other.
MachineInstr * getStart() const
Return the first instruction in the chain.
unsigned size() const
Return the number of instructions in the chain.
MachineBasicBlock::iterator end() const
Return an instruction that can be used as an iterator for the end of the chain.
void setKill(MachineInstr *MI, unsigned Idx, bool Immutable)
Inform the chain that its last active register (the dest register of LastInst) is killed by MI with n...
MachineInstr * getKill() const
Return the "kill" instruction (as set with setKill()) or NULL.
unsigned StartInstIdx
The index, from the start of the basic block, that each marker appears.
Color getPreferredColor()
Return the preferred color of this chain.
std::string str() const
Return a simple string representation of the chain.
std::set< MachineInstr * > Insts
All instructions in the chain.
Chain(MachineInstr *MI, unsigned Idx, Color C)
bool startsBefore(const Chain *Other) const
Return true if this chain starts before Other.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Represents analyses that only rely on functions' control flow.
This represents a collection of equivalence classes and supports three efficient operations: insert a...
FunctionPass class - This class is used to implement most global optimizations.
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
const TargetRegisterInfo * getTargetRegisterInfo() const
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)
runOnFunction - Prepare to answer questions about MF.
ArrayRef< MCPhysReg > getOrder(const TargetRegisterClass *RC) const
getOrder - Returns the preferred allocation order for RC.
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
This is an optimization pass for GlobalISel generic memory operations.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
FunctionPass * createAArch64A57FPLoadBalancingLegacyPass()
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.