LLVM 23.0.0git
AArch64A57FPLoadBalancing.cpp
Go to the documentation of this file.
1//===-- AArch64A57FPLoadBalancing.cpp - Balance FP ops statically on A57---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// For best-case performance on Cortex-A57, we should try to use a balanced
9// mix of odd and even D-registers when performing a critical sequence of
10// independent, non-quadword FP/ASIMD floating-point multiply or
11// multiply-accumulate operations.
12//
13// This pass attempts to detect situations where the register allocation may
14// adversely affect this load balancing and to change the registers used so as
15// to better utilize the CPU.
16//
17// Ideally we'd just take each multiply or multiply-accumulate in turn and
18// allocate it alternating even or odd registers. However, multiply-accumulates
19// are most efficiently performed in the same functional unit as their
20// accumulation operand. Therefore this pass tries to find maximal sequences
21// ("Chains") of multiply-accumulates linked via their accumulation operand,
22// and assign them all the same "color" (oddness/evenness).
23//
24// This optimization affects S-register and D-register floating point
25// multiplies and FMADD/FMAs, as well as vector (floating point only) muls and
26// FMADD/FMA. Q register instructions (and 128-bit vector instructions) are
27// not affected.
28//===----------------------------------------------------------------------===//
29
30#include "AArch64.h"
31#include "AArch64InstrInfo.h"
32#include "AArch64Subtarget.h"
42#include "llvm/Support/Debug.h"
44using namespace llvm;
45
46#define DEBUG_TYPE "aarch64-a57-fp-load-balancing"
47
48// Enforce the algorithm to use the scavenged register even when the original
49// destination register is the correct color. Used for testing.
50static cl::opt<bool>
51TransformAll("aarch64-a57-fp-load-balancing-force-all",
52 cl::desc("Always modify dest registers regardless of color"),
53 cl::init(false), cl::Hidden);
54
55// Never use the balance information obtained from chains - return a specific
56// color always. Used for testing.
58OverrideBalance("aarch64-a57-fp-load-balancing-override",
59 cl::desc("Ignore balance information, always return "
60 "(1: Even, 2: Odd)."),
62
63//===----------------------------------------------------------------------===//
64// Helper functions
65
66// Is the instruction a type of multiply on 64-bit (or 32-bit) FPRs?
67static bool isMul(MachineInstr *MI) {
68 switch (MI->getOpcode()) {
69 case AArch64::FMULSrr:
70 case AArch64::FNMULSrr:
71 case AArch64::FMULDrr:
72 case AArch64::FNMULDrr:
73 return true;
74 default:
75 return false;
76 }
77}
78
79// Is the instruction a type of FP multiply-accumulate on 64-bit (or 32-bit) FPRs?
80static bool isMla(MachineInstr *MI) {
81 switch (MI->getOpcode()) {
82 case AArch64::FMSUBSrrr:
83 case AArch64::FMADDSrrr:
84 case AArch64::FNMSUBSrrr:
85 case AArch64::FNMADDSrrr:
86 case AArch64::FMSUBDrrr:
87 case AArch64::FMADDDrrr:
88 case AArch64::FNMSUBDrrr:
89 case AArch64::FNMADDDrrr:
90 return true;
91 default:
92 return false;
93 }
94}
95
96//===----------------------------------------------------------------------===//
97
98namespace {
99/// A "color", which is either even or odd. Yes, these aren't really colors
100/// but the algorithm is conceptually doing two-color graph coloring.
101enum class Color { Even, Odd };
102#ifndef NDEBUG
103static const char *ColorNames[2] = { "Even", "Odd" };
104#endif
105
106class Chain;
107
108class AArch64A57FPLoadBalancingImpl {
109public:
110 bool run(MachineFunction &MF);
111
112private:
113 MachineRegisterInfo *MRI;
114 const TargetRegisterInfo *TRI;
115 RegisterClassInfo RCI;
116
117 bool runOnBasicBlock(MachineBasicBlock &MBB);
118 bool colorChainSet(std::vector<Chain *> GV, MachineBasicBlock &MBB,
119 int &Balance);
120 bool colorChain(Chain *G, Color C, MachineBasicBlock &MBB);
121 int scavengeRegister(Chain *G, Color C, MachineBasicBlock &MBB);
122 void scanInstruction(MachineInstr *MI, unsigned Idx,
123 std::map<unsigned, Chain *> &Active,
124 std::vector<std::unique_ptr<Chain>> &AllChains);
125 void maybeKillChain(MachineOperand &MO, unsigned Idx,
126 std::map<unsigned, Chain *> &RegChains);
127 Color getColor(unsigned Register);
128 Chain *getAndEraseNext(Color PreferredColor, std::vector<Chain *> &L);
129};
130
131class AArch64A57FPLoadBalancingLegacy : public MachineFunctionPass {
132public:
133 static char ID;
134 explicit AArch64A57FPLoadBalancingLegacy() : MachineFunctionPass(ID) {}
135
136 bool runOnMachineFunction(MachineFunction &MF) override;
137
138 MachineFunctionProperties getRequiredProperties() const override {
139 return MachineFunctionProperties().setNoVRegs();
140 }
141
142 StringRef getPassName() const override {
143 return "A57 FP Anti-dependency breaker";
144 }
145
146 void getAnalysisUsage(AnalysisUsage &AU) const override {
147 AU.setPreservesCFG();
149 }
150};
151}
152
153char AArch64A57FPLoadBalancingLegacy::ID = 0;
154
155INITIALIZE_PASS_BEGIN(AArch64A57FPLoadBalancingLegacy, DEBUG_TYPE,
156 "AArch64 A57 FP Load-Balancing", false, false)
157INITIALIZE_PASS_END(AArch64A57FPLoadBalancingLegacy, DEBUG_TYPE,
158 "AArch64 A57 FP Load-Balancing", false, false)
159
160namespace {
161/// A Chain is a sequence of instructions that are linked together by
162/// an accumulation operand. For example:
163///
164/// fmul def d0, ?
165/// fmla def d1, ?, ?, killed d0
166/// fmla def d2, ?, ?, killed d1
167///
168/// There may be other instructions interleaved in the sequence that
169/// do not belong to the chain. These other instructions must not use
170/// the "chain" register at any point.
171///
172/// We currently only support chains where the "chain" operand is killed
173/// at each link in the chain for simplicity.
174/// A chain has three important instructions - Start, Last and Kill.
175/// * The start instruction is the first instruction in the chain.
176/// * Last is the final instruction in the chain.
177/// * Kill may or may not be defined. If defined, Kill is the instruction
178/// where the outgoing value of the Last instruction is killed.
179/// This information is important as if we know the outgoing value is
180/// killed with no intervening uses, we can safely change its register.
181///
182/// Without a kill instruction, we must assume the outgoing value escapes
183/// beyond our model and either must not change its register or must
184/// create a fixup FMOV to keep the old register value consistent.
185///
186class Chain {
187public:
188 /// The important (marker) instructions.
190 /// The index, from the start of the basic block, that each marker
191 /// appears. These are stored so we can do quick interval tests.
193 /// All instructions in the chain.
194 std::set<MachineInstr*> Insts;
195 /// True if KillInst cannot be modified. If this is true,
196 /// we cannot change LastInst's outgoing register.
197 /// This will be true for tied values and regmasks.
199 /// The "color" of LastInst. This will be the preferred chain color,
200 /// as changing intermediate nodes is easy but changing the last
201 /// instruction can be more tricky.
203
204 Chain(MachineInstr *MI, unsigned Idx, Color C)
205 : StartInst(MI), LastInst(MI), KillInst(nullptr),
206 StartInstIdx(Idx), LastInstIdx(Idx), KillInstIdx(0),
207 LastColor(C) {
208 Insts.insert(MI);
209 }
210
211 /// Add a new instruction into the chain. The instruction's dest operand
212 /// has the given color.
213 void add(MachineInstr *MI, unsigned Idx, Color C) {
214 LastInst = MI;
215 LastInstIdx = Idx;
216 LastColor = C;
218 "Chain: broken invariant. A Chain can only be killed after its last "
219 "def");
220
221 Insts.insert(MI);
222 }
223
224 /// Return true if MI is a member of the chain.
225 bool contains(MachineInstr &MI) { return Insts.count(&MI) > 0; }
226
227 /// Return the number of instructions in the chain.
228 unsigned size() const {
229 return Insts.size();
230 }
231
232 /// Inform the chain that its last active register (the dest register of
233 /// LastInst) is killed by MI with no intervening uses or defs.
234 void setKill(MachineInstr *MI, unsigned Idx, bool Immutable) {
235 KillInst = MI;
236 KillInstIdx = Idx;
237 KillIsImmutable = Immutable;
239 "Chain: broken invariant. A Chain can only be killed after its last "
240 "def");
241 }
242
243 /// Return the first instruction in the chain.
244 MachineInstr *getStart() const { return StartInst; }
245 /// Return the last instruction in the chain.
246 MachineInstr *getLast() const { return LastInst; }
247 /// Return the "kill" instruction (as set with setKill()) or NULL.
248 MachineInstr *getKill() const { return KillInst; }
249 /// Return an instruction that can be used as an iterator for the end
250 /// of the chain. This is the maximum of KillInst (if set) and LastInst.
255
256 /// Can the Kill instruction (assuming one exists) be modified?
257 bool isKillImmutable() const { return KillIsImmutable; }
258
259 /// Return the preferred color of this chain.
261 if (OverrideBalance != 0)
262 return OverrideBalance == 1 ? Color::Even : Color::Odd;
263 return LastColor;
264 }
265
266 /// Return true if this chain (StartInst..KillInst) overlaps with Other.
267 bool rangeOverlapsWith(const Chain &Other) const {
268 unsigned End = KillInst ? KillInstIdx : LastInstIdx;
269 unsigned OtherEnd = Other.KillInst ?
270 Other.KillInstIdx : Other.LastInstIdx;
271
272 return StartInstIdx <= OtherEnd && Other.StartInstIdx <= End;
273 }
274
275 /// Return true if this chain starts before Other.
276 bool startsBefore(const Chain *Other) const {
277 return StartInstIdx < Other->StartInstIdx;
278 }
279
280 /// Return true if the group will require a fixup MOV at the end.
281 bool requiresFixup() const {
282 return (getKill() && isKillImmutable()) || !getKill();
283 }
284
285 /// Return a simple string representation of the chain.
286 std::string str() const {
287 std::string S;
288 raw_string_ostream OS(S);
289
290 OS << "{";
291 StartInst->print(OS, /* SkipOpers= */true);
292 OS << " -> ";
293 LastInst->print(OS, /* SkipOpers= */true);
294 if (KillInst) {
295 OS << " (kill @ ";
296 KillInst->print(OS, /* SkipOpers= */true);
297 OS << ")";
298 }
299 OS << "}";
300
301 return OS.str();
302 }
303
304};
305
306} // end anonymous namespace
307
308//===----------------------------------------------------------------------===//
309
310bool AArch64A57FPLoadBalancingImpl::run(MachineFunction &MF) {
311 if (!MF.getSubtarget<AArch64Subtarget>().balanceFPOps())
312 return false;
313
314 bool Changed = false;
315 LLVM_DEBUG(dbgs() << "***** AArch64A57FPLoadBalancing *****\n");
316
317 MRI = &MF.getRegInfo();
319 RCI.runOnMachineFunction(MF);
320
321 for (auto &MBB : MF) {
323 }
324
325 return Changed;
326}
327
328bool AArch64A57FPLoadBalancingLegacy::runOnMachineFunction(
329 MachineFunction &MF) {
330 if (skipFunction(MF.getFunction()))
331 return false;
332 return AArch64A57FPLoadBalancingImpl().run(MF);
333}
334
335PreservedAnalyses
338 if (AArch64A57FPLoadBalancingImpl().run(MF)) {
341 return PA;
342 }
343 return PreservedAnalyses::all();
344}
345
346bool AArch64A57FPLoadBalancingImpl::runOnBasicBlock(MachineBasicBlock &MBB) {
347 bool Changed = false;
348 LLVM_DEBUG(dbgs() << "Running on MBB: " << MBB
349 << " - scanning instructions...\n");
350
351 // First, scan the basic block producing a set of chains.
352
353 // The currently "active" chains - chains that can be added to and haven't
354 // been killed yet. This is keyed by register - all chains can only have one
355 // "link" register between each inst in the chain.
356 std::map<unsigned, Chain*> ActiveChains;
357 std::vector<std::unique_ptr<Chain>> AllChains;
358 unsigned Idx = 0;
359 for (auto &MI : MBB)
360 scanInstruction(&MI, Idx++, ActiveChains, AllChains);
361
362 LLVM_DEBUG(dbgs() << "Scan complete, " << AllChains.size()
363 << " chains created.\n");
364
365 // Group the chains into disjoint sets based on their liveness range. This is
366 // a poor-man's version of graph coloring. Ideally we'd create an interference
367 // graph and perform full-on graph coloring on that, but;
368 // (a) That's rather heavyweight for only two colors.
369 // (b) We expect multiple disjoint interference regions - in practice the live
370 // range of chains is quite small and they are clustered between loads
371 // and stores.
373 for (auto &I : AllChains)
374 EC.insert(I.get());
375
376 for (auto &I : AllChains)
377 for (auto &J : AllChains)
378 if (I != J && I->rangeOverlapsWith(*J))
379 EC.unionSets(I.get(), J.get());
380 LLVM_DEBUG(dbgs() << "Created " << EC.getNumClasses() << " disjoint sets.\n");
381
382 // Now we assume that every member of an equivalence class interferes
383 // with every other member of that class, and with no members of other classes.
384
385 // Convert the EquivalenceClasses to a simpler set of sets.
386 std::vector<std::vector<Chain*> > V;
387 for (const auto &E : EC) {
388 if (!E->isLeader())
389 continue;
390 std::vector<Chain *> Cs(EC.member_begin(*E), EC.member_end());
391 if (Cs.empty()) continue;
392 V.push_back(std::move(Cs));
393 }
394
395 // Now we have a set of sets, order them by start address so
396 // we can iterate over them sequentially.
397 llvm::sort(V,
398 [](const std::vector<Chain *> &A, const std::vector<Chain *> &B) {
399 return A.front()->startsBefore(B.front());
400 });
401
402 // As we only have two colors, we can track the global (BB-level) balance of
403 // odds versus evens. We aim to keep this near zero to keep both execution
404 // units fed.
405 // Positive means we're even-heavy, negative we're odd-heavy.
406 //
407 // FIXME: If chains have interdependencies, for example:
408 // mul r0, r1, r2
409 // mul r3, r0, r1
410 // We do not model this and may color each one differently, assuming we'll
411 // get ILP when we obviously can't. This hasn't been seen to be a problem
412 // in practice so far, so we simplify the algorithm by ignoring it.
413 int Parity = 0;
414
415 for (auto &I : V)
416 Changed |= colorChainSet(std::move(I), MBB, Parity);
417
418 return Changed;
419}
420
421Chain *AArch64A57FPLoadBalancingImpl::getAndEraseNext(Color PreferredColor,
422 std::vector<Chain *> &L) {
423 if (L.empty())
424 return nullptr;
425
426 // We try and get the best candidate from L to color next, given that our
427 // preferred color is "PreferredColor". L is ordered from larger to smaller
428 // chains. It is beneficial to color the large chains before the small chains,
429 // but if we can't find a chain of the maximum length with the preferred color,
430 // we fuzz the size and look for slightly smaller chains before giving up and
431 // returning a chain that must be recolored.
432
433 // FIXME: Does this need to be configurable?
434 const unsigned SizeFuzz = 1;
435 unsigned MinSize = L.front()->size() - SizeFuzz;
436 for (auto I = L.begin(), E = L.end(); I != E; ++I) {
437 if ((*I)->size() <= MinSize) {
438 // We've gone past the size limit. Return the previous item.
439 Chain *Ch = *--I;
440 L.erase(I);
441 return Ch;
442 }
443
444 if ((*I)->getPreferredColor() == PreferredColor) {
445 Chain *Ch = *I;
446 L.erase(I);
447 return Ch;
448 }
449 }
450
451 // Bailout case - just return the first item.
452 Chain *Ch = L.front();
453 L.erase(L.begin());
454 return Ch;
455}
456
457bool AArch64A57FPLoadBalancingImpl::colorChainSet(std::vector<Chain *> GV,
458 MachineBasicBlock &MBB,
459 int &Parity) {
460 bool Changed = false;
461 LLVM_DEBUG(dbgs() << "colorChainSet(): #sets=" << GV.size() << "\n");
462
463 // Sort by descending size order so that we allocate the most important
464 // sets first.
465 // Tie-break equivalent sizes by sorting chains requiring fixups before
466 // those without fixups. The logic here is that we should look at the
467 // chains that we cannot change before we look at those we can,
468 // so the parity counter is updated and we know what color we should
469 // change them to!
470 // Final tie-break with instruction order so pass output is stable (i.e. not
471 // dependent on malloc'd pointer values).
472 llvm::sort(GV, [](const Chain *G1, const Chain *G2) {
473 if (G1->size() != G2->size())
474 return G1->size() > G2->size();
475 if (G1->requiresFixup() != G2->requiresFixup())
476 return G1->requiresFixup() > G2->requiresFixup();
477 // Make sure startsBefore() produces a stable final order.
478 assert((G1 == G2 || (G1->startsBefore(G2) ^ G2->startsBefore(G1))) &&
479 "Starts before not total order!");
480 return G1->startsBefore(G2);
481 });
482
483 Color PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
484 while (Chain *G = getAndEraseNext(PreferredColor, GV)) {
485 // Start off by assuming we'll color to our own preferred color.
486 Color C = PreferredColor;
487 if (Parity == 0)
488 // But if we really don't care, use the chain's preferred color.
489 C = G->getPreferredColor();
490
491 LLVM_DEBUG(dbgs() << " - Parity=" << Parity
492 << ", Color=" << ColorNames[(int)C] << "\n");
493
494 // If we'll need a fixup FMOV, don't bother. Testing has shown that this
495 // happens infrequently and when it does it has at least a 50% chance of
496 // slowing code down instead of speeding it up.
497 if (G->requiresFixup() && C != G->getPreferredColor()) {
498 C = G->getPreferredColor();
499 LLVM_DEBUG(dbgs() << " - " << G->str()
500 << " - not worthwhile changing; "
501 "color remains "
502 << ColorNames[(int)C] << "\n");
503 }
504
505 Changed |= colorChain(G, C, MBB);
506
507 Parity += (C == Color::Even) ? G->size() : -G->size();
508 PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
509 }
510
511 return Changed;
512}
513
514int AArch64A57FPLoadBalancingImpl::scavengeRegister(Chain *G, Color C,
515 MachineBasicBlock &MBB) {
516 // Can we find an appropriate register that is available throughout the life
517 // of the chain? Simulate liveness backwards until the end of the chain.
518 LiveRegUnits Units(*TRI);
519 Units.addLiveOuts(MBB);
521 MachineBasicBlock::iterator ChainEnd = G->end();
522 while (I != ChainEnd) {
523 --I;
524 if (!I->isDebugInstr())
525 Units.stepBackward(*I);
526 }
527
528 // Check which register units are alive throughout the chain.
529 MachineBasicBlock::iterator ChainBegin = G->begin();
530 assert(ChainBegin != ChainEnd && "Chain should contain instructions");
531 do {
532 --I;
533 Units.accumulate(*I);
534 } while (I != ChainBegin);
535
536 // Make sure we allocate in-order, to get the cheapest registers first.
537 unsigned RegClassID = ChainBegin->getDesc().operands()[0].RegClass;
538 auto Ord = RCI.getOrder(TRI->getRegClass(RegClassID));
539 for (auto Reg : Ord) {
540 if (!Units.available(Reg))
541 continue;
542 if (C == getColor(Reg))
543 return Reg;
544 }
545
546 return -1;
547}
548
549bool AArch64A57FPLoadBalancingImpl::colorChain(Chain *G, Color C,
550 MachineBasicBlock &MBB) {
551 bool Changed = false;
552 LLVM_DEBUG(dbgs() << " - colorChain(" << G->str() << ", "
553 << ColorNames[(int)C] << ")\n");
554
555 // Try and obtain a free register of the right class. Without a register
556 // to play with we cannot continue.
557 int Reg = scavengeRegister(G, C, MBB);
558 if (Reg == -1) {
559 LLVM_DEBUG(dbgs() << "Scavenging (thus coloring) failed!\n");
560 return false;
561 }
562 LLVM_DEBUG(dbgs() << " - Scavenged register: " << printReg(Reg, TRI) << "\n");
563
564 std::map<unsigned, unsigned> Substs;
565 for (MachineInstr &I : *G) {
566 if (!G->contains(I) && (&I != G->getKill() || G->isKillImmutable()))
567 continue;
568
569 // I is a member of G, or I is a mutable instruction that kills G.
570
571 std::vector<unsigned> ToErase;
572 for (auto &U : I.operands()) {
573 if (U.isReg() && U.isUse() && Substs.find(U.getReg()) != Substs.end()) {
574 Register OrigReg = U.getReg();
575 U.setReg(Substs[OrigReg]);
576 if (U.isKill())
577 // Don't erase straight away, because there may be other operands
578 // that also reference this substitution!
579 ToErase.push_back(OrigReg);
580 } else if (U.isRegMask()) {
581 for (auto J : Substs) {
582 if (U.clobbersPhysReg(J.first))
583 ToErase.push_back(J.first);
584 }
585 }
586 }
587 // Now it's safe to remove the substs identified earlier.
588 for (auto J : ToErase)
589 Substs.erase(J);
590
591 // Only change the def if this isn't the last instruction.
592 if (&I != G->getKill()) {
593 MachineOperand &MO = I.getOperand(0);
594
595 bool Change = TransformAll || getColor(MO.getReg()) != C;
596 if (G->requiresFixup() && &I == G->getLast())
597 Change = false;
598
599 if (Change) {
600 Substs[MO.getReg()] = Reg;
601 MO.setReg(Reg);
602
603 Changed = true;
604 }
605 }
606 }
607 assert(Substs.size() == 0 && "No substitutions should be left active!");
608
609 if (G->getKill()) {
610 LLVM_DEBUG(dbgs() << " - Kill instruction seen.\n");
611 } else {
612 // We didn't have a kill instruction, but we didn't seem to need to change
613 // the destination register anyway.
614 LLVM_DEBUG(dbgs() << " - Destination register not changed.\n");
615 }
616 return Changed;
617}
618
619void AArch64A57FPLoadBalancingImpl::scanInstruction(
620 MachineInstr *MI, unsigned Idx, std::map<unsigned, Chain *> &ActiveChains,
621 std::vector<std::unique_ptr<Chain>> &AllChains) {
622 // Inspect "MI", updating ActiveChains and AllChains.
623
624 if (isMul(MI)) {
625
626 for (auto &I : MI->uses())
627 maybeKillChain(I, Idx, ActiveChains);
628 for (auto &I : MI->defs())
629 maybeKillChain(I, Idx, ActiveChains);
630
631 // Create a new chain. Multiplies don't require forwarding so can go on any
632 // unit.
633 Register DestReg = MI->getOperand(0).getReg();
634
635 LLVM_DEBUG(dbgs() << "New chain started for register "
636 << printReg(DestReg, TRI) << " at " << *MI);
637
638 auto G = std::make_unique<Chain>(MI, Idx, getColor(DestReg));
639 ActiveChains[DestReg] = G.get();
640 AllChains.push_back(std::move(G));
641
642 } else if (isMla(MI)) {
643
644 // It is beneficial to keep MLAs on the same functional unit as their
645 // accumulator operand.
646 Register DestReg = MI->getOperand(0).getReg();
647 Register AccumReg = MI->getOperand(3).getReg();
648
649 maybeKillChain(MI->getOperand(1), Idx, ActiveChains);
650 maybeKillChain(MI->getOperand(2), Idx, ActiveChains);
651 if (DestReg != AccumReg)
652 maybeKillChain(MI->getOperand(0), Idx, ActiveChains);
653
654 if (ActiveChains.find(AccumReg) != ActiveChains.end()) {
655 LLVM_DEBUG(dbgs() << "Chain found for accumulator register "
656 << printReg(AccumReg, TRI) << " in MI " << *MI);
657
658 // For simplicity we only chain together sequences of MULs/MLAs where the
659 // accumulator register is killed on each instruction. This means we don't
660 // need to track other uses of the registers we want to rewrite.
661 //
662 // FIXME: We could extend to handle the non-kill cases for more coverage.
663 if (MI->getOperand(3).isKill()) {
664 // Add to chain.
665 LLVM_DEBUG(dbgs() << "Instruction was successfully added to chain.\n");
666 ActiveChains[AccumReg]->add(MI, Idx, getColor(DestReg));
667 // Handle cases where the destination is not the same as the accumulator.
668 if (DestReg != AccumReg) {
669 ActiveChains[DestReg] = ActiveChains[AccumReg];
670 ActiveChains.erase(AccumReg);
671 }
672 return;
673 }
674
676 dbgs() << "Cannot add to chain because accumulator operand wasn't "
677 << "marked <kill>!\n");
678 maybeKillChain(MI->getOperand(3), Idx, ActiveChains);
679 }
680
681 LLVM_DEBUG(dbgs() << "Creating new chain for dest register "
682 << printReg(DestReg, TRI) << "\n");
683 auto G = std::make_unique<Chain>(MI, Idx, getColor(DestReg));
684 ActiveChains[DestReg] = G.get();
685 AllChains.push_back(std::move(G));
686
687 } else {
688
689 // Non-MUL or MLA instruction. Invalidate any chain in the uses or defs
690 // lists.
691 for (auto &I : MI->uses())
692 maybeKillChain(I, Idx, ActiveChains);
693 for (auto &I : MI->defs())
694 maybeKillChain(I, Idx, ActiveChains);
695
696 }
697}
698
699void AArch64A57FPLoadBalancingImpl::maybeKillChain(
700 MachineOperand &MO, unsigned Idx,
701 std::map<unsigned, Chain *> &ActiveChains) {
702 // Given an operand and the set of active chains (keyed by register),
703 // determine if a chain should be ended and remove from ActiveChains.
704 MachineInstr *MI = MO.getParent();
705
706 if (MO.isReg()) {
707
708 // If this is a KILL of a current chain, record it.
709 if (MO.isKill() && ActiveChains.find(MO.getReg()) != ActiveChains.end()) {
710 LLVM_DEBUG(dbgs() << "Kill seen for chain " << printReg(MO.getReg(), TRI)
711 << "\n");
712 ActiveChains[MO.getReg()]->setKill(MI, Idx, /*Immutable=*/MO.isTied());
713 }
714 ActiveChains.erase(MO.getReg());
715
716 } else if (MO.isRegMask()) {
717
718 for (auto I = ActiveChains.begin(), E = ActiveChains.end();
719 I != E;) {
720 if (MO.clobbersPhysReg(I->first)) {
721 LLVM_DEBUG(dbgs() << "Kill (regmask) seen for chain "
722 << printReg(I->first, TRI) << "\n");
723 I->second->setKill(MI, Idx, /*Immutable=*/true);
724 ActiveChains.erase(I++);
725 } else
726 ++I;
727 }
728
729 }
730}
731
732Color AArch64A57FPLoadBalancingImpl::getColor(unsigned Reg) {
733 if ((TRI->getEncodingValue(Reg) % 2) == 0)
734 return Color::Even;
735 else
736 return Color::Odd;
737}
738
739// Factory function used by AArch64TargetMachine to add the pass to the passmanager.
741 return new AArch64A57FPLoadBalancingLegacy();
742}
static bool isMul(MachineInstr *MI)
static cl::opt< unsigned > OverrideBalance("aarch64-a57-fp-load-balancing-override", cl::desc("Ignore balance information, always return " "(1: Even, 2: Odd)."), cl::init(0), cl::Hidden)
static cl::opt< bool > TransformAll("aarch64-a57-fp-load-balancing-force-all", cl::desc("Always modify dest registers regardless of color"), cl::init(false), cl::Hidden)
static bool isMla(MachineInstr *MI)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
#define DEBUG_TYPE
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static bool runOnBasicBlock(MachineBasicBlock *MBB, unsigned BasicBlockNum, VRegRenamer &Renamer)
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file declares the machine register scavenger class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
MachineInstr * StartInst
The important (marker) instructions.
MachineBasicBlock::iterator begin() const
bool isKillImmutable() const
Can the Kill instruction (assuming one exists) be modified?
void add(MachineInstr *MI, unsigned Idx, Color C)
Add a new instruction into the chain.
bool contains(MachineInstr &MI)
Return true if MI is a member of the chain.
Color LastColor
The "color" of LastInst.
bool requiresFixup() const
Return true if the group will require a fixup MOV at the end.
MachineInstr * getLast() const
Return the last instruction in the chain.
bool KillIsImmutable
True if KillInst cannot be modified.
bool rangeOverlapsWith(const Chain &Other) const
Return true if this chain (StartInst..KillInst) overlaps with Other.
MachineInstr * getStart() const
Return the first instruction in the chain.
unsigned size() const
Return the number of instructions in the chain.
MachineBasicBlock::iterator end() const
Return an instruction that can be used as an iterator for the end of the chain.
void setKill(MachineInstr *MI, unsigned Idx, bool Immutable)
Inform the chain that its last active register (the dest register of LastInst) is killed by MI with n...
MachineInstr * getKill() const
Return the "kill" instruction (as set with setKill()) or NULL.
unsigned StartInstIdx
The index, from the start of the basic block, that each marker appears.
Color getPreferredColor()
Return the preferred color of this chain.
std::string str() const
Return a simple string representation of the chain.
std::set< MachineInstr * > Insts
All instructions in the chain.
Chain(MachineInstr *MI, unsigned Idx, Color C)
bool startsBefore(const Chain *Other) const
Return true if this chain starts before Other.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
This represents a collection of equivalence classes and supports three efficient operations: insert a...
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
const TargetRegisterInfo * getTargetRegisterInfo() const
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)
runOnFunction - Prepare to answer questions about MF.
ArrayRef< MCPhysReg > getOrder(const TargetRegisterClass *RC) const
getOrder - Returns the preferred allocation order for RC.
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
Changed
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
This is an optimization pass for GlobalISel generic memory operations.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
FunctionPass * createAArch64A57FPLoadBalancingLegacyPass()
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
@ Other
Any other memory.
Definition ModRef.h:68
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.