LLVM 23.0.0git
SystemZMachineScheduler.cpp
Go to the documentation of this file.
1//-- SystemZMachineScheduler.cpp - SystemZ Scheduler Interface -*- C++ -*---==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
11
12using namespace llvm;
13
14#define DEBUG_TYPE "machine-scheduler"
15
16/// Pre-RA scheduling ///
17
18static bool isRegDef(const MachineOperand &MO) {
19 return MO.isReg() && MO.isDef();
20}
21
22bool SystemZPreRASchedStrategy::definesCmp0Src(const MachineInstr *MI,
23 bool CCDef) const {
24 if (Cmp0SrcReg != SystemZ::NoRegister && MI->getNumOperands() &&
25 (MI->getDesc().hasImplicitDefOfPhysReg(SystemZ::CC) || !CCDef)) {
26 const MachineOperand &MO0 = MI->getOperand(0);
27 if (isRegDef(MO0) && MO0.getReg() == Cmp0SrcReg)
28 return true;
29 }
30 return false;
31}
32
33bool SystemZPreRASchedStrategy::closesLiveRange(const SUnit *SU,
34 ScheduleDAGMILive *DAG) const {
35 if (SU->getInstr()->isCopy())
36 return false;
37
38 // Extract the PressureChanges that all fp/vector or GR64/GR32/GRH32 regs
39 // affect respectively. misched-prera-pdiffs.mir tests against any future
40 // change in the PressureSets modelling, so simply hard-code them here.
41 int VR16PChange = 0, GRX32PChange = 0;
42 const PressureDiff &PDiff = DAG->getPressureDiff(SU);
43 for (const PressureChange &PC : PDiff) {
44 if (!PC.isValid())
45 break;
46 if (PC.getPSet() == SystemZ::VR16Bit)
47 VR16PChange = PC.getUnitInc();
48 else if (PC.getPSet() == SystemZ::GRX32Bit)
49 GRX32PChange = PC.getUnitInc();
50 }
51
52 // Return true for a (vreg) def when register pressure is reduced. Prioritize
53 // FP/vector regs over GPRs.
54 return VR16PChange < 0 || (!VR16PChange && GRX32PChange < 0);
55}
56
58 SchedCandidate &TryCand,
59 SchedBoundary *Zone) const {
60 assert(Zone && !Zone->isTop() && "Bottom-Up scheduling only.");
61
62 // Initialize the candidate if needed.
63 if (!Cand.isValid()) {
64 TryCand.Reason = FirstValid;
65 return true;
66 }
67
68 // Bias physreg defs and copies to their uses and definitions respectively.
69 if (tryBiasPhysRegs(TryCand, Cand, Zone, /*BiasPRegsExtra=*/true))
70 return TryCand.Reason != NoCand;
71
72 if (RegionPolicy.ShouldTrackPressure) {
73 auto schedLow = [&](const SUnit *SU) {
74 return SU->getHeight() <= Zone->getScheduledLatency() &&
75 SU->getHeight() < LivenessHeightCutOff && closesLiveRange(SU, DAG);
76 };
77 // One SU closes a live range while preserving the scheduled latency.
78 if (tryGreater(schedLow(TryCand.SU), schedLow(Cand.SU), TryCand, Cand,
79 RegExcess))
80 return TryCand.Reason != NoCand;
81 }
82
83 // Do latency scheduling by trying to not increase the scheduled latency
84 // (going bottom-up). This is important for performance, but it is best to
85 // only do it when likely beneficial and not "move everything around": Only
86 // apply this to SUs that have somewhat longer latencies, and don't push an
87 // SU upwards if it's on the critical path.
88 if (!RegionPolicy.DisableLatencyHeuristic &&
89 std::max(TryCand.SU->Latency, Cand.SU->Latency) >= 5)
90 if (const SUnit *HigherSU =
91 TryCand.SU->getHeight() > Cand.SU->getHeight() ? TryCand.SU
92 : TryCand.SU->getHeight() < Cand.SU->getHeight() ? Cand.SU
93 : nullptr)
94 if (HigherSU->getHeight() > Zone->getScheduledLatency() &&
95 HigherSU->getDepth() < computeRemLatency(*Zone)) {
96 // HigherSU would increase the scheduled latency and there is room
97 // above for it next to the critical path, so wait with it.
98 tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), TryCand, Cand,
100 return TryCand.Reason != NoCand;
101 }
102
103 // Weak edges help copy coalescing.
104 if (tryLess(TryCand.SU->WeakSuccsLeft, Cand.SU->WeakSuccsLeft, TryCand, Cand,
105 Weak))
106 return TryCand.Reason != NoCand;
107
108 // Help compare with zero elimination.
109 if (tryGreater(definesCmp0Src(TryCand.SU->getInstr()),
110 definesCmp0Src(Cand.SU->getInstr()), TryCand, Cand, Weak))
111 return TryCand.Reason != NoCand;
112
113 // Fall through to original instruction order.
114 if (TryCand.SU->NodeNum > Cand.SU->NodeNum) {
115 TryCand.Reason = NodeOrder;
116 return true;
117 }
118
119 return false;
120}
121
124 unsigned NumRegionInstrs) {
125 // TopRegionSUs is the number of SUs that is considered to be part of the
126 // "top" of a region. Liveness reduction is not done in regions smaller than
127 // this. The idea is to prioritize latency more after branches and help
128 // liveness only when the decoder is ahead of execution anyway.
129 static const unsigned TopRegionSUs = 36;
130
131 // Avoid setting up the register pressure tracker unless needed to save
132 // compile time.
133 RegionPolicy.ShouldTrackPressure = NumRegionInstrs > TopRegionSUs;
134
135 // These heuristics has so far seemed to work better without adding a
136 // top-down boundary.
137 RegionPolicy.OnlyBottomUp = true;
138
140 this->NumRegionInstrs = NumRegionInstrs;
141}
142
145
146 Cmp0SrcReg = SystemZ::NoRegister;
147
148 unsigned DAGHeight = 0;
149 for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx)
150 DAGHeight = std::max(DAGHeight, DAG->SUnits[Idx].getHeight());
151
152 if (RegionPolicy.ShouldTrackPressure)
153 LivenessHeightCutOff = DAGHeight / (DAG->SUnits.size() < 50 ? 4 : 2);
154
155 // Disable latency reduction if region has many SUs relative to the
156 // overall height.
157 RegionPolicy.DisableLatencyHeuristic =
158 DAG->SUnits.size() >= 3 * std::max(DAGHeight, 1u);
159}
160
162 GenericScheduler::schedNode(SU, IsTopNode);
163
164 const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(DAG->TII);
165 MachineInstr *MI = SU->getInstr();
166 if (TII->isCompareZero(*MI))
167 Cmp0SrcReg = TII->getCompareSourceReg(*MI);
168 else if (MI->getDesc().hasImplicitDefOfPhysReg(SystemZ::CC) ||
169 definesCmp0Src(MI, /*CCDef=*/false))
170 Cmp0SrcReg = SystemZ::NoRegister;
171}
172
173/// Post-RA scheduling ///
174
175#ifndef NDEBUG
176// Print the set of SUs
177void SystemZPostRASchedStrategy::SUSet::
178dump(SystemZHazardRecognizer &HazardRec) const {
179 dbgs() << "{";
180 for (auto &SU : *this) {
181 HazardRec.dumpSU(SU, dbgs());
182 if (SU != *rbegin())
183 dbgs() << ", ";
184 }
185 dbgs() << "}\n";
186}
187#endif
188
189// Try to find a single predecessor that would be interesting for the
190// scheduler in the top-most region of MBB.
192 const MachineLoop *Loop) {
193 MachineBasicBlock *PredMBB = nullptr;
194 if (MBB->pred_size() == 1)
195 PredMBB = *MBB->pred_begin();
196
197 // The loop header has two predecessors, return the latch, but not for a
198 // single block loop.
199 if (MBB->pred_size() == 2 && Loop != nullptr && Loop->getHeader() == MBB) {
200 for (MachineBasicBlock *Pred : MBB->predecessors())
201 if (Loop->contains(Pred))
202 PredMBB = (Pred == MBB ? nullptr : Pred);
203 }
204
205 assert ((PredMBB == nullptr || !Loop || Loop->contains(PredMBB))
206 && "Loop MBB should not consider predecessor outside of loop.");
207
208 return PredMBB;
209}
210
211void SystemZPostRASchedStrategy::
212advanceTo(MachineBasicBlock::iterator NextBegin) {
213 MachineBasicBlock::iterator LastEmittedMI = HazardRec->getLastEmittedMI();
215 ((LastEmittedMI != nullptr && LastEmittedMI->getParent() == MBB) ?
216 std::next(LastEmittedMI) : MBB->begin());
217
218 for (; I != NextBegin; ++I) {
219 if (I->isPosition() || I->isDebugInstr())
220 continue;
221 HazardRec->emitInstruction(&*I);
222 }
223}
224
226 Available.clear(); // -misched-cutoff.
227 LLVM_DEBUG(HazardRec->dumpState(););
228}
229
231 assert ((SchedStates.find(NextMBB) == SchedStates.end()) &&
232 "Entering MBB twice?");
233 LLVM_DEBUG(dbgs() << "** Entering " << printMBBReference(*NextMBB));
234
235 MBB = NextMBB;
236
237 /// Create a HazardRec for MBB, save it in SchedStates and set HazardRec to
238 /// point to it.
239 HazardRec = SchedStates[MBB] = new SystemZHazardRecognizer(TII, &SchedModel);
240 LLVM_DEBUG(const MachineLoop *Loop = MLI->getLoopFor(MBB);
241 if (Loop && Loop->getHeader() == MBB) dbgs() << " (Loop header)";
242 dbgs() << ":\n";);
243
244 // Try to take over the state from a single predecessor, if it has been
245 // scheduled. If this is not possible, we are done.
246 MachineBasicBlock *SinglePredMBB =
247 getSingleSchedPred(MBB, MLI->getLoopFor(MBB));
248 if (SinglePredMBB == nullptr)
249 return;
250 auto It = SchedStates.find(SinglePredMBB);
251 if (It == SchedStates.end())
252 return;
253
254 LLVM_DEBUG(dbgs() << "** Continued scheduling from "
255 << printMBBReference(*SinglePredMBB) << "\n";);
256
257 HazardRec->copyState(It->second);
258 LLVM_DEBUG(HazardRec->dumpState(););
259
260 // Emit incoming terminator(s). Be optimistic and assume that branch
261 // prediction will generally do "the right thing".
262 for (MachineInstr &MI : SinglePredMBB->terminators()) {
263 LLVM_DEBUG(dbgs() << "** Emitting incoming branch: "; MI.dump(););
264 bool TakenBranch = (MI.isBranch() &&
265 (TII->getBranchInfo(MI).isIndirect() ||
266 TII->getBranchInfo(MI).getMBBTarget() == MBB));
267 HazardRec->emitInstruction(&MI, TakenBranch);
268 if (TakenBranch)
269 break;
270 }
271}
272
274 LLVM_DEBUG(dbgs() << "** Leaving " << printMBBReference(*MBB) << "\n";);
275
276 // Advance to first terminator. The successor block will handle terminators
277 // dependent on CFG layout (T/NT branch etc).
278 advanceTo(MBB->getFirstTerminator());
279}
280
283 : MLI(C->MLI),
284 TII(static_cast<const SystemZInstrInfo *>
285 (C->MF->getSubtarget().getInstrInfo())),
286 MBB(nullptr), HazardRec(nullptr) {
287 const TargetSubtargetInfo *ST = &C->MF->getSubtarget();
288 SchedModel.init(ST);
289}
290
292 // Delete hazard recognizers kept around for each MBB.
293 for (auto I : SchedStates) {
294 SystemZHazardRecognizer *hazrec = I.second;
295 delete hazrec;
296 }
297}
298
301 unsigned NumRegionInstrs) {
302 // Don't emit the terminators.
303 if (Begin->isTerminator())
304 return;
305
306 // Emit any instructions before start of region.
307 advanceTo(Begin);
308}
309
310// Pick the next node to schedule.
312 // Only scheduling top-down.
313 IsTopNode = true;
314
315 if (Available.empty())
316 return nullptr;
317
318 // If only one choice, return it.
319 if (Available.size() == 1) {
320 LLVM_DEBUG(dbgs() << "** Only one: ";
321 HazardRec->dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";);
322 return *Available.begin();
323 }
324
325 // All nodes that are possible to schedule are stored in the Available set.
326 LLVM_DEBUG(dbgs() << "** Available: "; Available.dump(*HazardRec););
327
328 Candidate Best;
329 for (auto *SU : Available) {
330
331 // SU is the next candidate to be compared against current Best.
332 Candidate c(SU, *HazardRec);
333
334 // Remeber which SU is the best candidate.
335 if (Best.SU == nullptr || c < Best) {
336 Best = c;
337 LLVM_DEBUG(dbgs() << "** Best so far: ";);
338 } else
339 LLVM_DEBUG(dbgs() << "** Tried : ";);
340 LLVM_DEBUG(HazardRec->dumpSU(c.SU, dbgs()); c.dumpCosts();
341 dbgs() << " Height:" << c.SU->getHeight(); dbgs() << "\n";);
342
343 // Once we know we have seen all SUs that affect grouping or use unbuffered
344 // resources, we can stop iterating if Best looks good.
345 if (!SU->isScheduleHigh && Best.noCost())
346 break;
347 }
348
349 assert (Best.SU != nullptr);
350 return Best.SU;
351}
352
353SystemZPostRASchedStrategy::Candidate::
354Candidate(SUnit *SU_, SystemZHazardRecognizer &HazardRec) : Candidate() {
355 SU = SU_;
356
357 // Check the grouping cost. For a node that must begin / end a
358 // group, it is positive if it would do so prematurely, or negative
359 // if it would fit naturally into the schedule.
360 GroupingCost = HazardRec.groupingCost(SU);
361
362 // Check the resources cost for this SU.
363 ResourcesCost = HazardRec.resourcesCost(SU);
364}
365
366bool SystemZPostRASchedStrategy::Candidate::
367operator<(const Candidate &other) {
368
369 // Check decoder grouping.
370 if (GroupingCost < other.GroupingCost)
371 return true;
372 if (GroupingCost > other.GroupingCost)
373 return false;
374
375 // Compare the use of resources.
376 if (ResourcesCost < other.ResourcesCost)
377 return true;
378 if (ResourcesCost > other.ResourcesCost)
379 return false;
380
381 // Higher SU is otherwise generally better.
382 if (SU->getHeight() > other.SU->getHeight())
383 return true;
384 if (SU->getHeight() < other.SU->getHeight())
385 return false;
386
387 // If all same, fall back to original order.
388 if (SU->NodeNum < other.SU->NodeNum)
389 return true;
390
391 return false;
392}
393
395 LLVM_DEBUG(dbgs() << "** Scheduling SU(" << SU->NodeNum << ") ";
396 if (Available.size() == 1) dbgs() << "(only one) ";
397 Candidate c(SU, *HazardRec); c.dumpCosts(); dbgs() << "\n";);
398
399 // Remove SU from Available set and update HazardRec.
400 Available.erase(SU);
401 HazardRec->EmitInstruction(SU);
402}
403
405 // Set isScheduleHigh flag on all SUs that we want to consider first in
406 // pickNode().
407 const MCSchedClassDesc *SC = HazardRec->getSchedClass(SU);
408 bool AffectsGrouping = (SC->isValid() && (SC->BeginGroup || SC->EndGroup));
409 SU->isScheduleHigh = (AffectsGrouping || SU->isUnbuffered);
410
411 // Put all released SUs in the Available set.
412 Available.insert(SU);
413}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
MachineBasicBlock & MBB
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
if(PassOpts->AAPipeline)
#define LLVM_DEBUG(...)
Definition Debug.h:119
static MachineBasicBlock * getSingleSchedPred(MachineBasicBlock *MBB, const MachineLoop *Loop)
static bool isRegDef(const MachineOperand &MO)
Pre-RA scheduling ///.
MachineSchedPolicy RegionPolicy
ScheduleDAGMILive * DAG
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getHeader() const
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
MachineInstrBundleIterator< MachineInstr > iterator
Representation of each machine instruction.
bool isCopy() const
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
Scheduling unit. This is a node in the scheduling DAG.
unsigned NodeNum
Entry # of node in the node vector.
bool isUnbuffered
Uses an unbuffered resource.
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
unsigned short Latency
Node latency.
bool isScheduleHigh
True if preferable to schedule high.
unsigned WeakSuccsLeft
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Each Scheduling boundary is associated with ready queues.
unsigned getScheduledLatency() const
Get the number of latency cycles "covered" by the scheduled instructions.
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
SystemZHazardRecognizer maintains the state for one MBB during scheduling.
int groupingCost(SUnit *SU) const
Return the cost of decoder grouping for SU.
void dumpSU(SUnit *SU, raw_ostream &OS) const
int resourcesCost(SUnit *SU)
Return the cost of SU in regards to processor resources usage.
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule, or return NULL.
void leaveMBB() override
Tell the strategy that current MBB is done.
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Called for a region before scheduling.
void schedNode(SUnit *SU, bool IsTopNode) override
ScheduleDAGMI has scheduled an instruction - tell HazardRec about it.
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void enterMBB(MachineBasicBlock *NextMBB) override
Tell the strategy that MBB is about to be processed.
SystemZPostRASchedStrategy(const MachineSchedContext *C)
void releaseTopNode(SUnit *SU) override
SU has had all predecessor dependencies resolved.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
Apply a set of heuristics to a new candidate.
TargetSubtargetInfo - Generic base class for all target subtargets.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI bool tryBiasPhysRegs(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary *Zone, bool BiasPRegsExtra)
LLVM_ABI bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
LLVM_ABI unsigned computeRemLatency(SchedBoundary &CurrZone)
Compute remaining latency.
LLVM_ABI bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition MCSchedule.h:129
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...