LLVM 23.0.0git
GCNSchedStrategy.h
Go to the documentation of this file.
1//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
14#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
15
16#include "GCNRegPressure.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/MapVector.h"
24
25namespace llvm {
26
28class SIRegisterInfo;
29class GCNSubtarget;
30class GCNSchedStage;
31
41
42#ifndef NDEBUG
43raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
44#endif
45
46/// This is a minimal scheduler strategy. The main difference between this
47/// and the GenericScheduler is that GCNSchedStrategy uses different
48/// heuristics to determine excess/critical pressure sets.
50protected:
51 SUnit *pickNodeBidirectional(bool &IsTopNode, bool &PickedPending);
52
53 void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
54 const RegPressureTracker &RPTracker,
55 SchedCandidate &Cand, bool &IsPending,
56 bool IsBottomUp);
57
58 void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
59 const RegPressureTracker &RPTracker,
60 const SIRegisterInfo *SRI, unsigned SGPRPressure,
61 unsigned VGPRPressure, bool IsBottomUp);
62
63 /// Estimate how many cycles \p SU must wait due to structural hazards at the
64 /// current boundary cycle. Returns zero when no stall is required.
65 unsigned getStructuralStallCycles(SchedBoundary &Zone, SUnit *SU) const;
66
67 /// Evaluates instructions in the pending queue using a subset of scheduling
68 /// heuristics.
69 ///
70 /// Instructions that cannot be issued due to hardware constraints are placed
71 /// in the pending queue rather than the available queue, making them normally
72 /// invisible to scheduling heuristics. However, in certain scenarios (such as
73 /// avoiding register spilling), it may be beneficial to consider scheduling
74 /// these not-yet-ready instructions.
76 SchedBoundary *Zone) const;
77
78 void printCandidateDecision(const SchedCandidate &Current,
79 const SchedCandidate &Preferred);
80
81 void getRegisterPressures(bool AtTop, const RegPressureTracker &RPTracker,
82 SUnit *SU, std::vector<unsigned> &Pressure,
83 std::vector<unsigned> &MaxPressure,
86 ScheduleDAGMI *DAG, const SIRegisterInfo *SRI);
87
88 std::vector<unsigned> Pressure;
89
90 std::vector<unsigned> MaxPressure;
91
93
95
97
99
100 // Scheduling stages for this strategy.
102
103 // Pointer to the current SchedStageID.
105
106 // GCN RP Tracker for top-down scheduling
108
// GCN RP Tracker for bottom-up scheduling
111
112 bool UseGCNTrackers = false;
113
114 std::optional<bool> GCNTrackersOverride;
115
116public:
// schedule() has seen register pressure over the critical limits and had to
// track register pressure for actual scheduling heuristics.
120
121 // Schedule known to have excess register pressure. Be more conservative in
122 // increasing ILP and preserving VGPRs.
123 bool KnownExcessRP = false;
124
125 // An error margin is necessary because of poor performance of the generic RP
126 // tracker and can be adjusted up for tuning heuristics to try and more
127 // aggressively reduce register pressure.
128 unsigned ErrorMargin = 3;
129
130 // Bias for SGPR limits under a high register pressure.
131 const unsigned HighRPSGPRBias = 7;
132
133 // Bias for VGPR limits under a high register pressure.
134 const unsigned HighRPVGPRBias = 7;
135
137
139
140 unsigned SGPRLimitBias = 0;
141
142 unsigned VGPRLimitBias = 0;
143
145
146 SUnit *pickNode(bool &IsTopNode) override;
147
148 void schedNode(SUnit *SU, bool IsTopNode) override;
149
150 void initialize(ScheduleDAGMI *DAG) override;
151
/// Returns the occupancy (waves per SIMD) the strategy is currently targeting.
unsigned getTargetOccupancy() { return TargetOccupancy; }
153
/// Sets the occupancy target used by the scheduling heuristics to \p Occ.
void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }
155
157
158 // Advances stage. Returns true if there are remaining stages.
159 bool advanceStage();
160
161 bool hasNextStage() const;
162
163 bool useGCNTrackers() const {
164 return GCNTrackersOverride.value_or(UseGCNTrackers);
165 }
166
168
170
172};
173
174/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
175/// maximum number of waves per simd).
177public:
179 bool IsLegacyScheduler = false);
180};
181
182/// The goal of this scheduling strategy is to maximize ILP for a single wave
183/// (i.e. latency hiding).
185protected:
186 bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
187 SchedBoundary *Zone) const override;
188
189public:
191};
192
193/// The goal of this scheduling strategy is to maximize memory clause for a
194/// single wave.
196protected:
197 bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
198 SchedBoundary *Zone) const override;
199
200public:
202};
203
205 unsigned ScheduleLength;
206 unsigned BubbleCycles;
207
208public:
// Default construction leaves both fields uninitialized (the members have no
// in-class initializers); callers are expected to assign before use.
ScheduleMetrics() = default;
/// \p L is the schedule length in cycles, \p BC the number of bubble cycles.
ScheduleMetrics(unsigned L, unsigned BC)
    : ScheduleLength(L), BubbleCycles(BC) {}
/// Total schedule length, in cycles.
unsigned getLength() const { return ScheduleLength; }
/// Number of bubble (stall) cycles within the schedule.
unsigned getBubbles() const { return BubbleCycles; }
214 unsigned getMetric() const {
215 unsigned Metric = (BubbleCycles * ScaleFactor) / ScheduleLength;
216 // Metric is zero if the amount of bubbles is less than 1% which is too
217 // small. So, return 1.
218 return Metric ? Metric : 1;
219 }
220 static const unsigned ScaleFactor;
221};
222
224 dbgs() << "\n Schedule Metric (scaled by " << ScheduleMetrics::ScaleFactor
225 << " ) is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/"
226 << Sm.getLength() << " ]\n";
227 return OS;
228}
229
230class GCNScheduleDAGMILive;
233 // The live in/out pressure as indexed by the first or last MI in the region
234 // before scheduling.
236 // The mapping of RegionIDx to key instruction
237 DenseMap<unsigned, MachineInstr *> IdxToInstruction;
238 // Whether we are calculating LiveOuts or LiveIns
239 bool IsLiveOut;
240
241public:
242 RegionPressureMap() = default;
244 : DAG(GCNDAG), IsLiveOut(LiveOut) {}
245 // Build the Instr->LiveReg and RegionIdx->Instr maps
246 void buildLiveRegMap();
247
248 // Retrieve the LiveReg for a given RegionIdx
250 assert(IdxToInstruction.contains(RegionIdx));
251 MachineInstr *Key = IdxToInstruction[RegionIdx];
252 return RegionLiveRegMap[Key];
253 }
254};
255
256/// A region's boundaries i.e. a pair of instruction bundle iterators. The lower
257/// boundary is inclusive, the upper boundary is exclusive.
259 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>;
260
262 friend class GCNSchedStage;
267 friend class PreRARematStage;
269 friend class RegionPressureMap;
270
271 const GCNSubtarget &ST;
272
274
275 // Occupancy target at the beginning of function scheduling cycle.
276 unsigned StartingOccupancy;
277
// Minimal real occupancy recorded for the function.
279 unsigned MinOccupancy;
280
// Vector of regions recorded for later rescheduling
283
284 // Record regions with high register pressure.
285 BitVector RegionsWithHighRP;
286
287 // Record regions with excess register pressure over the physical register
288 // limit. Register pressure in these regions usually will result in spilling.
289 BitVector RegionsWithExcessRP;
290
291 // Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
292 BitVector RegionsWithIGLPInstrs;
293
294 // Region live-in cache.
296
297 // Region pressure cache.
299
300 // Temporary basic block live-in cache.
302
303 // The map of the initial first region instruction to region live in registers
305
306 // Calculate the map of the initial first region instruction to region live in
307 // registers
309
310 // Calculate the map of the initial last region instruction to region live out
311 // registers
313 getRegionLiveOutMap() const;
314
315 // The live out registers per region. These are internally stored as a map of
316 // the initial last region instruction to region live out registers, but can
// be retrieved with the regionIdx by calls to getLiveRegsForRegionIdx.
318 RegionPressureMap RegionLiveOuts;
319
320 // Return current region pressure.
321 GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;
322
323 // Compute and cache live-ins and pressure for all regions in block.
324 void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);
325
326 /// Makes the scheduler try to achieve an occupancy of \p TargetOccupancy.
327 void setTargetOccupancy(unsigned TargetOccupancy);
328
329 void runSchedStages();
330
331 std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);
332
333 void deleteMI(unsigned RegionIdx, MachineInstr *MI);
334
335public:
337 std::unique_ptr<MachineSchedStrategy> S);
338
339 void schedule() override;
340
341 void finalizeSchedule() override;
342};
343
344// GCNSchedStrategy applies multiple scheduling stages to a function.
346protected:
348
350
352
354
356
358
359 // The current block being scheduled.
361
362 // Current region index.
363 unsigned RegionIdx = 0;
364
365 // Record the original order of instructions before scheduling.
366 std::vector<MachineInstr *> Unsched;
367
368 // RP before scheduling the current region.
370
371 // RP after scheduling the current region.
373
374 std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
375
377
378public:
379 // Initialize state for a scheduling stage. Returns false if the current stage
380 // should be skipped.
381 virtual bool initGCNSchedStage();
382
383 // Finalize state after finishing a scheduling pass on the function.
384 virtual void finalizeGCNSchedStage();
385
386 // Setup for scheduling a region. Returns false if the current region should
387 // be skipped.
388 virtual bool initGCNRegion();
389
390 // Finalize state after scheduling a region.
391 virtual void finalizeGCNRegion();
392
393 // Track whether a new region is also a new MBB.
394 void setupNewBlock();
395
396 // Check result of scheduling.
397 void checkScheduling();
398
399 // computes the given schedule virtual execution time in clocks
400 ScheduleMetrics getScheduleMetrics(const std::vector<SUnit> &InputSchedule);
402 unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
403 DenseMap<unsigned, unsigned> &ReadyCycles,
404 const TargetSchedModel &SM);
405
406 // Returns true if scheduling should be reverted.
407 virtual bool shouldRevertScheduling(unsigned WavesAfter);
408
// Returns true if the current region has known excess register pressure.
bool isRegionWithExcessRP() const {
  // Excess-RP regions are flagged once on the DAG; index by the region this
  // stage is currently working on.
  return DAG.RegionsWithExcessRP[RegionIdx];
}
413
// Returns the index of the region this stage is currently working on.
unsigned getRegionIdx() { return RegionIdx; }
416
417 // Returns true if the new schedule may result in more spilling.
418 bool mayCauseSpilling(unsigned WavesAfter);
419
420 /// Sets the schedule of region \p RegionIdx in block \p MBB to \p MIOrder.
421 /// The MIs in \p MIOrder must be exactly the same as the ones currently
422 /// existing inside the region, only in a different order that honors def-use
423 /// chains.
426
428
429 virtual ~GCNSchedStage() = default;
430};
431
439
441private:
442 // Record regions with excess archvgpr register pressure over the physical
443 // register limit. Register pressure in these regions usually will result in
444 // spilling.
445 BitVector RegionsWithExcessArchVGPR;
446
447 const SIInstrInfo *TII;
448 const SIRegisterInfo *SRI;
449
450 /// Do a speculative rewrite and collect copy locations. The speculative
451 /// rewrite allows us to calculate the RP of the code after the rewrite, and
452 /// the copy locations allow us to calculate the total cost of copies required
453 /// for the rewrite. Stores the rewritten instructions in \p RewriteCands ,
454 /// the copy locations for uses (of the MFMA result) in \p CopyForUse and the
455 /// copy locations for defs (of the MFMA operands) in \p CopyForDef
456 bool
457 initHeuristics(std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
458 DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
460
461 /// Calculate the rewrite cost and undo the state change (e.g. rewriting) done
462 /// in initHeuristics. Uses \p CopyForUse and \p CopyForDef to calculate copy
463 /// costs, and \p RewriteCands to undo rewriting.
464 int64_t getRewriteCost(
465 const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
466 const DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
467 const SmallPtrSetImpl<MachineInstr *> &CopyForDef);
468
469 /// Do the final rewrite on \p RewriteCands and insert any needed copies.
470 bool
471 rewrite(const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands);
472
473 /// \returns true if this MI is a rewrite candidate.
474 bool isRewriteCandidate(MachineInstr *MI) const;
475
476 /// Finds all the reaching defs of \p UseMO and stores the SlotIndexes into \p
477 /// DefIdxs
478 void findReachingDefs(MachineOperand &UseMO, LiveIntervals *LIS,
480
481 /// Finds all the reaching uses of \p DefMI and stores the use operands in \p
482 /// ReachingUses
483 void findReachingUses(MachineInstr *DefMI, LiveIntervals *LIS,
485
486public:
487 bool initGCNSchedStage() override;
488
491};
492
494private:
495 // Save the initial occupancy before starting this stage.
496 unsigned InitialOccupancy;
497 // Save the temporary target occupancy before starting this stage.
498 unsigned TempTargetOccupancy;
499 // Track whether any region was scheduled by this stage.
500 bool IsAnyRegionScheduled;
501
502public:
503 bool initGCNSchedStage() override;
504
505 void finalizeGCNSchedStage() override;
506
507 bool initGCNRegion() override;
508
509 bool shouldRevertScheduling(unsigned WavesAfter) override;
510
513};
514
515// Retry function scheduling if we found resulting occupancy and it is
516// lower than used for other scheduling passes. This will give more freedom
517// to schedule low register pressure blocks.
519public:
520 bool initGCNSchedStage() override;
521
522 bool initGCNRegion() override;
523
524 bool shouldRevertScheduling(unsigned WavesAfter) override;
525
528};
529
530/// Attempts to reduce function spilling or, if there is no spilling, to
531/// increase function occupancy by one with respect to register usage by sinking
532/// rematerializable instructions to their use. When the stage estimates that
533/// reducing spilling or increasing occupancy is possible, it tries to
534/// rematerialize as few registers as possible to reduce potential negative
535/// effects on function latency.
536///
537/// The stage only supports rematerializing registers that meet all of the
538/// following constraints.
539/// 1. The register is virtual and has a single defining instruction.
540/// 2. The single defining instruction is either deemed rematerializable by the
541/// target-independent logic, or if not, has no non-constant and
542/// non-ignorable physical register use.
/// 3. The register has no virtual register use whose live range would be
544/// extended by the rematerialization.
545/// 4. The register has a single non-debug user in a different region from its
546/// defining region.
547/// 5. The register is not used by or using another register that is going to be
548/// rematerialized.
550private:
551 /// A rematerializable register.
552 struct RematReg {
553 /// Single MI defining the rematerializable register.
555 /// Single user of the rematerializable register.
557 /// Regions in which the register is live-in/live-out/live anywhere.
558 BitVector LiveIn, LiveOut, Live;
559 /// The rematerializable register's lane bitmask.
560 LaneBitmask Mask;
561 /// Defining and using regions.
562 unsigned DefRegion, UseRegion;
563
566 const DenseMap<MachineInstr *, unsigned> &MIRegion);
567
568 /// Returns the rematerializable register. Do not call after deleting the
569 /// original defining instruction.
570 Register getReg() const { return DefMI->getOperand(0).getReg(); }
571
572 /// Determines whether this rematerialization may be beneficial in at least
573 /// one target region.
574 bool maybeBeneficial(const BitVector &TargetRegions,
575 ArrayRef<GCNRPTarget> RPTargets) const;
576
577 /// Determines if the register is both unused and live-through in region \p
578 /// I. This guarantees that rematerializing it will reduce RP in the region.
579 bool isUnusedLiveThrough(unsigned I) const {
580 assert(I < Live.size() && "region index out of range");
581 return LiveIn[I] && LiveOut[I] && I != UseRegion;
582 }
583
584 /// Updates internal structures following a MI rematerialization. Part of
585 /// the stage instead of the DAG because it makes assumptions that are
586 /// specific to the rematerialization process.
587 void insertMI(unsigned RegionIdx, MachineInstr *RematMI,
589 };
590
591 /// A scored rematerialization candidate. Higher scores indicate more
592 /// beneficial rematerializations. A null score indicate the rematerialization
593 /// is not helpful to reduce RP in target regions.
594 struct ScoredRemat {
595 /// The rematerializable register under consideration.
596 RematReg *Remat;
597
598 /// Execution frequency information required by scoring heuristics.
599 /// Frequencies are scaled down if they are high to avoid overflow/underflow
600 /// when combining them.
601 struct FreqInfo {
602 /// Per-region execution frequencies. 0 when unknown.
604 /// Minimum and maximum observed frequencies.
606
608
609 private:
610 static const uint64_t ScaleFactor = 1024;
611 };
612
613 /// This only initializes state-independent characteristics of \p Remat, not
614 /// the actual score.
615 ScoredRemat(RematReg *Remat, const FreqInfo &Freq,
617
618 /// Rematerializes the candidate and returns the new MI. This removes the
619 /// rematerialized register from live-in/out lists in the \p DAG and updates
620 /// \p RPTargets in all affected regions. Regions in which RP savings are
621 /// not guaranteed are set in \p RecomputeRP.
622 MachineInstr *rematerialize(BitVector &RecomputeRP,
625
626 /// Updates the rematerialization's score w.r.t. the current \p RPTargets.
627 /// \p RegionFreq indicates the frequency of each region
628 void update(const BitVector &TargetRegions, ArrayRef<GCNRPTarget> RPTargets,
629 const FreqInfo &Freq, bool ReduceSpill);
630
631 /// Returns whether the current score is null, indicating the
632 /// rematerialization is useless.
633 bool hasNullScore() const { return !RegionImpact; }
634
635 /// Compare score components of non-null scores pair-wise. A null score is
636 /// always strictly lesser than another non-null score.
bool operator<(const ScoredRemat &O) const {
  // A null score sorts strictly below any non-null score; two null scores
  // compare as equivalent.
  if (hasNullScore())
    return !O.hasNullScore();
  if (O.hasNullScore())
    return false;
  // Lexicographic comparison over the scoring components, most important
  // first: MaxFreq, then FreqDiff, then RegionImpact.
  if (MaxFreq != O.MaxFreq)
    return MaxFreq < O.MaxFreq;
  if (FreqDiff != O.FreqDiff)
    return FreqDiff < O.FreqDiff;
  if (RegionImpact != O.RegionImpact)
    return RegionImpact < O.RegionImpact;
  // Break ties using pointer to rematerializable register. Rematerializable
  // registers are collected in instruction order so, within the same
  // region, this will prefer registers defined earlier that have longer
  // live ranges in their defining region (since the registers we consider
  // are always live-out in their defining region).
  return Remat > O.Remat;
}
655
656#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
657 Printable print() const;
658#endif
659
660 private:
661 /// Expected register pressure decrease induced by rematerializing this
662 /// candidate.
663 GCNRegPressure RPSave;
664
665 // The three members below are the scoring components, top to bottom from
666 // most important to least important when comparing candidates.
667
668 /// Frequency of impacted target region with highest known frequency. This
669 /// only matters when the stage is trying to reduce spilling, so it is
670 /// always 0 when it is not.
671 uint64_t MaxFreq;
672 /// Frequency difference between defining and using regions. Negative values
673 /// indicate we are rematerializing to higher frequency regions; positive
674 /// values indicate the contrary.
675 int64_t FreqDiff;
676 /// Expected number of target regions impacted by the rematerialization,
677 /// scaled by the size of the register being rematerialized.
678 unsigned RegionImpact;
679
680 int64_t getFreqDiff(const FreqInfo &Freq) const;
681 };
682
683 /// Parent MBB to each region, in region order.
685 /// Register pressure targets for all regions.
686 SmallVector<GCNRPTarget> RPTargets;
687 /// Regions which are above the stage's RP target.
688 BitVector TargetRegions;
689 /// The target occupancy the set is trying to achieve. Empty when the
690 /// objective is spilling reduction.
691 std::optional<unsigned> TargetOcc;
692 /// Achieved occupancy *only* through rematerializations (pre-rescheduling).
693 unsigned AchievedOcc;
694 /// After successful stage initialization, indicates which regions should be
695 /// rescheduled.
696 BitVector RescheduleRegions;
697
698 /// List of rematerializable registers.
699 SmallVector<RematReg> RematRegs;
700
701 /// Holds enough information to rollback a rematerialization decision post
702 /// re-scheduling.
struct RollbackInfo {
  /// The rematerializable register under consideration.
  const RematReg *Remat;
  /// The rematerialized MI replacing the original defining MI.
  MachineInstr *RematMI;
  /// Maps register machine operand indices to their original register.
  SmallDenseMap<unsigned, Register, 4> RegMap;

  // Only Remat is known at construction time; RematMI and RegMap are
  // presumably filled in once the rematerialization is performed — not
  // visible here, confirm at use sites.
  RollbackInfo(const RematReg *Remat) : Remat(Remat) {}
};
713 /// List of rematerializations to rollback if rematerialization does not end
714 /// up being beneficial.
715 SmallVector<RollbackInfo> Rollbacks;
716
717 /// State of a region pre-re-scheduling but post-rematerializations that we
718 /// must keep to be able to revert re-scheduling effects.
struct RegionSchedRevert {
  /// Region number.
  unsigned RegionIdx;
  /// Original instruction order (both debug and non-debug MIs).
  std::vector<MachineInstr *> OrigMIOrder;
  /// Maximum pressure recorded in the region.
  GCNRegPressure MaxPressure;

  /// Snapshots the region's pre-re-scheduling state; the MI order referenced
  /// by \p OrigMIOrder is copied into owned storage.
  RegionSchedRevert(unsigned RegionIdx, ArrayRef<MachineInstr *> OrigMIOrder,
                    const GCNRegPressure &MaxPressure)
      : RegionIdx(RegionIdx), OrigMIOrder(OrigMIOrder),
        MaxPressure(MaxPressure) {}
};
732 /// After re-scheduling, contains pre-re-scheduling data for all re-scheduled
733 /// regions.
734 SmallVector<RegionSchedRevert> RegionReverts;
735
736 /// Returns the occupancy the stage is trying to achieve.
737 unsigned getStageTargetOccupancy() const;
738
739 /// Determines the stage's objective (increasing occupancy or reducing
740 /// spilling, set in \ref TargetOcc). Defines \ref RPTargets in all regions to
741 /// achieve that objective and mark those that don't achieve it in \ref
742 /// TargetRegions. Returns whether there is any target region.
743 bool setObjective();
744
745 /// Unsets target regions in \p Regions whose RP target has been reached.
746 void unsetSatisfiedRPTargets(const BitVector &Regions);
747
748 /// Fully recomputes RP from the DAG in \p Regions. Among those regions, sets
749 /// again all \ref TargetRegions that were optimistically marked as satisfied
750 /// but are actually not, and returns whether there were any such regions.
751 bool updateAndVerifyRPTargets(const BitVector &Regions);
752
753 /// Collects all rematerializable registers and appends them to \ref
754 /// RematRegs. \p MIRegion maps MIs to their region. Returns whether any
755 /// rematerializable register was found.
756 bool collectRematRegs(const DenseMap<MachineInstr *, unsigned> &MIRegion);
757
758 /// Deletes all rematerialized MIs from the MIR when they were kept around for
759 /// potential rollback.
760 void commitRematerializations() const;
761
762 /// Whether the MI is rematerializable
763 bool isReMaterializable(const MachineInstr &MI);
764
765 /// If remat alone did not increase occupancy to the target one, rollbacks all
766 /// rematerializations and resets live-ins/RP in all regions impacted by the
767 /// stage to their pre-stage values.
768 void finalizeGCNSchedStage() override;
769
770public:
771 bool initGCNSchedStage() override;
772
773 bool initGCNRegion() override;
774
775 void finalizeGCNRegion() override;
776
777 bool shouldRevertScheduling(unsigned WavesAfter) override;
778
780 : GCNSchedStage(StageID, DAG), TargetRegions(DAG.Regions.size()),
781 RescheduleRegions(DAG.Regions.size()) {
782 const unsigned NumRegions = DAG.Regions.size();
783 RPTargets.reserve(NumRegions);
784 RegionBB.reserve(NumRegions);
785 }
786};
787
795
804
806private:
807 std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
808
809 bool HasIGLPInstrs = false;
810
811public:
812 void schedule() override;
813
814 void finalizeSchedule() override;
815
817 std::unique_ptr<MachineSchedStrategy> S,
818 bool RemoveKillFlags);
819};
820
821} // End namespace llvm
822
823#endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
This file defines the DenseMap class.
This file defines the GCNRegPressure class, which tracks registry pressure by bookkeeping number of S...
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
This file implements a map that provides insertion order iteration.
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static constexpr unsigned SM(unsigned Version)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_type size() const
size - Returns the number of bits in this bitvector.
Definition BitVector.h:178
bool shouldRevertScheduling(unsigned WavesAfter) override
ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
GCNMaxILPSchedStrategy(const MachineSchedContext *C)
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
Apply a set of heuristics to a new candidate.
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
GCNMaxMemoryClauseSchedStrategy tries best to clause memory instructions as much as possible.
GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C)
GCNMaxOccupancySchedStrategy(const MachineSchedContext *C, bool IsLegacyScheduler=false)
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Orders nodes according to selected style.
GCNPostScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
DenseMap< unsigned, LaneBitmask > LiveRegSet
GCNSchedStrategy & S
GCNRegPressure PressureBefore
bool isRegionWithExcessRP() const
bool mayCauseSpilling(unsigned WavesAfter)
ScheduleMetrics getScheduleMetrics(const std::vector< SUnit > &InputSchedule)
GCNScheduleDAGMILive & DAG
const GCNSchedStageID StageID
std::vector< MachineInstr * > Unsched
GCNRegPressure PressureAfter
MachineFunction & MF
virtual void finalizeGCNRegion()
SIMachineFunctionInfo & MFI
unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle, DenseMap< unsigned, unsigned > &ReadyCycles, const TargetSchedModel &SM)
virtual ~GCNSchedStage()=default
virtual void finalizeGCNSchedStage()
virtual bool initGCNSchedStage()
virtual bool shouldRevertScheduling(unsigned WavesAfter)
std::vector< std::unique_ptr< ScheduleDAGMutation > > SavedMutations
GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
void modifyRegionSchedule(unsigned RegionIdx, MachineBasicBlock *MBB, ArrayRef< MachineInstr * > MIOrder)
Sets the schedule of region RegionIdx in block MBB to MIOrder.
MachineBasicBlock * CurrentMBB
const GCNSubtarget & ST
This is a minimal scheduler strategy.
const unsigned HighRPSGPRBias
GCNDownwardRPTracker DownwardTracker
void getRegisterPressures(bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU, std::vector< unsigned > &Pressure, std::vector< unsigned > &MaxPressure, GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker, ScheduleDAGMI *DAG, const SIRegisterInfo *SRI)
GCNSchedStrategy(const MachineSchedContext *C)
SmallVector< GCNSchedStageID, 4 > SchedStages
std::vector< unsigned > MaxPressure
SUnit * pickNodeBidirectional(bool &IsTopNode, bool &PickedPending)
GCNSchedStageID getCurrentStage()
bool tryPendingCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Evaluates instructions in the pending queue using a subset of scheduling heuristics.
SmallVectorImpl< GCNSchedStageID >::iterator CurrentStage
void schedNode(SUnit *SU, bool IsTopNode) override
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
std::optional< bool > GCNTrackersOverride
GCNDownwardRPTracker * getDownwardTracker()
std::vector< unsigned > Pressure
void initialize(ScheduleDAGMI *DAG) override
Initialize the strategy after building the DAG for a new region.
GCNUpwardRPTracker UpwardTracker
void printCandidateDecision(const SchedCandidate &Current, const SchedCandidate &Preferred)
const unsigned HighRPVGPRBias
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Cand, bool &IsPending, bool IsBottomUp)
unsigned getStructuralStallCycles(SchedBoundary &Zone, SUnit *SU) const
Estimate how many cycles SU must wait due to structural hazards at the current boundary cycle.
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, unsigned SGPRPressure, unsigned VGPRPressure, bool IsBottomUp)
void setTargetOccupancy(unsigned Occ)
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule, or return NULL.
GCNUpwardRPTracker * getUpwardTracker()
GCNSchedStageID getNextStage() const
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Orders nodes according to selected style.
GCNScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
ScheduleDAGMILive * DAG
GenericScheduler(const MachineSchedContext *C)
bool shouldRevertScheduling(unsigned WavesAfter) override
ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool shouldRevertScheduling(unsigned WavesAfter) override
MemoryClauseInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
bool shouldRevertScheduling(unsigned WavesAfter) override
OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
bool shouldRevertScheduling(unsigned WavesAfter) override
void finalizeGCNRegion() override
bool initGCNSchedStage() override
Simple wrapper around std::function<void(raw_ostream&)>.
Definition Printable.h:38
Track the current register pressure at some position in the instruction stream, and remember the high...
GCNRPTracker::LiveRegSet & getLiveRegsForRegionIdx(unsigned RegionIdx)
RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut)
Wrapper class representing virtual and physical registers.
Definition Register.h:20
RewriteMFMAFormStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Scheduling unit. This is a node in the scheduling DAG.
Each Scheduling boundary is associated with ready queues.
bool RemoveKillFlags
True if the DAG builder should remove kill flags (in preparation for rescheduling).
ScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
unsigned getBubbles() const
ScheduleMetrics(unsigned L, unsigned BC)
unsigned getLength() const
static const unsigned ScaleFactor
unsigned getMetric() const
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
typename SuperClass::iterator iterator
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Provide an instruction scheduling machine model to CodeGen passes.
UnclusteredHighRPStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
bool shouldRevertScheduling(unsigned WavesAfter) override
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:360
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
std::pair< MachineBasicBlock::iterator, MachineBasicBlock::iterator > RegionBoundaries
A region's boundaries i.e.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
Execution frequency information required by scoring heuristics.
SmallVector< uint64_t > Regions
Per-region execution frequencies. 0 when unknown.
uint64_t MinFreq
Minimum and maximum observed frequencies.
FreqInfo(MachineFunction &MF, const GCNScheduleDAGMILive &DAG)