LLVM 22.0.0git
MachineFunctionSplitter.cpp
Go to the documentation of this file.
1//===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// Uses profile information to split out cold blocks.
11//
12// This pass splits out cold machine basic blocks from the parent function. This
13// implementation leverages the basic block section framework. Blocks marked
14// cold by this pass are grouped together in a separate section prefixed with
15// ".text.unlikely.*". The linker can then group these together as a cold
16// section. The split part of the function is a contiguous region identified by
17// the symbol "foo.cold". Grouping all cold blocks across functions together
18// decreases fragmentation and improves icache and itlb utilization. Note that
19// the overall changes to the binary size are negligible; only a small number of
20// additional jump instructions may be introduced.
21//
22// For the original RFC of this pass please see
23// https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
24//===----------------------------------------------------------------------===//
25
37#include "llvm/CodeGen/Passes.h"
39#include "llvm/IR/Function.h"
42#include <optional>
43
44using namespace llvm;
45
46// FIXME: This cutoff value is CPU dependent and should be moved to
47// TargetTransformInfo once we consider enabling this on other platforms.
48// The value is expressed as a ProfileSummaryInfo integer percentile cutoff.
49// Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split.
50// The default was empirically determined to be optimal when considering cutoff
51// values between 99%-ile and 100%-ile with respect to iTLB and icache metrics on
52// Intel CPUs.
54 PercentileCutoff("mfs-psi-cutoff",
55 cl::desc("Percentile profile summary cutoff used to "
56 "determine cold blocks. Unused if set to zero."),
57 cl::init(999950), cl::Hidden);
58
60 "mfs-count-threshold",
62 "Minimum number of times a block must be executed to be retained."),
64
66 "mfs-split-ehcode",
67 cl::desc("Splits all EH code and it's descendants by default."),
68 cl::init(false), cl::Hidden);
69
70namespace {
71
/// Legacy-pass-manager machine function pass that uses profile information to
/// split cold blocks out of a function. The analysis requirements and the
/// transformation itself are defined out of line, after this class.
72class MachineFunctionSplitter : public MachineFunctionPass {
73public:
// Pass identifier; handed to the MachineFunctionPass base constructor below.
74 static char ID;
75 MachineFunctionSplitter() : MachineFunctionPass(ID) {
77 }
78
// Human-readable name reported for this pass.
79 StringRef getPassName() const override {
80 return "Machine Function Splitter Transformation";
81 }
82
// Declares the analyses this pass requires or optionally uses.
83 void getAnalysisUsage(AnalysisUsage &AU) const override;
84
// Entry point of the transformation; the bool result is the pass's
// "function was changed" indication.
85 bool runOnMachineFunction(MachineFunction &F) override;
86};
87} // end anonymous namespace
88
89/// setDescendantEHBlocksCold - This marks all EH pads and blocks reachable
90/// only via EH pads as cold. This will help mark EH pads statically cold
91/// instead of relying on profile data.
94 computeEHOnlyBlocks(MF, EHBlocks);
95 for (auto Block : EHBlocks) {
97 }
98}
99
101 auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
102 return X.getSectionID().Type < Y.getSectionID().Type;
103 };
106}
107
109 const MachineBlockFrequencyInfo *MBFI,
110 ProfileSummaryInfo *PSI) {
111 std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
112 // For instrumentation profiles and sample profiles, we use different ways
113 // to judge whether a block is cold and should be split.
115 // If using instrument profile, which is deemed "accurate", no count means
116 // cold.
117 if (!Count)
118 return true;
119 if (PercentileCutoff > 0)
121 // Fallthrough to end of function.
122 } else if (PSI->hasSampleProfile()) {
123 // For sample profile, no count means "do not judge coldness".
124 if (!Count)
125 return false;
126 }
127
128 return (*Count < ColdCountThreshold);
129}
130
131bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
132 if (skipFunction(MF.getFunction()))
133 return false;
134
135 // Do not split functions when -basic-block-sections=all is specified.
137 return false;
138 // We target functions with profile data. Static information in the form
139 // of exception handling code may be split to cold if user passes the
140 // mfs-split-ehcode flag.
141 bool UseProfileData = MF.getFunction().hasProfileData();
142 if (!UseProfileData && !SplitAllEHCode)
143 return false;
144
145 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
146 if (!TII.isFunctionSafeToSplit(MF))
147 return false;
148
149 // Do not split functions with BasicBlockSections profiles as they will
150 // be split by the BasicBlockSections pass.
151 auto BBSectionsProfile =
152 getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
153 if (BBSectionsProfile != nullptr &&
154 BBSectionsProfile->getBBSPR().isFunctionHot(MF.getName()))
155 return false;
156
157 // Renumbering blocks here preserves the order of the blocks as
158 // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
159 // blocks. Preserving the order of blocks is essential to retaining decisions
160 // made by prior passes such as MachineBlockPlacement.
161 MF.RenumberBlocks();
162 MF.setBBSectionsType(BasicBlockSection::Preset);
163
164 MachineBlockFrequencyInfo *MBFI = nullptr;
165 ProfileSummaryInfo *PSI = nullptr;
166 if (UseProfileData) {
167 MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
168 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
169 // If we don't have a good profile (sample profile is not deemed
170 // as a "good profile") and the function is not hot, then early
171 // return. (Because we can only trust hot functions when profile
172 // quality is not good.)
173 if (PSI->hasSampleProfile() && !PSI->isFunctionHotInCallGraph(&MF, *MBFI)) {
174 // Split all EH code and its descendants statically by default.
175 if (SplitAllEHCode)
178 return true;
179 }
180 }
181
183 for (auto &MBB : MF) {
184 if (MBB.isEntryBlock())
185 continue;
186
187 if (MBB.isEHPad())
188 LandingPads.push_back(&MBB);
189 else if (UseProfileData && isColdBlock(MBB, MBFI, PSI) &&
190 TII.isMBBSafeToSplitToCold(MBB) && !SplitAllEHCode)
192 }
193
194 // Split all EH code and its descendants statically by default.
195 if (SplitAllEHCode)
197 // We only split out eh pads if all of them are cold.
198 else {
199 // Here we have UseProfileData == true.
200 bool HasHotLandingPads = false;
201 for (const MachineBasicBlock *LP : LandingPads) {
202 if (!isColdBlock(*LP, MBFI, PSI) || !TII.isMBBSafeToSplitToCold(*LP))
203 HasHotLandingPads = true;
204 }
205 if (!HasHotLandingPads) {
206 for (MachineBasicBlock *LP : LandingPads)
207 LP->setSectionID(MBBSectionID::ColdSectionID);
208 }
209 }
210
212 return true;
213}
214
/// Registers this pass's analysis dependencies on \p AU. Machine module info,
/// machine block frequency info and profile summary info are required; the
/// basic block sections profile reader is only used if it is available.
215void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
216 AU.addRequired<MachineModuleInfoWrapperPass>();
217 AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
218 AU.addRequired<ProfileSummaryInfoWrapperPass>();
// Optional: runOnMachineFunction fetches this with getAnalysisIfAvailable and
// checks the result for nullptr before using it.
219 AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
220}
221
// Out-of-line definition of the pass's static ID member.
222char MachineFunctionSplitter::ID = 0;
// Registers the pass under the argument string "machine-function-splitter"
// with the description below; the two trailing `false` values are the macro's
// `cfg` and `analysis` parameters.
223INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter",
224 "Split machine functions using profile information", false,
225 false)
226
228 return new MachineFunctionSplitter();
229}
MachineBasicBlock & MBB
const HexagonInstrInfo * TII
#define F(x, y, z)
Definition MD5.cpp:55
static bool isColdBlock(const MachineBasicBlock &MBB, const MachineBlockFrequencyInfo *MBFI, ProfileSummaryInfo *PSI)
static void finishAdjustingBasicBlocksAndLandingPads(MachineFunction &MF)
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
static void setDescendantEHBlocksCold(MachineFunction &MF)
setDescendantEHBlocksCold - This splits all EH pads and blocks reachable only by EH pad as cold.
static cl::opt< unsigned > PercentileCutoff("mfs-psi-cutoff", cl::desc("Percentile profile summary cutoff used to " "determine cold blocks. Unused if set to zero."), cl::init(999950), cl::Hidden)
static cl::opt< bool > SplitAllEHCode("mfs-split-ehcode", cl::desc("Splits all EH code and it's descendants by default."), cl::init(false), cl::Hidden)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addRequired()
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition Function.h:334
bool isEHPad() const
Returns true if the block is a landing pad.
LLVM_ABI bool isEntryBlock() const
Returns true if this is the entry block of the function.
void setSectionID(MBBSectionID V)
Sets the section ID for this basic block.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
LLVM_ABI std::optional< uint64_t > getBlockProfileCount(const MachineBasicBlock *MBB) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void setBBSectionsType(BasicBlockSection V)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
void RenumberBlocks(MachineBasicBlock *MBBFrom=nullptr)
RenumberBlocks - This discards all of the MachineBasicBlock numbers and recomputes them.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Analysis providing profile information.
bool hasCSInstrumentationProfile() const
Returns true if module M has context sensitive instrumentation profile.
bool hasInstrumentationProfile() const
Returns true if module M has instrumentation profile.
bool hasSampleProfile() const
Returns true if module M has sample profile.
LLVM_ABI bool isColdCountNthPercentile(int PercentileCutoff, uint64_t C) const
Returns true if count C is considered cold with regard to a given cold percentile cutoff value.
bool isFunctionHotInCallGraph(const FuncT *F, BFIT &BFI) const
Returns true if F contains hot code.
void push_back(const T &Elt)
llvm::BasicBlockSection getBBSectionsType() const
If basic blocks should be emitted into their own section, corresponding to -fbasic-block-sections.
virtual const TargetInstrInfo * getInstrInfo() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI MachineFunctionPass * createMachineFunctionSplitterPass()
createMachineFunctionSplitterPass - This pass splits machine functions using profile information.
LLVM_ABI void initializeMachineFunctionSplitterPass(PassRegistry &)
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
static void computeEHOnlyBlocks(FunctionT &F, DenseSet< BlockT * > &EHBlocks)
Compute a list of blocks that are only reachable via EH paths.
Definition EHUtils.h:18
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
void avoidZeroOffsetLandingPad(MachineFunction &MF)
void sortBasicBlocksAndUpdateBranches(MachineFunction &MF, MachineBasicBlockComparator MBBCmp)
LLVM_ABI static const MBBSectionID ColdSectionID