37#define INSTR_PROF_VALUE_PROF_MEMOP_API
52#define DEBUG_TYPE "pgo-memop-opt"
54STATISTIC(NumOfPGOMemOPOpt,
"Number of memop intrinsics optimized.");
55STATISTIC(NumOfPGOMemOPAnnotate,
"Number of memop intrinsics annotated.");
62 cl::desc(
"The minimum count to optimize memory "
74 cl::desc(
"The percentage threshold for the "
75 "memory intrinsic calls optimization"));
80 cl::desc(
"The max version for the optimized memory "
86 cl::desc(
"Scale the memop size counts using the basic "
87 " block count value"));
92 cl::desc(
"Size-specialize memcmp and bcmp calls"));
96 cl::desc(
"Optimize the memop size <= this value"));
103 switch (
MI->getIntrinsicID()) {
104 case Intrinsic::memcpy:
106 case Intrinsic::memmove:
108 case Intrinsic::memset:
118 MemOp(MemIntrinsic *
MI) : I(
MI) {}
119 MemOp(CallInst *CI) : I(CI) {}
123 if (
auto MI = asMI())
128 if (
auto MI = asMI())
129 return MI->getLength();
130 return asCI()->getArgOperand(2);
133 if (
auto MI = asMI())
135 asCI()->setArgOperand(2,
Length);
137 StringRef getFuncName() {
138 if (
auto MI = asMI())
139 return MI->getCalledFunction()->getName();
140 return asCI()->getCalledFunction()->getName();
143 if (
auto MI = asMI())
144 if (
MI->getIntrinsicID() == Intrinsic::memmove)
148 bool isMemcmp(TargetLibraryInfo &TLI) {
150 if (asMI() ==
nullptr && TLI.
getLibFunc(*asCI(), Func) &&
151 Func == LibFunc_memcmp) {
156 bool isBcmp(TargetLibraryInfo &TLI) {
158 if (asMI() ==
nullptr && TLI.
getLibFunc(*asCI(), Func) &&
159 Func == LibFunc_bcmp) {
164 const char *
getName(TargetLibraryInfo &TLI) {
165 if (
auto MI = asMI())
166 return getMIName(
MI);
169 if (Func == LibFunc_memcmp)
171 if (Func == LibFunc_bcmp)
179class MemOPSizeOpt :
public InstVisitor<MemOPSizeOpt> {
181 MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI,
182 OptimizationRemarkEmitter &ORE, DominatorTree *DT,
183 TargetLibraryInfo &TLI)
184 : Func(Func), BFI(BFI), ORE(ORE), DT(DT), TLI(TLI), Changed(
false) {}
185 bool isChanged()
const {
return Changed; }
190 for (
auto &MO : WorkList) {
191 ++NumOfPGOMemOPAnnotate;
196 <<
"is Transformed.\n");
201 void visitMemIntrinsic(MemIntrinsic &
MI) {
206 WorkList.push_back(MemOp(&
MI));
209 void visitCallInst(CallInst &CI) {
211 if (TLI.getLibFunc(CI, Func) &&
212 (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
214 WorkList.push_back(MemOp(&CI));
220 BlockFrequencyInfo &BFI;
221 OptimizationRemarkEmitter &ORE;
223 TargetLibraryInfo &TLI;
225 std::vector<MemOp> WorkList;
226 bool perform(MemOp MO);
244 return ScaleCount / Denom;
247bool MemOPSizeOpt::perform(
MemOp MO) {
254 uint32_t MaxNumVals = INSTR_PROF_NUM_BUCKETS;
261 uint64_t ActualCount = TotalCount;
262 uint64_t SavedTotalCount = TotalCount;
267 ActualCount = *BBEdgeCount;
270 LLVM_DEBUG(
dbgs() <<
"Read one memory intrinsic profile with count "
271 << ActualCount <<
"\n");
274 : VDs) {
dbgs() <<
" (" << VD.Value <<
"," << VD.Count <<
")\n"; });
283 TotalCount = ActualCount;
286 <<
" denominator = " << SavedTotalCount <<
"\n");
289 uint64_t RemainCount = TotalCount;
290 uint64_t SavedRemainCount = SavedTotalCount;
291 SmallVector<uint64_t, 16> SizeIds;
292 SmallVector<uint64_t, 16> CaseCounts;
293 SmallDenseSet<uint64_t, 16> SeenSizeId;
294 uint64_t MaxCount = 0;
299 for (
auto I = VDs.begin(),
E = VDs.end();
I !=
E; ++
I) {
301 int64_t
V = VD.Value;
302 uint64_t
C = VD.Count;
304 C = getScaledCount(
C, ActualCount, SavedTotalCount);
318 if (!SeenSizeId.
insert(V).second) {
319 errs() <<
"warning: Invalid Profile Data in Function " <<
Func.getName()
320 <<
": Two identical values in MemOp value counts.\n";
331 assert(SavedRemainCount >= VD.Count);
332 SavedRemainCount -= VD.Count;
343 CaseCounts[0] = RemainCount;
344 if (RemainCount > MaxCount)
345 MaxCount = RemainCount;
347 uint64_t SumForOpt = TotalCount - RemainCount;
350 <<
" Versions (covering " << SumForOpt <<
" out of "
351 << TotalCount <<
")\n");
379 MergeBB->
setName(
"MemOP.Merge");
381 DefaultBB->
setName(
"MemOP.Default");
383 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
384 auto &Ctx =
Func.getContext();
387 Value *SizeVar = MO.getLength();
388 SwitchInst *
SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.
size());
389 Type *MemOpTy = MO.I->getType();
390 PHINode *
PHI =
nullptr;
394 PHI = IRBM.CreatePHI(MemOpTy, SizeIds.
size() + 1,
"MemOP.RVMerge");
395 MO.I->replaceAllUsesWith(
PHI);
396 PHI->addIncoming(MO.I, DefaultBB);
400 MO.I->setMetadata(LLVMContext::MD_prof,
nullptr);
402 if (SavedRemainCount > 0 ||
Version != VDs.size()) {
405 IPVK_MemOPSize, VDs.
size());
410 std::vector<DominatorTree::UpdateType> Updates;
412 Updates.reserve(2 * SizeIds.
size());
414 for (uint64_t SizeId : SizeIds) {
416 Ctx, Twine(
"MemOP.Case.") + Twine(SizeId), &Func, DefaultBB);
417 MemOp NewMO = MO.clone();
420 assert(SizeType &&
"Expected integer type size argument.");
421 ConstantInt *CaseSizeId = ConstantInt::get(SizeType, SizeId);
422 NewMO.setLength(CaseSizeId);
423 NewMO.I->insertInto(CaseBB, CaseBB->
end());
425 IRBCase.CreateBr(MergeBB);
426 SI->addCase(CaseSizeId, CaseBB);
428 PHI->addIncoming(NewMO.I, CaseBB);
430 Updates.push_back({DominatorTree::Insert, CaseBB, MergeBB});
431 Updates.push_back({DominatorTree::Insert, BB, CaseBB});
435 DTU.applyUpdates(Updates);
447 return OptimizationRemark(
DEBUG_TYPE,
"memopt-opt", MO.I)
448 <<
"optimized " <<
NV(
"Memop", MO.getName(TLI)) <<
" with count "
449 <<
NV(
"Count", SumForOpt) <<
" out of " <<
NV(
"Total", TotalCount)
450 <<
" for " <<
NV(
"Versions",
Version) <<
" versions";
465 MemOPSizeOpt MemOPSizeOpt(
F, BFI, ORE, DT, TLI);
466 MemOPSizeOpt.perform();
467 return MemOPSizeOpt.isChanged();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Function Alias Analysis false
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This header defines various interfaces for pass management in LLVM.
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI, OptimizationRemarkEmitter &ORE, DominatorTree *DT, TargetLibraryInfo &TLI)
FunctionAnalysisManager FAM
static StringRef getName(Value *V)
static void visit(BasicBlock &Start, std::function< bool(BasicBlock *)> op)
static bool isProfitable(const StableFunctionMap::StableFunctionEntries &SFS)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI void setBlockFreq(const BasicBlock *BB, BlockFrequency Freq)
LLVM_ABI std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
LLVM_ABI BlockFrequency getBlockFreq(const BasicBlock *BB) const
getblockFreq - Return block frequency.
Value * getArgOperand(unsigned i) const
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Base class for instruction visitors.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
This is the common base class for memset/memcpy/memmove.
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
bool isVoidTy() const
Return true if this is 'void'.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
std::pair< iterator, bool > insert(const ValueT &V)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
LLVM_ABI void setProfMetadata(Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
static cl::opt< bool > MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden, cl::desc("Scale the memop size counts using the basic " " block count value"))
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
static cl::opt< bool > DisableMemOPOPT("disable-memop-opt", cl::init(false), cl::Hidden, cl::desc("Disable optimize"))
static cl::opt< unsigned > MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::init(1000), cl::desc("The minimum count to optimize memory " "intrinsic calls"))
static cl::opt< unsigned > MemOpMaxOptSize("memop-value-prof-max-opt-size", cl::Hidden, cl::init(128), cl::desc("Optimize the memop size <= this value"))
FunctionAddr VTableAddr uintptr_t uintptr_t Version
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
cl::opt< bool > MemOPOptMemcmpBcmp("pgo-memop-optimize-memcmp-bcmp", cl::init(true), cl::Hidden, cl::desc("Size-specialize memcmp and bcmp calls"))
LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and returns them if Inst is annotated with value profile dat...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
std::enable_if_t< std::is_unsigned_v< T >, T > SaturatingMultiply(T X, T Y, bool *ResultOverflowed=nullptr)
Multiply two unsigned integers, X and Y, of type T.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
static cl::opt< unsigned > MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden, cl::desc("The max version for the optimized memory " " intrinsic calls"))
static cl::opt< unsigned > MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40), cl::Hidden, cl::desc("The percentage threshold for the " "memory intrinsic calls optimization"))
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.