LLVM 23.0.0git
CodeGenPrepare.cpp
Go to the documentation of this file.
1//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass munges the code in the input function to better prepare it for
10// SelectionDAG-based code generation. This works around limitations in its
11// basic-block-at-a-time approach. It should eventually be removed.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/Statistic.h"
46#include "llvm/Config/llvm-config.h"
47#include "llvm/IR/Argument.h"
48#include "llvm/IR/Attributes.h"
49#include "llvm/IR/BasicBlock.h"
50#include "llvm/IR/CFG.h"
51#include "llvm/IR/Constant.h"
52#include "llvm/IR/Constants.h"
53#include "llvm/IR/DataLayout.h"
54#include "llvm/IR/DebugInfo.h"
56#include "llvm/IR/Dominators.h"
57#include "llvm/IR/Function.h"
59#include "llvm/IR/GlobalValue.h"
61#include "llvm/IR/IRBuilder.h"
62#include "llvm/IR/InlineAsm.h"
63#include "llvm/IR/InstrTypes.h"
64#include "llvm/IR/Instruction.h"
67#include "llvm/IR/Intrinsics.h"
68#include "llvm/IR/IntrinsicsAArch64.h"
69#include "llvm/IR/LLVMContext.h"
70#include "llvm/IR/MDBuilder.h"
71#include "llvm/IR/Module.h"
72#include "llvm/IR/Operator.h"
75#include "llvm/IR/Statepoint.h"
76#include "llvm/IR/Type.h"
77#include "llvm/IR/Use.h"
78#include "llvm/IR/User.h"
79#include "llvm/IR/Value.h"
80#include "llvm/IR/ValueHandle.h"
81#include "llvm/IR/ValueMap.h"
83#include "llvm/Pass.h"
89#include "llvm/Support/Debug.h"
99#include <algorithm>
100#include <cassert>
101#include <cstdint>
102#include <iterator>
103#include <limits>
104#include <memory>
105#include <optional>
106#include <utility>
107#include <vector>
108
109using namespace llvm;
110using namespace llvm::PatternMatch;
111
112#define DEBUG_TYPE "codegenprepare"
113
114STATISTIC(NumBlocksElim, "Number of blocks eliminated");
115STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
116STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
117STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
118 "sunken Cmps");
119STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
120 "of sunken Casts");
121STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
122 "computations were sunk");
123STATISTIC(NumMemoryInstsPhiCreated,
124 "Number of phis created when address "
125 "computations were sunk to memory instructions");
126STATISTIC(NumMemoryInstsSelectCreated,
127 "Number of select created when address "
128 "computations were sunk to memory instructions");
129STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
130STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
131STATISTIC(NumAndsAdded,
132 "Number of and mask instructions added to form ext loads");
133STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
134STATISTIC(NumRetsDup, "Number of return instructions duplicated");
135STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
136STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
137STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
138
140 "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
141 cl::desc("Disable branch optimizations in CodeGenPrepare"));
142
143static cl::opt<bool>
144 DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
145 cl::desc("Disable GC optimizations in CodeGenPrepare"));
146
147static cl::opt<bool>
148 DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden,
149 cl::init(false),
150 cl::desc("Disable select to branch conversion."));
151
152static cl::opt<bool>
153 AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true),
154 cl::desc("Address sinking in CGP using GEPs."));
155
156static cl::opt<bool>
157 EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true),
158 cl::desc("Enable sinking and/cmp into branches."));
159
161 "disable-cgp-store-extract", cl::Hidden, cl::init(false),
162 cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
163
165 "stress-cgp-store-extract", cl::Hidden, cl::init(false),
166 cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
167
169 "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
170 cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
171 "CodeGenPrepare"));
172
174 "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
175 cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
176 "optimization in CodeGenPrepare"));
177
179 "disable-preheader-prot", cl::Hidden, cl::init(false),
180 cl::desc("Disable protection against removing loop preheaders"));
181
183 "profile-guided-section-prefix", cl::Hidden, cl::init(true),
184 cl::desc("Use profile info to add section prefix for hot/cold functions"));
185
187 "profile-unknown-in-special-section", cl::Hidden,
188 cl::desc("In profiling mode like sampleFDO, if a function doesn't have "
189 "profile, we cannot tell the function is cold for sure because "
190 "it may be a function newly added without ever being sampled. "
191 "With the flag enabled, compiler can put such profile unknown "
192 "functions into a special section, so runtime system can choose "
193 "to handle it in a different way than .text section, to save "
194 "RAM for example. "));
195
197 "bbsections-guided-section-prefix", cl::Hidden, cl::init(true),
198 cl::desc("Use the basic-block-sections profile to determine the text "
199 "section prefix for hot functions. Functions with "
200 "basic-block-sections profile will be placed in `.text.hot` "
201 "regardless of their FDO profile info. Other functions won't be "
202 "impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
203 "profiles."));
204
206 "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
207 cl::desc("Skip merging empty blocks if (frequency of empty block) / "
208 "(frequency of destination block) is greater than this ratio"));
209
211 "force-split-store", cl::Hidden, cl::init(false),
212 cl::desc("Force store splitting no matter what the target query says."));
213
215 "cgp-type-promotion-merge", cl::Hidden,
216 cl::desc("Enable merging of redundant sexts when one is dominating"
217 " the other."),
218 cl::init(true));
219
221 "disable-complex-addr-modes", cl::Hidden, cl::init(false),
222 cl::desc("Disables combining addressing modes with different parts "
223 "in optimizeMemoryInst."));
224
225static cl::opt<bool>
226 AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
227 cl::desc("Allow creation of Phis in Address sinking."));
228
230 "addr-sink-new-select", cl::Hidden, cl::init(true),
231 cl::desc("Allow creation of selects in Address sinking."));
232
234 "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
235 cl::desc("Allow combining of BaseReg field in Address sinking."));
236
238 "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
239 cl::desc("Allow combining of BaseGV field in Address sinking."));
240
242 "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
243 cl::desc("Allow combining of BaseOffs field in Address sinking."));
244
246 "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
247 cl::desc("Allow combining of ScaledReg field in Address sinking."));
248
249static cl::opt<bool>
250 EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
251 cl::init(true),
252 cl::desc("Enable splitting large offset of GEP."));
253
255 "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
256 cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
257
258static cl::opt<bool>
259 VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false),
260 cl::desc("Enable BFI update verification for "
261 "CodeGenPrepare."));
262
263static cl::opt<bool>
264 OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true),
265 cl::desc("Enable converting phi types in CodeGenPrepare"));
266
268 HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden,
269 cl::desc("Least BB number of huge function."));
270
272 MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100),
274 cl::desc("Max number of address users to look at"));
275
276static cl::opt<bool>
277 DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false),
278 cl::desc("Disable elimination of dead PHI nodes."));
279
280namespace {
281
/// Which kinds of extension have been observed for a promoted instruction.
enum ExtType {
  /// A zero extension has been seen.
  ZeroExtension = 0,
  /// A sign extension has been seen.
  SignExtension = 1,
  /// Both kinds have been seen: either a sext showed up after ZeroExtension
  /// had been set, or a zext showed up after SignExtension had been set.
  /// This makes the type information of a promoted instruction invalid.
  BothExtension = 2
};
290
/// How much of the dominator information a transformation has changed.
enum ModifyDT {
  /// Nothing was modified.
  NotModifyDT = 0,
  /// The basic-block dominator tree was modified.
  ModifyBBDT = 1,
  /// The instruction dominance inside a basic block was modified. This
  /// usually means an instruction was moved, deleted or inserted in the
  /// block, so the instructions of that block should be re-iterated.
  ModifyInstDT = 2
};
299
300using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
301using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
302using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
304using ValueToSExts = MapVector<Value *, SExts>;
305
306class TypePromotionTransaction;
307
/// Implementation of the CodeGenPrepare transformation. It holds the target
/// and analysis handles, the per-function bookkeeping containers and the
/// individual optimization routines. CodeGenPrepareLegacyPass is a friend and
/// fills in the private handles directly before calling _run().
308class CodeGenPrepare {
309 friend class CodeGenPrepareLegacyPass;
 // Target and analysis handles; set up by run() or by the legacy pass wrapper.
310 const TargetMachine *TM = nullptr;
311 const TargetSubtargetInfo *SubtargetInfo = nullptr;
312 const TargetLowering *TLI = nullptr;
313 const TargetRegisterInfo *TRI = nullptr;
314 const TargetTransformInfo *TTI = nullptr;
315 const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
316 const TargetLibraryInfo *TLInfo = nullptr;
317 DomTreeUpdater *DTU = nullptr;
318 LoopInfo *LI = nullptr;
 // NOTE(review): unlike the neighbouring pointers, BFI and BPI have no
 // default member initializer, so they are indeterminate until assigned.
319 BlockFrequencyInfo *BFI;
320 BranchProbabilityInfo *BPI;
321 ProfileSummaryInfo *PSI = nullptr;
322
323 /// As we scan instructions optimizing them, this is the next instruction
324 /// to optimize. Transforms that can invalidate this should update it.
325 BasicBlock::iterator CurInstIterator;
326
327 /// Keeps track of non-local addresses that have been sunk into a block.
328 /// This allows us to avoid inserting duplicate code for blocks with
329 /// multiple load/stores of the same address. The usage of WeakTrackingVH
330 /// enables SunkAddrs to be treated as a cache whose entries can be
331 /// invalidated if a sunken address computation has been erased.
332 ValueMap<Value *, WeakTrackingVH> SunkAddrs;
333
334 /// Keeps track of all instructions inserted for the current function.
335 SetOfInstrs InsertedInsts;
336
337 /// Keeps track of the type of the related instruction before their
338 /// promotion for the current function.
339 InstrToOrigTy PromotedInsts;
340
341 /// Keep track of instructions removed during promotion.
342 SetOfInstrs RemovedInsts;
343
344 /// Keep track of sext chains based on their initial value.
345 DenseMap<Value *, Instruction *> SeenChainsForSExt;
346
347 /// Keep track of GEPs accessing the same data structures such as structs or
348 /// arrays that are candidates to be split later because of their large
349 /// size.
 // NOTE(review): the middle line of this declaration (original line 351) is
 // absent from this listing, so the mapped type is not visible here.
350 MapVector<AssertingVH<Value>,
352 LargeOffsetGEPMap;
353
354 /// Keep track of new GEP base after splitting the GEPs having large offset.
355 SmallSet<AssertingVH<Value>, 2> NewGEPBases;
356
357 /// Map serial numbers to Large offset GEPs.
358 DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
359
360 /// Keep track of SExt promoted.
361 ValueToSExts ValToSExtendedUses;
362
363 /// True if the function has the OptSize attribute.
 /// Assigned at the start of _run(); it has no default initializer.
364 bool OptSize;
365
366 /// DataLayout for the Function being processed.
367 const DataLayout *DL = nullptr;
368
369public:
 /// Default construction leaves TM null.
370 CodeGenPrepare() = default;
 /// Construct with the target machine that run() queries for subtarget info.
371 CodeGenPrepare(const TargetMachine *TM) : TM(TM){};
372 /// If encounter huge function, we need to limit the build time.
373 bool IsHugeFunc = false;
374
375 /// FreshBBs is like worklist, it collected the updated BBs which need
376 /// to be optimized again.
377 /// Note: Consider building time in this pass, when a BB updated, we need
378 /// to insert such BB into FreshBBs for huge function.
379 SmallPtrSet<BasicBlock *, 32> FreshBBs;
380
 /// Drop per-function state. Only InsertedInsts, PromotedInsts and FreshBBs
 /// are cleared here.
381 void releaseMemory() {
382 // Clear per function information.
383 InsertedInsts.clear();
384 PromotedInsts.clear();
385 FreshBBs.clear();
386 }
387
 /// Entry point taking a FunctionAnalysisManager; sets up the analysis
 /// handles and then calls _run().
388 bool run(Function &F, FunctionAnalysisManager &AM);
389
390private:
 /// Invoke \p f while watching the instruction at CurInstIterator. If the
 /// handle no longer refers to that same value once \p f returns, restart
 /// iteration at the beginning of \p BB and clear SunkAddrs.
391 template <typename F>
392 void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
393 // Substituting can cause recursive simplifications, which can invalidate
394 // our iterator. Use a WeakTrackingVH to hold onto it in case this
395 // happens.
396 Value *CurValue = &*CurInstIterator;
397 WeakTrackingVH IterHandle(CurValue);
398
399 f();
400
401 // If the iterator instruction was recursively deleted, start over at the
402 // start of the block.
403 if (IterHandle != CurValue) {
404 CurInstIterator = BB->begin();
405 SunkAddrs.clear();
406 }
407 }
408
409 // Get the DominatorTree, updating it if necessary.
410 DominatorTree &getDT() { return DTU->getDomTree(); }
411
 // Bookkeeping and block-level clean-ups.
412 void removeAllAssertingVHReferences(Value *V);
413 bool eliminateAssumptions(Function &F);
414 bool eliminateFallThrough(Function &F);
415 bool eliminateMostlyEmptyBlocks(Function &F, bool &ResetLI);
416 BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
417 bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
418 bool eliminateMostlyEmptyBlock(BasicBlock *BB);
419 bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
420 bool isPreheader);
 // Per-block and per-instruction optimizations.
421 bool makeBitReverse(Instruction &I);
422 bool optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT);
423 bool optimizeInst(Instruction *I, ModifyDT &ModifiedDT);
424 bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy,
425 unsigned AddrSpace);
426 bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr);
427 bool optimizeMulWithOverflow(Instruction *I, bool IsSigned,
428 ModifyDT &ModifiedDT);
429 bool optimizeInlineAsmInst(CallInst *CS);
430 bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT);
431 bool optimizeExt(Instruction *&I);
432 bool optimizeExtUses(Instruction *I);
433 bool optimizeLoadExt(LoadInst *Load);
434 bool optimizeShiftInst(BinaryOperator *BO);
435 bool optimizeFunnelShift(IntrinsicInst *Fsh);
436 bool optimizeSelectInst(SelectInst *SI);
437 bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
438 bool optimizeSwitchType(SwitchInst *SI);
439 bool optimizeSwitchPhiConstants(SwitchInst *SI);
440 bool optimizeSwitchInst(SwitchInst *SI);
441 bool optimizeExtractElementInst(Instruction *Inst);
442 bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
443 bool fixupDbgVariableRecord(DbgVariableRecord &I);
444 bool fixupDbgVariableRecordsOnInst(Instruction &I);
445 bool placeDbgValues(Function &F);
446 bool placePseudoProbes(Function &F);
447 bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
448 LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
449 bool tryToPromoteExts(TypePromotionTransaction &TPT,
450 const SmallVectorImpl<Instruction *> &Exts,
451 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
452 unsigned CreatedInstsCost = 0);
453 bool mergeSExts(Function &F);
454 bool splitLargeGEPOffsets();
455 bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> &Visited,
456 SmallPtrSetImpl<Instruction *> &DeletedInstrs);
457 bool optimizePhiTypes(Function &F);
458 bool performAddressTypePromotion(
459 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
460 bool HasPromoted, TypePromotionTransaction &TPT,
461 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
462 bool splitBranchCondition(Function &F);
463 bool simplifyOffsetableRelocate(GCStatepointInst &I);
464
465 bool tryToSinkFreeOperands(Instruction *I);
466 bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1,
467 CmpInst *Cmp, Intrinsic::ID IID);
468 bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
469 bool optimizeURem(Instruction *Rem);
470 bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
471 bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
472 bool unfoldPowerOf2Test(CmpInst *Cmp);
 // Recomputes BFI from scratch and compares it against the maintained one.
473 void verifyBFIUpdates(Function &F);
 // Shared driver; expects the analysis handles above to be set already.
474 bool _run(Function &F);
475};
476
/// FunctionPass wrapper around CodeGenPrepare. runOnFunction() collects the
/// analyses declared in getAnalysisUsage(), stores them into a CodeGenPrepare
/// instance and calls its _run() method.
477class CodeGenPrepareLegacyPass : public FunctionPass {
478public:
479 static char ID; // Pass identification, replacement for typeid
480
481 CodeGenPrepareLegacyPass() : FunctionPass(ID) {}
482
 /// Run CodeGenPrepare on \p F; returns true if the function was changed.
483 bool runOnFunction(Function &F) override;
484
485 StringRef getPassName() const override { return "CodeGen Prepare"; }
486
 /// Declare the analyses fetched in runOnFunction(). The basic-block-sections
 /// profile reader is optional (used only if available); all others are
 /// required.
487 void getAnalysisUsage(AnalysisUsage &AU) const override {
488 // FIXME: When we can selectively preserve passes, preserve the domtree.
489 AU.addRequired<ProfileSummaryInfoWrapperPass>();
490 AU.addRequired<TargetLibraryInfoWrapperPass>();
491 AU.addRequired<TargetPassConfig>();
492 AU.addRequired<TargetTransformInfoWrapperPass>();
493 AU.addRequired<DominatorTreeWrapperPass>();
494 AU.addRequired<LoopInfoWrapperPass>();
495 AU.addRequired<BranchProbabilityInfoWrapperPass>();
496 AU.addRequired<BlockFrequencyInfoWrapperPass>();
497 AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
498 }
499};
500
501} // end anonymous namespace
502
503char CodeGenPrepareLegacyPass::ID = 0;
504
505bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) {
506 if (skipFunction(F))
507 return false;
508 auto TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
509 CodeGenPrepare CGP(TM);
510 CGP.DL = &F.getDataLayout();
511 CGP.SubtargetInfo = TM->getSubtargetImpl(F);
512 CGP.TLI = CGP.SubtargetInfo->getTargetLowering();
513 CGP.TRI = CGP.SubtargetInfo->getRegisterInfo();
514 CGP.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
515 CGP.TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
516 CGP.LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
517 CGP.BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
518 CGP.BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
519 CGP.PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
520 auto BBSPRWP =
521 getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
522 CGP.BBSectionsProfileReader = BBSPRWP ? &BBSPRWP->getBBSPR() : nullptr;
523 DomTreeUpdater DTUpdater(
524 &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
525 DomTreeUpdater::UpdateStrategy::Lazy);
526 CGP.DTU = &DTUpdater;
527
528 return CGP._run(F);
529}
530
531INITIALIZE_PASS_BEGIN(CodeGenPrepareLegacyPass, DEBUG_TYPE,
532 "Optimize for code generation", false, false)
540INITIALIZE_PASS_END(CodeGenPrepareLegacyPass, DEBUG_TYPE,
541 "Optimize for code generation", false, false)
542
544 return new CodeGenPrepareLegacyPass();
545}
546
549 CodeGenPrepare CGP(TM);
550
551 bool Changed = CGP.run(F, AM);
552 if (!Changed)
553 return PreservedAnalyses::all();
554
558 return PA;
559}
560
/// Entry point taking a FunctionAnalysisManager: populate the target and
/// analysis handles for \p F from \p AM, then delegate to _run().
/// Returns whatever _run() returns.
/// NOTE(review): original lines 567, 569-570 and 574-575 are absent from this
/// listing, so the assignments of some handles (and the DTUpdater declaration)
/// are not fully visible below.
/// NOTE(review): TM is dereferenced unconditionally here, while the default
/// constructor leaves it null.
561bool CodeGenPrepare::run(Function &F, FunctionAnalysisManager &AM) {
562 DL = &F.getDataLayout();
563 SubtargetInfo = TM->getSubtargetImpl(F);
564 TLI = SubtargetInfo->getTargetLowering();
565 TRI = SubtargetInfo->getRegisterInfo();
566 TLInfo = &AM.getResult<TargetLibraryAnalysis>(F);
568 LI = &AM.getResult<LoopAnalysis>(F);
 // PSI is a module-level analysis; only a cached result is looked up here.
571 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
572 PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
573 BBSectionsProfileReader =
576 DomTreeUpdater::UpdateStrategy::Lazy);
577 DTU = &DTUpdater;
578 return _run(F);
579}
580
/// Shared driver for both pass-manager entry points. In order, it:
///  - assigns a section prefix to \p F from profile information,
///  - bypasses slow divisions where the target asks for it,
///  - removes assumptions and mostly-empty blocks, splits branch conditions
///    and indirect-branch critical edges,
///  - runs optimizeBlock() over the function until nothing changes,
///  - folds constant terminators and deletes the blocks that become dead,
///  - simplifies statepoint relocates and finally places debug values and
///    pseudo probes.
/// Returns true if anything in \p F was changed.
/// NOTE(review): original lines 603-604, 612, 641, 750, 764, 785 and 800 are
/// absent from this listing, so a few statements below appear truncated.
581bool CodeGenPrepare::_run(Function &F) {
582 bool EverMadeChange = false;
583
584 OptSize = F.hasOptSize();
585 // Use the basic-block-sections profile to promote hot functions to .text.hot
586 // if requested.
587 if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader &&
588 BBSectionsProfileReader->isFunctionHot(F.getName())) {
589 (void)F.setSectionPrefix("hot");
590 } else if (ProfileGuidedSectionPrefix) {
591 // The hot attribute overwrites profile count based hotness while profile
592 // counts based hotness overwrite the cold attribute.
593 // This is a conservative behavior.
594 if (F.hasFnAttribute(Attribute::Hot) ||
595 PSI->isFunctionHotInCallGraph(&F, *BFI))
596 (void)F.setSectionPrefix("hot");
597 // If PSI shows this function is not hot, we will place the function
598 // into unlikely section if (1) PSI shows this is a cold function, or
599 // (2) the function has the cold attribute.
600 else if (PSI->isFunctionColdInCallGraph(&F, *BFI) ||
601 F.hasFnAttribute(Attribute::Cold))
602 (void)F.setSectionPrefix("unlikely");
605 (void)F.setSectionPrefix("unknown");
606 }
607
608 /// This optimization identifies DIV instructions that can be
609 /// profitably bypassed and carried out with a shorter, faster divide.
610 if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) {
611 const DenseMap<unsigned int, unsigned int> &BypassWidths =
613 BasicBlock *BB = &*F.begin();
614 while (BB != nullptr) {
615 // bypassSlowDivision may create new BBs, but we don't want to reapply the
616 // optimization to those blocks.
617 BasicBlock *Next = BB->getNextNode();
618 if (!llvm::shouldOptimizeForSize(BB, PSI, BFI))
619 EverMadeChange |= bypassSlowDivision(BB, BypassWidths, DTU, LI);
620 BB = Next;
621 }
622 }
623
624 // Get rid of @llvm.assume builtins before attempting to eliminate empty
625 // blocks, since there might be blocks that only contain @llvm.assume calls
626 // (plus arguments that we can get rid of).
627 EverMadeChange |= eliminateAssumptions(F);
628
 // Rebuild LoopInfo from the current dominator tree.
629 auto resetLoopInfo = [this]() {
630 LI->releaseMemory();
631 LI->analyze(DTU->getDomTree());
632 };
633
634 // Eliminate blocks that contain only PHI nodes and an
635 // unconditional branch.
636 bool ResetLI = false;
637 EverMadeChange |= eliminateMostlyEmptyBlocks(F, ResetLI);
638 if (ResetLI)
639 resetLoopInfo();
640
642 EverMadeChange |= splitBranchCondition(F);
643
644 // Split some critical edges where one of the sources is an indirect branch,
645 // to help generate sane code for PHIs involving such edges.
646 bool Split = SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true,
647 BPI, BFI, DTU);
648 EverMadeChange |= Split;
649 if (Split)
650 resetLoopInfo();
651
652#ifndef NDEBUG
653 if (VerifyDomInfo)
654 assert(getDT().verify(DominatorTree::VerificationLevel::Fast) &&
655 "Incorrect DominatorTree updates in CGP");
656
657 if (VerifyLoopInfo)
658 LI->verify(getDT());
659#endif
660
661 // If we are optimizing a huge function, we need to consider the build time,
662 // because the basic algorithm's complexity is near O(N!).
663 IsHugeFunc = F.size() > HugeFuncThresholdInCGPP;
664
 // Iterate to a fixed point. For huge functions, only blocks recorded in
 // FreshBBs are revisited after the first full sweep.
665 bool MadeChange = true;
666 bool FuncIterated = false;
667 while (MadeChange) {
668 MadeChange = false;
669
670 // This is required because optimizeBlock() calls getDT() inside the loop
671 // below, which flushes pending updates and may delete dead blocks, leading
672 // to iterator invalidation.
673 DTU->flush();
674
675 for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
676 if (FuncIterated && !FreshBBs.contains(&BB))
677 continue;
678
679 ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
680 bool Changed = optimizeBlock(BB, ModifiedDTOnIteration);
681
682 MadeChange |= Changed;
683 if (IsHugeFunc) {
684 // If the BB is updated, it may still have a chance to be optimized.
685 // This usually happens during sink optimization.
686 // For example:
687 //
688 // bb0:
689 // %and = and i32 %a, 4
690 // %cmp = icmp eq i32 %and, 0
691 //
692 // If the %cmp sinks to another BB, the %and will have a chance to sink.
693 if (Changed)
694 FreshBBs.insert(&BB);
695 else if (FuncIterated)
696 FreshBBs.erase(&BB);
697 } else {
698 // For small/normal functions, we restart BB iteration if the dominator
699 // tree of the Function was changed.
700 if (ModifiedDTOnIteration != ModifyDT::NotModifyDT)
701 break;
702 }
703 }
704 // We have iterated all the BBs in the function (only relevant for huge ones).
705 FuncIterated = IsHugeFunc;
706
707 if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
708 MadeChange |= mergeSExts(F);
709 if (!LargeOffsetGEPMap.empty())
710 MadeChange |= splitLargeGEPOffsets();
711 MadeChange |= optimizePhiTypes(F);
712
713 if (MadeChange)
714 eliminateFallThrough(F);
715
716#ifndef NDEBUG
717 if (VerifyDomInfo)
718 assert(getDT().verify(DominatorTree::VerificationLevel::Fast) &&
719 "Incorrect DominatorTree updates in CGP");
720
721 if (VerifyLoopInfo)
722 LI->verify(getDT());
723#endif
724
725 // Really free removed instructions during promotion.
726 for (Instruction *I : RemovedInsts)
727 I->deleteValue();
728
 // Reset the per-iteration bookkeeping before the next sweep.
729 EverMadeChange |= MadeChange;
730 SeenChainsForSExt.clear();
731 ValToSExtendedUses.clear();
732 RemovedInsts.clear();
733 LargeOffsetGEPMap.clear();
734 LargeOffsetGEPID.clear();
735 }
736
737 NewGEPBases.clear();
738 SunkAddrs.clear();
739
740 // LoopInfo is not needed anymore and ConstantFoldTerminator can break it.
741 LI = nullptr;
742
743 if (!DisableBranchOpts) {
744 MadeChange = false;
745 // Use a set vector to get deterministic iteration order. The order the
746 // blocks are removed may affect whether or not PHI nodes in successors
747 // are removed.
748 SmallSetVector<BasicBlock *, 8> WorkList;
749 for (BasicBlock &BB : F) {
751 MadeChange |= ConstantFoldTerminator(&BB, true, nullptr, DTU);
 // NOTE(review): MadeChange accumulates across blocks, so once any
 // terminator has been folded this check no longer skips later blocks
 // whose own terminator was left unchanged.
752 if (!MadeChange)
753 continue;
754
755 for (BasicBlock *Succ : Successors)
756 if (pred_empty(Succ))
757 WorkList.insert(Succ);
758 }
759
760 // Delete the dead blocks and any of their dead successors.
761 MadeChange |= !WorkList.empty();
762 while (!WorkList.empty()) {
763 BasicBlock *BB = WorkList.pop_back_val();
765
766 DeleteDeadBlock(BB, DTU);
767
768 for (BasicBlock *Succ : Successors)
769 if (pred_empty(Succ))
770 WorkList.insert(Succ);
771 }
772
773 // Flush pending DT updates in order to finalise deletion of dead blocks.
774 DTU->flush();
775
776 // Merge pairs of basic blocks with unconditional branches, connected by
777 // a single edge.
778 if (EverMadeChange || MadeChange)
779 MadeChange |= eliminateFallThrough(F);
780
781 EverMadeChange |= MadeChange;
782 }
783
784 if (!DisableGCOpts) {
 // Collect the statepoints first, then simplify their relocates.
786 for (BasicBlock &BB : F)
787 for (Instruction &I : BB)
788 if (auto *SP = dyn_cast<GCStatepointInst>(&I))
789 Statepoints.push_back(SP);
790 for (auto &I : Statepoints)
791 EverMadeChange |= simplifyOffsetableRelocate(*I);
792 }
793
794 // Do this last to clean up use-before-def scenarios introduced by other
795 // preparatory transforms.
796 EverMadeChange |= placeDbgValues(F);
797 EverMadeChange |= placePseudoProbes(F);
798
799#ifndef NDEBUG
801 verifyBFIUpdates(F);
802#endif
803
804 return EverMadeChange;
805}
806
807bool CodeGenPrepare::eliminateAssumptions(Function &F) {
808 bool MadeChange = false;
809 for (BasicBlock &BB : F) {
810 CurInstIterator = BB.begin();
811 while (CurInstIterator != BB.end()) {
812 Instruction *I = &*(CurInstIterator++);
813 if (auto *Assume = dyn_cast<AssumeInst>(I)) {
814 MadeChange = true;
815 Value *Operand = Assume->getOperand(0);
816 Assume->eraseFromParent();
817
818 resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
819 RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr);
820 });
821 }
822 }
823 }
824 return MadeChange;
825}
826
827/// An instruction is about to be deleted, so remove all references to it in our
828/// GEP-tracking data structures.
/// \p V is erased as a key of LargeOffsetGEPMap and from NewGEPBases. If it is
/// also a tracked GEP, its LargeOffsetGEPID entry and its element in the
/// vector stored under its pointer operand are dropped as well.
/// NOTE(review): original line 833, which defines `GEP`, is absent from this
/// listing.
829void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
830 LargeOffsetGEPMap.erase(V);
831 NewGEPBases.erase(V);
832
834 if (!GEP)
835 return;
836
837 LargeOffsetGEPID.erase(GEP);
838
 // The GEP itself is stored under its pointer operand, not under V.
839 auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand());
840 if (VecI == LargeOffsetGEPMap.end())
841 return;
842
843 auto &GEPVector = VecI->second;
844 llvm::erase_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; });
845
 // Do not keep an empty vector behind for this base.
846 if (GEPVector.empty())
847 LargeOffsetGEPMap.erase(VecI);
848}
849
850// Verify BFI has been updated correctly by recomputing BFI and comparing them.
851[[maybe_unused]] void CodeGenPrepare::verifyBFIUpdates(Function &F) {
852 DominatorTree NewDT(F);
853 LoopInfo NewLI(NewDT);
854 BranchProbabilityInfo NewBPI(F, NewLI, TLInfo);
855 BlockFrequencyInfo NewBFI(F, NewBPI, NewLI);
856 NewBFI.verifyMatch(*BFI);
857}
858
859/// Merge basic blocks which are connected by a single edge, where one of the
860/// basic blocks has a single successor pointing to the other basic block,
861/// which has a single predecessor.
/// Returns true if at least one block was merged into its predecessor.
/// NOTE(review): original line 898 (the body of the final `if`) is absent from
/// this listing.
862bool CodeGenPrepare::eliminateFallThrough(Function &F) {
863 bool Changed = false;
 // Predecessors that absorbed a block; revisited after the scan.
864 SmallPtrSet<BasicBlock *, 8> Preds;
865 // Scan all of the blocks in the function, except for the entry block.
866 for (auto &Block : llvm::drop_begin(F)) {
867 auto *BB = &Block;
868 if (DTU->isBBPendingDeletion(BB))
869 continue;
870 // If the destination block has a single pred, then this is a trivial
871 // edge, just collapse it.
872 BasicBlock *SinglePred = BB->getSinglePredecessor();
873
874 // Don't merge if BB's address is taken.
 // Self-loops and blocks without a single predecessor are skipped as well.
875 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
876 continue;
877
878 if (isa<UncondBrInst>(SinglePred->getTerminator())) {
879 Changed = true;
880 LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
881
882 // Merge BB into SinglePred and delete it.
883 MergeBlockIntoPredecessor(BB, DTU, LI);
884 Preds.insert(SinglePred);
885
886 if (IsHugeFunc) {
887 // Update FreshBBs to optimize the merged BB.
888 FreshBBs.insert(SinglePred);
889 FreshBBs.erase(BB);
890 }
891 }
892 }
893
894 // (Repeatedly) merging blocks into their predecessors can create redundant
895 // debug intrinsics.
896 for (auto *Pred : Preds)
897 if (!DTU->isBBPendingDeletion(Pred))
899
900 return Changed;
901}
902
903/// Find a destination block from BB if BB is mergeable empty block.
904BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
905 // If this block doesn't end with an uncond branch, ignore it.
906 UncondBrInst *BI = dyn_cast<UncondBrInst>(BB->getTerminator());
907 if (!BI)
908 return nullptr;
909
910 // If the instruction before the branch (skipping debug info) isn't a phi
911 // node, then other stuff is happening here.
912 BasicBlock::iterator BBI = BI->getIterator();
913 if (BBI != BB->begin()) {
914 --BBI;
915 if (!isa<PHINode>(BBI))
916 return nullptr;
917 }
918
919 // Do not break infinite loops.
920 BasicBlock *DestBB = BI->getSuccessor();
921 if (DestBB == BB)
922 return nullptr;
923
924 if (!canMergeBlocks(BB, DestBB))
925 DestBB = nullptr;
926
927 return DestBB;
928}
929
930/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
931/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
932/// edges in ways that are non-optimal for isel. Start by eliminating these
933/// blocks so we can split them the way we want them.
/// \p ResetLI is an output: it is cleared first and then set if any call to
/// eliminateMostlyEmptyBlock() returns true. The return value is true if any
/// PHI or block was removed.
/// NOTE(review): original line 949 is absent from this listing, so any guard
/// around the DeleteDeadPHIs() call is not visible here.
934bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F, bool &ResetLI) {
 // Gather the preheader of every loop, walking nested loops with a worklist.
935 SmallPtrSet<BasicBlock *, 16> Preheaders;
936 SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
937 while (!LoopList.empty()) {
938 Loop *L = LoopList.pop_back_val();
939 llvm::append_range(LoopList, *L);
940 if (BasicBlock *Preheader = L->getLoopPreheader())
941 Preheaders.insert(Preheader);
942 }
943
944 ResetLI = false;
945 bool MadeChange = false;
946 // Note that this intentionally skips the entry block.
947 for (auto &Block : llvm::drop_begin(F)) {
948 // Delete phi nodes that could block deleting other empty blocks.
950 MadeChange |= DeleteDeadPHIs(&Block, TLInfo);
951 }
952
 // Second sweep (entry block skipped again): merge each qualifying block.
953 for (auto &Block : llvm::drop_begin(F)) {
954 auto *BB = &Block;
955 if (DTU->isBBPendingDeletion(BB))
956 continue;
957 BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
958 if (!DestBB ||
959 !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
960 continue;
961
962 ResetLI |= eliminateMostlyEmptyBlock(BB);
963 MadeChange = true;
964 }
965 return MadeChange;
966}
967
/// Heuristic deciding whether the empty block \p BB should be merged into
/// \p DestBB. Returns true to merge and false to keep \p BB. \p isPreheader
/// tells whether \p BB is a loop preheader.
968bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
969 BasicBlock *DestBB,
970 bool isPreheader) {
971 // Do not delete loop preheaders if doing so would create a critical edge.
972 // Loop preheaders can be good locations to spill registers. If the
973 // preheader is deleted and we create a critical edge, registers may be
974 // spilled in the loop body instead.
// NOTE(review): listing line 977 (the end of this condition) is absent from
// this extract - confirm the full predicate against upstream.
975 if (!DisablePreheaderProtect && isPreheader &&
976 !(BB->getSinglePredecessor() &&
978 return false;
979
980 // Skip merging if the block's successor is also a successor to any callbr
981 // that leads to this block.
982 // FIXME: Is this really needed? Is this a correctness issue?
983 for (BasicBlock *Pred : predecessors(BB)) {
984 if (isa<CallBrInst>(Pred->getTerminator()) &&
985 llvm::is_contained(successors(Pred), DestBB))
986 return false;
987 }
988
989 // Try to skip merging if the unique predecessor of BB is terminated by a
990 // switch or indirect branch instruction, and BB is used as an incoming block
991 // of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to
992 // add COPY instructions in the predecessor of BB instead of BB (if it is not
993 // merged). Note that the critical edge created by merging such blocks won't be
994 // split in MachineSink because the jump table is not analyzable. By keeping
995 // such empty block (BB), ISel will place COPY instructions in BB, not in the
996 // predecessor of BB.
// NOTE(review): listing line 999 (the end of the condition below) is absent
// from this extract - confirm the full predicate against upstream.
997 BasicBlock *Pred = BB->getUniquePredecessor();
998 if (!Pred || !(isa<SwitchInst>(Pred->getTerminator()) ||
1000 return true;
1001
// BB holds something besides PHIs/debug info ahead of its terminator.
1002 if (BB->getTerminator() != &*BB->getFirstNonPHIOrDbg())
1003 return true;
1004
1005 // We use a simple cost heuristic which determines skipping merging is
1006 // profitable if the cost of skipping merging is less than the cost of
1007 // merging : Cost(skipping merging) < Cost(merging BB), where the
1008 // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
1009 // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
1010 // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
1011 // Freq(Pred) / Freq(BB) > 2.
1012 // Note that if there are multiple empty blocks sharing the same incoming
1013 // value for the PHIs in the DestBB, we consider them together. In such
1014 // case, Cost(merging BB) will be the sum of their frequencies.
1015
// Without PHIs in DestBB there is nothing to weigh.
1016 if (!isa<PHINode>(DestBB->begin()))
1017 return true;
1018
1019 SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
1020
1021 // Find all other incoming blocks from which incoming values of all PHIs in
1022 // DestBB are the same as the ones from BB.
1023 for (BasicBlock *DestBBPred : predecessors(DestBB)) {
1024 if (DestBBPred == BB)
1025 continue;
1026
1027 if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
1028 return DestPN.getIncomingValueForBlock(BB) ==
1029 DestPN.getIncomingValueForBlock(DestBBPred);
1030 }))
1031 SameIncomingValueBBs.insert(DestBBPred);
1032 }
1033
1034 // See if all BB's incoming values are same as the value from Pred. In this
1035 // case, no reason to skip merging because COPYs are expected to be placed in
1036 // Pred already.
1037 if (SameIncomingValueBBs.count(Pred))
1038 return true;
1039
1040 BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
1041 BlockFrequency BBFreq = BFI->getBlockFreq(BB);
1042
// Add the frequency of sibling empty blocks that hang off the same Pred,
// feed identical values into DestBB and could be merged into DestBB as well.
1043 for (auto *SameValueBB : SameIncomingValueBBs)
1044 if (SameValueBB->getUniquePredecessor() == Pred &&
1045 DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
1046 BBFreq += BFI->getBlockFreq(SameValueBB);
1047
// Merge when mul() yields no value (presumably on overflow - confirm) or when
// Pred's frequency does not exceed the scaled BB frequency.
1048 std::optional<BlockFrequency> Limit = BBFreq.mul(FreqRatioToSkipMerge);
1049 return !Limit || PredFreq <= *Limit;
1050}
1051
1052/// Return true if we can merge BB into DestBB if there is a single
1053/// unconditional branch between them, and BB contains no other non-phi
1054/// instructions.
1055bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
1056 const BasicBlock *DestBB) const {
1057 // We only want to eliminate blocks whose phi nodes are used by phi nodes in
1058 // the successor. If there are more complex condition (e.g. preheaders),
1059 // don't mess around with them.
1060 for (const PHINode &PN : BB->phis()) {
1061 for (const User *U : PN.users()) {
1062 const Instruction *UI = cast<Instruction>(U);
1063 if (UI->getParent() != DestBB || !isa<PHINode>(UI))
1064 return false;
1065 // If User is inside DestBB block and it is a PHINode then check
1066 // incoming value. If incoming value is not from BB then this is
1067 // a complex condition (e.g. preheaders) we want to avoid here.
1068 if (UI->getParent() == DestBB) {
1069 if (const PHINode *UPN = dyn_cast<PHINode>(UI))
1070 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
1071 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
1072 if (Insn && Insn->getParent() == BB &&
1073 Insn->getParent() != UPN->getIncomingBlock(I))
1074 return false;
1075 }
1076 }
1077 }
1078 }
1079
1080 // If BB and DestBB contain any common predecessors, then the phi nodes in BB
1081 // and DestBB may have conflicting incoming values for the block. If so, we
1082 // can't merge the block.
1083 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
1084 if (!DestBBPN)
1085 return true; // no conflict.
1086
1087 // Collect the preds of BB.
1088 SmallPtrSet<const BasicBlock *, 16> BBPreds;
1089 if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1090 // It is faster to get preds from a PHI than with pred_iterator.
1091 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1092 BBPreds.insert(BBPN->getIncomingBlock(i));
1093 } else {
1094 BBPreds.insert_range(predecessors(BB));
1095 }
1096
1097 // Walk the preds of DestBB.
1098 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
1099 BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
1100 if (BBPreds.count(Pred)) { // Common predecessor?
1101 for (const PHINode &PN : DestBB->phis()) {
1102 const Value *V1 = PN.getIncomingValueForBlock(Pred);
1103 const Value *V2 = PN.getIncomingValueForBlock(BB);
1104
1105 // If V2 is a phi node in BB, look up what the mapped value will be.
1106 if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
1107 if (V2PN->getParent() == BB)
1108 V2 = V2PN->getIncomingValueForBlock(Pred);
1109
1110 // If there is a conflict, bail out.
1111 if (V1 != V2)
1112 return false;
1113 }
1114 }
1115 }
1116
1117 return true;
1118}
1119
1120/// Replace all old uses with new ones, and push the updated BBs into FreshBBs.
/// The parent blocks of the users of \p Old are recorded only when \p IsHuge is
/// set and \p Old is an Instruction; the replacement itself always happens.
// NOTE(review): listing lines 1122 (part of the parameter list, where FreshBBs
// would be declared) and 1128 (where the loop's `User` variable would be
// defined) are absent from this extract - confirm against upstream.
1121static void replaceAllUsesWith(Value *Old, Value *New,
1123 bool IsHuge) {
1124 auto *OldI = dyn_cast<Instruction>(Old);
1125 if (OldI) {
1126 for (Value::user_iterator UI = OldI->user_begin(), E = OldI->user_end();
1127 UI != E; ++UI) {
1129 if (IsHuge)
1130 FreshBBs.insert(User->getParent());
1131 }
1132 }
1133 Old->replaceAllUsesWith(New);
1134}
1135
1136/// Eliminate a basic block that has only phi's and an unconditional branch in
1137/// it.
1138/// Indicate that the LoopInfo was modified only if it wasn't updated.
/// Returns false on the path that merges the destination block into \p BB via
/// MergeBlockIntoPredecessor (which is handed LI), and true on the path that
/// removes \p BB and redirects its predecessors to the destination block.
1139bool CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
1140 UncondBrInst *BI = cast<UncondBrInst>(BB->getTerminator());
1141 BasicBlock *DestBB = BI->getSuccessor();
1142
1143 LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
1144 << *BB << *DestBB);
1145
1146 // If the destination block has a single pred, then this is a trivial edge,
1147 // just collapse it.
1148 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
1149 if (SinglePred != DestBB) {
1150 assert(SinglePred == BB &&
1151 "Single predecessor not the same as predecessor");
1152 // Merge DestBB into SinglePred/BB and delete it.
1153 MergeBlockIntoPredecessor(DestBB, DTU, LI);
1154 // Note: BB(=SinglePred) will not be deleted on this path.
1155 // DestBB(=its single successor) is the one that was deleted.
1156 LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
1157
1158 if (IsHugeFunc) {
1159 // Update FreshBBs to optimize the merged BB.
1160 FreshBBs.insert(SinglePred);
1161 FreshBBs.erase(DestBB);
1162 }
1163 return false;
1164 }
1165 }
1166
1167 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
1168 // to handle the new incoming edges it is about to have.
1169 for (PHINode &PN : DestBB->phis()) {
1170 // Remove the incoming value for BB, and remember it.
1171 Value *InVal = PN.removeIncomingValue(BB, false);
1172
1173 // Two options: either the InVal is a phi node defined in BB or it is some
1174 // value that dominates BB.
1175 PHINode *InValPhi = dyn_cast<PHINode>(InVal);
1176 if (InValPhi && InValPhi->getParent() == BB) {
1177 // Add all of the input values of the input PHI as inputs of this phi.
1178 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
1179 PN.addIncoming(InValPhi->getIncomingValue(i),
1180 InValPhi->getIncomingBlock(i));
1181 } else {
1182 // Otherwise, add one instance of the dominating value for each edge that
1183 // we will be adding.
1184 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1185 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1186 PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
1187 } else {
1188 for (BasicBlock *Pred : predecessors(BB))
1189 PN.addIncoming(InVal, Pred);
1190 }
1191 }
1192 }
1193
1194 // Preserve loop Metadata.
1195 if (BI->hasMetadata(LLVMContext::MD_loop)) {
1196 for (auto *Pred : predecessors(BB))
1197 Pred->getTerminator()->copyMetadata(*BI, LLVMContext::MD_loop);
1198 }
1199
1200 // The PHIs are now updated, change everything that refers to BB to use
1201 // DestBB and remove BB.
// NOTE(review): listing line 1202 is absent from this extract; it presumably
// declares DTUpdates, which is used below - confirm against upstream.
1203 SmallPtrSet<BasicBlock *, 8> SeenPreds;
1204 SmallPtrSet<BasicBlock *, 8> PredOfDestBB(llvm::from_range,
1205 predecessors(DestBB));
// Queue one Insert per distinct predecessor of BB that is not already a
// predecessor of DestBB.
1206 for (auto *Pred : predecessors(BB)) {
1207 if (!PredOfDestBB.contains(Pred)) {
1208 if (SeenPreds.insert(Pred).second)
1209 DTUpdates.push_back({DominatorTree::Insert, Pred, DestBB});
1210 }
1211 }
// Queue one Delete per distinct predecessor for its edge into BB, then the
// Delete for the BB -> DestBB edge.
1212 SeenPreds.clear();
1213 for (auto *Pred : predecessors(BB)) {
1214 if (SeenPreds.insert(Pred).second)
1215 DTUpdates.push_back({DominatorTree::Delete, Pred, BB});
1216 }
1217 DTUpdates.push_back({DominatorTree::Delete, BB, DestBB});
1218 BB->replaceAllUsesWith(DestBB);
1219 DTU->applyUpdates(DTUpdates);
1220 DTU->deleteBB(BB);
1221 ++NumBlocksElim;
1222
1223 LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
1224 return true;
1225}
1226
1227// Computes a map of base pointer relocation instructions to corresponding
1228// derived pointer relocation instructions given a vector of all relocate calls
// NOTE(review): listing lines 1229 and 1231 (the start of the signature and
// part of the parameter list) and 1236 (presumably the declaration of
// RelocateIdxMap) are absent from this extract - confirm against upstream.
1230 const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
1232 &RelocateInstMap) {
1233 // Collect information in two maps: one primarily for locating the base object
1234 // while filling the second map; the second map is the final structure holding
1235 // a mapping between Base and corresponding Derived relocate calls
// Index every relocate by its (base index, derived index) pair.
1237 for (auto *ThisRelocate : AllRelocateCalls) {
1238 auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
1239 ThisRelocate->getDerivedPtrIndex());
1240 RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
1241 }
1242 for (auto &Item : RelocateIdxMap) {
1243 std::pair<unsigned, unsigned> Key = Item.first;
1244 if (Key.first == Key.second)
1245 // Base relocation: nothing to insert
1246 continue;
1247
1248 GCRelocateInst *I = Item.second;
// The relocate of the base itself is keyed by (base index, base index).
1249 auto BaseKey = std::make_pair(Key.first, Key.first);
1250
1251 // We're iterating over RelocateIdxMap so we cannot modify it.
1252 auto MaybeBase = RelocateIdxMap.find(BaseKey);
1253 if (MaybeBase == RelocateIdxMap.end())
1254 // TODO: We might want to insert a new base object relocate and gep off
1255 // that, if there are enough derived object relocates.
1256 continue;
1257
1258 RelocateInstMap[MaybeBase->second].push_back(I);
1259 }
1260}
1261
1262// Accepts a GEP and extracts the operands into a vector provided they're all
1263// small integer constants
// Returns false, leaving OffsetV untouched, as soon as an index operand is not
// a ConstantInt or its zero-extended value exceeds 20.
// NOTE(review): listing line 1264 (the start of the signature) is absent from
// this extract - confirm against upstream.
1265 SmallVectorImpl<Value *> &OffsetV) {
// Operand 0 is skipped; only operands 1..N-1 are checked and copied.
1266 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
1267 // Only accept small constant integer operands
1268 auto *Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
1269 if (!Op || Op->getZExtValue() > 20)
1270 return false;
1271 }
1272
1273 for (unsigned i = 1; i < GEP->getNumOperands(); i++)
1274 OffsetV.push_back(GEP->getOperand(i));
1275 return true;
1276}
1277
1278// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
1279// replace, computes a replacement, and applies it.
// Returns true if RelocatedBase was moved or any target was replaced.
// NOTE(review): listing line 1281 (the function name and first parameter) and
// line 1321 (presumably the declaration of OffsetV) are absent from this
// extract - confirm against upstream.
1280static bool
1282 const SmallVectorImpl<GCRelocateInst *> &Targets) {
1283 bool MadeChange = false;
1284 // We must ensure the relocation of derived pointer is defined after
1285 // relocation of base pointer. If we find a relocation corresponding to base
1286 // defined earlier than relocation of base then we move relocation of base
1287 // right before found relocation. We consider only relocation in the same
1288 // basic block as relocation of base. Relocations from other basic block will
1289 // be skipped by optimization and we do not care about them.
1290 for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
1291 &*R != RelocatedBase; ++R)
1292 if (auto *RI = dyn_cast<GCRelocateInst>(R))
1293 if (RI->getStatepoint() == RelocatedBase->getStatepoint())
1294 if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
1295 RelocatedBase->moveBefore(RI->getIterator());
1296 MadeChange = true;
1297 break;
1298 }
1299
1300 for (GCRelocateInst *ToReplace : Targets) {
1301 assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
1302 "Not relocating a derived object of the original base object");
1303 if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
1304 // A duplicate relocate call. TODO: coalesce duplicates.
1305 continue;
1306 }
1307
1308 if (RelocatedBase->getParent() != ToReplace->getParent()) {
1309 // Base and derived relocates are in different basic blocks.
1310 // In this case transform is only valid when base dominates derived
1311 // relocate. However it would be too expensive to check dominance
1312 // for each such relocate, so we skip the whole transformation.
1313 continue;
1314 }
1315
// Only handle a derived pointer that is a GEP directly off the base pointer.
1316 Value *Base = ToReplace->getBasePtr();
1317 auto *Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
1318 if (!Derived || Derived->getPointerOperand() != Base)
1319 continue;
1320
1322 if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
1323 continue;
1324
1325 // Create a Builder and replace the target callsite with a gep
1326 assert(RelocatedBase->getNextNode() &&
1327 "Should always have one since it's not a terminator");
1328
1329 // Insert after RelocatedBase
1330 IRBuilder<> Builder(RelocatedBase->getNextNode());
1331 Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
1332
1333 // If gc_relocate does not match the actual type, cast it to the right type.
1334 // In theory, there must be a bitcast after gc_relocate if the type does not
1335 // match, and we should reuse it to get the derived pointer. But it could be
1336 // cases like this:
1337 // bb1:
1338 // ...
1339 // %g1 = call coldcc i8 addrspace(1)*
1340 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1341 //
1342 // bb2:
1343 // ...
1344 // %g2 = call coldcc i8 addrspace(1)*
1345 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1346 //
1347 // merge:
1348 // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
1349 // %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)*
1350 //
1351 // In this case, we can not find the bitcast any more. So we insert a new
1352 // bitcast no matter there is already one or not. In this way, we can handle
1353 // all cases, and the extra bitcast should be optimized away in later
1354 // passes.
1355 Value *ActualRelocatedBase = RelocatedBase;
1356 if (RelocatedBase->getType() != Base->getType()) {
1357 ActualRelocatedBase =
1358 Builder.CreateBitCast(RelocatedBase, Base->getType());
1359 }
1360 Value *Replacement =
1361 Builder.CreateGEP(Derived->getSourceElementType(), ActualRelocatedBase,
1362 ArrayRef(OffsetV));
1363 Replacement->takeName(ToReplace);
1364 // If the newly generated derived pointer's type does not match the original
1365 // derived pointer's type, cast the new derived pointer to match it. Same
1366 // reasoning as above.
1367 Value *ActualReplacement = Replacement;
1368 if (Replacement->getType() != ToReplace->getType()) {
1369 ActualReplacement =
1370 Builder.CreateBitCast(Replacement, ToReplace->getType());
1371 }
1372 ToReplace->replaceAllUsesWith(ActualReplacement);
1373 ToReplace->eraseFromParent();
1374
1375 MadeChange = true;
1376 }
1377 return MadeChange;
1378}
1379
1380// Turns this:
1381//
1382// %base = ...
1383// %ptr = gep %base + 15
1384// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1385// %base' = relocate(%tok, i32 4, i32 4)
1386// %ptr' = relocate(%tok, i32 4, i32 5)
1387// %val = load %ptr'
1388//
1389// into this:
1390//
1391// %base = ...
1392// %ptr = gep %base + 15
1393// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1394// %base' = gc.relocate(%tok, i32 4, i32 4)
1395// %ptr' = gep %base' + 15
1396// %val = load %ptr'
1397bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) {
1398 bool MadeChange = false;
1399 SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
1400 for (auto *U : I.users())
1401 if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
1402 // Collect all the relocate calls associated with a statepoint
1403 AllRelocateCalls.push_back(Relocate);
1404
1405 // We need at least one base pointer relocation + one derived pointer
1406 // relocation to mangle
1407 if (AllRelocateCalls.size() < 2)
1408 return false;
1409
1410 // RelocateInstMap is a mapping from the base relocate instruction to the
1411 // corresponding derived relocate instructions
1412 MapVector<GCRelocateInst *, SmallVector<GCRelocateInst *, 0>> RelocateInstMap;
1413 computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1414 if (RelocateInstMap.empty())
1415 return false;
1416
1417 for (auto &Item : RelocateInstMap)
1418 // Item.first is the RelocatedBase to offset against
1419 // Item.second is the vector of Targets to replace
1420 MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
1421 return MadeChange;
1422}
1423
1424/// Sink the specified cast instruction into its user blocks.
/// Returns true if any use was rewritten or the original cast was erased.
1425static bool SinkCast(CastInst *CI) {
1426 BasicBlock *DefBB = CI->getParent();
1427
1428 /// InsertedCasts - Only insert a cast in each block once.
// NOTE(review): listing lines 1429 (presumably the declaration of
// InsertedCasts) and 1435 (presumably the definition of the loop's `User`
// variable) are absent from this extract - confirm against upstream.
1430
1431 bool MadeChange = false;
1432 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1433 UI != E;) {
1434 Use &TheUse = UI.getUse();
1436
1437 // Figure out which BB this cast is used in. For PHI's this is the
1438 // appropriate predecessor block.
1439 BasicBlock *UserBB = User->getParent();
1440 if (PHINode *PN = dyn_cast<PHINode>(User)) {
1441 UserBB = PN->getIncomingBlock(TheUse);
1442 }
1443
1444 // Preincrement use iterator so we don't invalidate it.
1445 ++UI;
1446
1447 // The first insertion point of a block containing an EH pad is after the
1448 // pad. If the pad is the user, we cannot sink the cast past the pad.
1449 if (User->isEHPad())
1450 continue;
1451
1452 // If the block selected to receive the cast is an EH pad that does not
1453 // allow non-PHI instructions before the terminator, we can't sink the
1454 // cast.
1455 if (UserBB->getTerminator()->isEHPad())
1456 continue;
1457
1458 // If this user is in the same block as the cast, don't change the cast.
1459 if (UserBB == DefBB)
1460 continue;
1461
1462 // If we have already inserted a cast into this block, use it.
1463 CastInst *&InsertedCast = InsertedCasts[UserBB];
1464
// First use seen in this block: clone the cast at the block's first insertion
// point and remember it through the map slot.
1465 if (!InsertedCast) {
1466 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1467 assert(InsertPt != UserBB->end());
1468 InsertedCast = cast<CastInst>(CI->clone());
1469 InsertedCast->insertBefore(*UserBB, InsertPt);
1470 }
1471
1472 // Replace a use of the cast with a use of the new cast.
1473 TheUse = InsertedCast;
1474 MadeChange = true;
1475 ++NumCastUses;
1476 }
1477
1478 // If we removed all uses, nuke the cast.
1479 if (CI->use_empty()) {
1480 salvageDebugInfo(*CI);
1481 CI->eraseFromParent();
1482 MadeChange = true;
1483 }
1484
1485 return MadeChange;
1486}
1487
1488/// If the specified cast instruction is a noop copy (e.g. it's casting from
1489/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1490/// reduce the number of virtual registers that must be created and coalesced.
1491///
1492/// Return true if any changes are made.
// NOTE(review): listing line 1493 (the function name and leading parameters)
// and lines 1520 and 1523 (the right-hand sides of the two getTypeAction
// comparisons below) are absent from this extract - confirm against upstream.
1494 const DataLayout &DL) {
1495 // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1496 // than sinking only nop casts, but is helpful on some platforms.
1497 if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1498 if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
1499 ASC->getDestAddressSpace()))
1500 return false;
1501 }
1502
1503 // If this is a noop copy,
1504 EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1505 EVT DstVT = TLI.getValueType(DL, CI->getType());
1506
1507 // This is an fp<->int conversion?
1508 if (SrcVT.isInteger() != DstVT.isInteger())
1509 return false;
1510
1511 // If this is an extension, it will be a zero or sign extension, which
1512 // isn't a noop.
1513 if (SrcVT.bitsLT(DstVT))
1514 return false;
1515
1516 // If these values will be promoted, find out what they will be promoted
1517 // to. This helps us consider truncates on PPC as noop copies when they
1518 // are.
1519 if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
1521 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
1522 if (TLI.getTypeAction(CI->getContext(), DstVT) ==
1524 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
1525
1526 // If, after promotion, these are the same types, this is a noop copy.
1527 if (SrcVT != DstVT)
1528 return false;
1529
1530 return SinkCast(CI);
1531}
1532
1533// Match a simple increment by constant operation. Note that if a sub is
1534// matched, the step is negated (as if the step had been canonicalized to
1535// an add, even though we leave the instruction alone.)
// On success LHS and Step are set by the matchers and true is returned.
// NOTE(review): listing lines 1539 and 1543 (the start of the second
// alternative in each condition) are absent from this extract - confirm
// against upstream.
1536static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS,
1537 Constant *&Step) {
1538 if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) ||
1540 m_Instruction(LHS), m_Constant(Step)))))
1541 return true;
1542 if (match(IVInc, m_Sub(m_Instruction(LHS), m_Constant(Step))) ||
1544 m_Instruction(LHS), m_Constant(Step))))) {
1545 Step = ConstantExpr::getNeg(Step);
1546 return true;
1547 }
1548 return false;
1549}
1550
1551/// If given \p PN is an inductive variable with value IVInc coming from the
1552/// backedge, and on each iteration it gets increased by Step, return pair
1553/// <IVInc, Step>. Otherwise, return std::nullopt.
1554static std::optional<std::pair<Instruction *, Constant *>>
1555getIVIncrement(const PHINode *PN, const LoopInfo *LI) {
1556 const Loop *L = LI->getLoopFor(PN->getParent());
1557 if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
1558 return std::nullopt;
1559 auto *IVInc =
1560 dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
1561 if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L)
1562 return std::nullopt;
1563 Instruction *LHS = nullptr;
1564 Constant *Step = nullptr;
1565 if (matchIncrement(IVInc, LHS, Step) && LHS == PN)
1566 return std::make_pair(IVInc, Step);
1567 return std::nullopt;
1568}
1569
1570static bool isIVIncrement(const Value *V, const LoopInfo *LI) {
1571 auto *I = dyn_cast<Instruction>(V);
1572 if (!I)
1573 return false;
1574 Instruction *LHS = nullptr;
1575 Constant *Step = nullptr;
1576 if (!matchIncrement(I, LHS, Step))
1577 return false;
1578 if (auto *PN = dyn_cast<PHINode>(LHS))
1579 if (auto IVInc = getIVIncrement(PN, LI))
1580 return IVInc->first == I;
1581 return false;
1582}
1583
/// Replace the math operation \p BO and the compare \p Cmp with one call to
/// the binary intrinsic \p IID on \p Arg0 and \p Arg1. Uses of \p Cmp are
/// replaced with the extracted element 1 ("ov") and, unless \p BO is an Xor,
/// uses of \p BO with element 0 ("math"). Both original instructions are
/// erased. Returns false, changing nothing, when \p BO is in a different block
/// than \p Cmp and is not a replaceable IV increment.
1584bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
1585 Value *Arg0, Value *Arg1,
1586 CmpInst *Cmp,
1587 Intrinsic::ID IID) {
1588 auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) {
1589 if (!isIVIncrement(BO, LI))
1590 return false;
1591 const Loop *L = LI->getLoopFor(BO->getParent());
1592 assert(L && "L should not be null after isIVIncrement()");
1593 // Do not risk on moving increment into a child loop.
1594 if (LI->getLoopFor(Cmp->getParent()) != L)
1595 return false;
1596
1597 // Finally, we need to ensure that the insert point will dominate all
1598 // existing uses of the increment.
1599
1600 auto &DT = getDT();
1601 if (DT.dominates(Cmp->getParent(), BO->getParent()))
1602 // If we're moving up the dom tree, all uses are trivially dominated.
1603 // (This is the common case for code produced by LSR.)
1604 return true;
1605
1606 // Otherwise, special case the single use in the phi recurrence.
1607 return BO->hasOneUse() && DT.dominates(Cmp->getParent(), L->getLoopLatch());
1608 };
1609 if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement(BO)) {
1610 // We used to use a dominator tree here to allow multi-block optimization.
1611 // But that was problematic because:
1612 // 1. It could cause a perf regression by hoisting the math op into the
1613 // critical path.
1614 // 2. It could cause a perf regression by creating a value that was live
1615 // across multiple blocks and increasing register pressure.
1616 // 3. Use of a dominator tree could cause large compile-time regression.
1617 // This is because we recompute the DT on every change in the main CGP
1618 // run-loop. The recomputing is probably unnecessary in many cases, so if
1619 // that was fixed, using a DT here would be ok.
1620 //
1621 // There is one important particular case we still want to handle: if BO is
1622 // the IV increment. Important properties that make it profitable:
1623 // - We can speculate IV increment anywhere in the loop (as long as the
1624 // indvar Phi is its only user);
1625 // - Upon computing Cmp, we effectively compute something equivalent to the
1626 // IV increment (despite it loops differently in the IR). So moving it up
1627 // to the cmp point does not really increase register pressure.
1628 return false;
1629 }
1630
1631 // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
1632 if (BO->getOpcode() == Instruction::Add &&
1633 IID == Intrinsic::usub_with_overflow) {
1634 assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
// NOTE(review): listing line 1635 is absent from this extract; it presumably
// rewrites Arg1 to the negated constant - confirm against upstream.
1636 }
1637
1638 // Insert at the first instruction of the pair.
1639 Instruction *InsertPt = nullptr;
1640 for (Instruction &Iter : *Cmp->getParent()) {
1641 // If BO is an XOR, it is not guaranteed that it comes after both inputs to
1642 // the overflow intrinsic are defined.
1643 if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) {
1644 InsertPt = &Iter;
1645 break;
1646 }
1647 }
1648 assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
1649
1650 IRBuilder<> Builder(InsertPt);
1651 Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
1652 if (BO->getOpcode() != Instruction::Xor) {
1653 Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
1654 replaceAllUsesWith(BO, Math, FreshBBs, IsHugeFunc);
1655 } else
1656 assert(BO->hasOneUse() &&
1657 "Patterns with XOr should use the BO only in the compare");
1658 Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
1659 replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc);
1660 Cmp->eraseFromParent();
1661 BO->eraseFromParent();
1662 return true;
1663}
1664
1665/// Match special-case patterns that check for unsigned add overflow.
// NOTE(review): listing line 1666 (the function name and first parameter) and
// line 1688 (presumably the assignment of the matched user to Add) are absent
// from this extract - confirm against upstream.
1667 BinaryOperator *&Add) {
1668 // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
1669 // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
1670 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1671
1672 // We are not expecting non-canonical/degenerate code. Just bail out.
1673 if (isa<Constant>(A))
1674 return false;
1675
// Turn the compare constant into the add constant that pairs with it:
// (eq, -1) goes with an add of 1 and (ne, 0) goes with an add of -1.
1676 ICmpInst::Predicate Pred = Cmp->getPredicate();
1677 if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
1678 B = ConstantInt::get(B->getType(), 1);
1679 else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
1680 B = Constant::getAllOnesValue(B->getType());
1681 else
1682 return false;
1683
1684 // Check the users of the variable operand of the compare looking for an add
1685 // with the adjusted constant.
1686 for (User *U : A->users()) {
1687 if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
1689 return true;
1690 }
1691 }
1692 return false;
1693}
1694
1695/// Try to combine the compare into a call to the llvm.uadd.with.overflow
1696/// intrinsic. Return true if any changes were made.
/// On success \p ModifiedDT is set to ModifyDT::ModifyInstDT.
1697bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
1698 ModifyDT &ModifiedDT) {
1699 bool EdgeCase = false;
1700 Value *A, *B;
1701 BinaryOperator *Add;
// NOTE(review): listing line 1703 (the condition guarding the `return false`
// below; the comment at 1705 suggests it calls
// matchUAddWithOverflowConstantEdgeCases) and line 1711 (the start of the
// condition whose arguments follow at 1712-1713) are absent from this
// extract - confirm against upstream.
1702 if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
1704 return false;
1705 // Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
1706 A = Add->getOperand(0);
1707 B = Add->getOperand(1);
1708 EdgeCase = true;
1709 }
1710
1712 TLI->getValueType(*DL, Add->getType()),
1713 Add->hasNUsesOrMore(EdgeCase ? 1 : 2)))
1714 return false;
1715
1716 // We don't want to move around uses of condition values this late, so we
1717 // check if it is legal to create the call to the intrinsic in the basic
1718 // block containing the icmp.
1719 if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
1720 return false;
1721
1722 if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp,
1723 Intrinsic::uadd_with_overflow))
1724 return false;
1725
1726 // Reset callers - do not crash by iterating over a dead instruction.
1727 ModifiedDT = ModifyDT::ModifyInstDT;
1728 return true;
1729}
1730
/// Try to combine the compare \p Cmp with a matching subtract (or an add of
/// the negated constant) into a call to the usub_with_overflow intrinsic.
/// Returns true, setting \p ModifiedDT to ModifyDT::ModifyInstDT, if the
/// replacement was made.
1731bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
1732 ModifyDT &ModifiedDT) {
1733 // We are not expecting non-canonical/degenerate code. Just bail out.
1734 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1735 if (isa<Constant>(A) && isa<Constant>(B))
1736 return false;
1737
1738 // Convert (A u> B) to (A u< B) to simplify pattern matching.
1739 ICmpInst::Predicate Pred = Cmp->getPredicate();
1740 if (Pred == ICmpInst::ICMP_UGT) {
1741 std::swap(A, B);
1742 Pred = ICmpInst::ICMP_ULT;
1743 }
1744 // Convert special-case: (A == 0) is the same as (A u< 1).
1745 if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
1746 B = ConstantInt::get(B->getType(), 1);
1747 Pred = ICmpInst::ICMP_ULT;
1748 }
1749 // Convert special-case: (A != 0) is the same as (0 u< A).
1750 if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
1751 std::swap(A, B);
1752 Pred = ICmpInst::ICMP_ULT;
1753 }
1754 if (Pred != ICmpInst::ICMP_ULT)
1755 return false;
1756
1757 // Walk the users of a variable operand of a compare looking for a subtract or
1758 // add with that same operand. Also match the 2nd operand of the compare to
1759 // the add/sub, but that may be a negated constant operand of an add.
// NOTE(review): listing lines 1765 and 1773 (presumably the assignments of
// the matched user to Sub) and line 1780 (the start of the condition whose
// arguments follow at 1781-1782) are absent from this extract - confirm
// against upstream.
1760 Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
1761 BinaryOperator *Sub = nullptr;
1762 for (User *U : CmpVariableOperand->users()) {
1763 // A - B, A u< B --> usubo(A, B)
1764 if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
1766 break;
1767 }
1768
1769 // A + (-C), A u< C (canonicalized form of (sub A, C))
1770 const APInt *CmpC, *AddC;
1771 if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
1772 match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
1774 break;
1775 }
1776 }
1777 if (!Sub)
1778 return false;
1779
1781 TLI->getValueType(*DL, Sub->getType()),
1782 Sub->hasNUsesOrMore(1)))
1783 return false;
1784
1785 // We don't want to move around uses of condition values this late, so we
1786 // check if it is legal to create the call to the intrinsic in the basic
1787 // block containing the icmp.
1788 if (Sub->getParent() != Cmp->getParent() && !Sub->hasOneUse())
1789 return false;
1790
1791 if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
1792 Cmp, Intrinsic::usub_with_overflow))
1793 return false;
1794
1795 // Reset callers - do not crash by iterating over a dead instruction.
1796 ModifiedDT = ModifyDT::ModifyInstDT;
1797 return true;
1798}
1799
1800// Decanonicalizes icmp+ctpop power-of-two test if ctpop is slow.
1801// The same transformation exists in DAG combiner, but we repeat it here because
1802// DAG builder can break the pattern by moving icmp into a successor block.
//
// Returns true if Cmp was either updated in place (fast-ctpop path) or had all
// of its uses replaced by a newly built compare (slow-ctpop path).
1803bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
1804 CmpPredicate Pred;
1805 Value *X;
1806 const APInt *C;
1807
1808 // (icmp (ctpop x), c)
1811 return false;
1812
1813 // We're only interested in "is power of 2 [or zero]" patterns.
// Strict test: ctpop(x) ==/!= 1. Relaxed test: ctpop(x) u< 2 or ctpop(x) u> 1.
1814 bool IsStrictlyPowerOf2Test = ICmpInst::isEquality(Pred) && *C == 1;
1815 bool IsPowerOf2OrZeroTest = (Pred == CmpInst::ICMP_ULT && *C == 2) ||
1816 (Pred == CmpInst::ICMP_UGT && *C == 1);
1817 if (!IsStrictlyPowerOf2Test && !IsPowerOf2OrZeroTest)
1818 return false;
1819
1820 // Some targets have better codegen for `ctpop(x) u</u>= 2/1` than for
1821 // `ctpop(x) ==/!= 1`. If ctpop is fast, only try changing the comparison,
1822 // and otherwise expand ctpop into a few simple instructions.
1823 Type *OpTy = X->getType();
1824 if (TLI->isCtpopFast(TLI->getValueType(*DL, OpTy))) {
1825 // Look for `ctpop(x) ==/!= 1`, where `ctpop(x)` is known to be non-zero.
1826 if (!IsStrictlyPowerOf2Test || !isKnownNonZero(Cmp->getOperand(0), *DL))
1827 return false;
1828
1829 // ctpop(x) == 1 -> ctpop(x) u< 2
1830 // ctpop(x) != 1 -> ctpop(x) u> 1
1831 if (Pred == ICmpInst::ICMP_EQ) {
1832 Cmp->setOperand(1, ConstantInt::get(OpTy, 2));
1833 Cmp->setPredicate(ICmpInst::ICMP_ULT);
1834 } else {
// The constant operand is already 1 here, so only the predicate changes.
1835 Cmp->setPredicate(ICmpInst::ICMP_UGT);
1836 }
1837 return true;
1838 }
1839
// Slow ctpop: build a replacement compare that does not use ctpop at all.
1840 Value *NewCmp;
1841 if (IsPowerOf2OrZeroTest ||
1842 (IsStrictlyPowerOf2Test && isKnownNonZero(Cmp->getOperand(0), *DL))) {
1843 // ctpop(x) u< 2 -> (x & (x - 1)) == 0
1844 // ctpop(x) u> 1 -> (x & (x - 1)) != 0
1845 IRBuilder<> Builder(Cmp);
// x - 1 is emitted as x + (-1).
1846 Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy));
1847 Value *And = Builder.CreateAnd(X, Sub);
1848 CmpInst::Predicate NewPred =
1849 (Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_EQ)
1851 : CmpInst::ICMP_NE;
1852 NewCmp = Builder.CreateICmp(NewPred, And, ConstantInt::getNullValue(OpTy));
1853 } else {
1854 // ctpop(x) == 1 -> (x ^ (x - 1)) u> (x - 1)
1855 // ctpop(x) != 1 -> (x ^ (x - 1)) u<= (x - 1)
1856 IRBuilder<> Builder(Cmp);
// x - 1 is emitted as x + (-1).
1857 Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy));
1858 Value *Xor = Builder.CreateXor(X, Sub);
1859 CmpInst::Predicate NewPred =
1861 NewCmp = Builder.CreateICmp(NewPred, Xor, Sub);
1862 }
1863
1864 Cmp->replaceAllUsesWith(NewCmp);
1866 return true;
1867}
1868
1869/// Sink the given CmpInst into user blocks to reduce the number of virtual
1870/// registers that must be created and coalesced. This is a clear win except on
1871/// targets with multiple condition code registers (PowerPC), where it might
1872/// lose; some adjustment may be wanted there.
1873///
/// At most one copy of the compare is inserted per user block. PHI users and
/// users in the compare's own block keep using the original compare, which is
/// erased only if it ends up with no uses.
///
1874/// Return true if any changes are made.
1875static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI,
1876 const DataLayout &DL) {
1877 if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType())))
1878 return false;
1879
1880 // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1881 if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
1882 return false;
1883
// True if at least one user would keep the original compare alive.
1884 bool UsedInPhiOrCurrentBlock = any_of(Cmp->users(), [Cmp](User *U) {
1885 return isa<PHINode>(U) ||
1886 cast<Instruction>(U)->getParent() == Cmp->getParent();
1887 });
1888
1889 // Avoid sinking larger than legal integer comparisons unless it's ONLY used in
1890 // another BB.
1891 if (UsedInPhiOrCurrentBlock && Cmp->getOperand(0)->getType()->isIntegerTy() &&
1892 Cmp->getOperand(0)->getType()->getScalarSizeInBits() >
1893 DL.getLargestLegalIntTypeSizeInBits())
1894 return false;
1895
1896 // Only insert a cmp in each block once.
1898
1899 bool MadeChange = false;
1900 for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
1901 UI != E;) {
1902 Use &TheUse = UI.getUse();
1904
1905 // Preincrement use iterator so we don't invalidate it.
1906 ++UI;
1907
1908 // Don't bother for PHI nodes.
1909 if (isa<PHINode>(User))
1910 continue;
1911
1912 // Figure out which BB this cmp is used in.
1913 BasicBlock *UserBB = User->getParent();
1914 BasicBlock *DefBB = Cmp->getParent();
1915
1916 // If this user is in the same block as the cmp, don't change the cmp.
1917 if (UserBB == DefBB)
1918 continue;
1919
1920 // If we have already inserted a cmp into this block, use it.
1921 CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1922
1923 if (!InsertedCmp) {
// Clone the compare at the first insertion point of the user's block.
1924 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1925 assert(InsertPt != UserBB->end());
1926 InsertedCmp = CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
1927 Cmp->getOperand(0), Cmp->getOperand(1), "");
1928 InsertedCmp->insertBefore(*UserBB, InsertPt);
1929 // Propagate the debug info.
1930 InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
1931 }
1932
1933 // Replace a use of the cmp with a use of the new cmp.
1934 TheUse = InsertedCmp;
1935 MadeChange = true;
1936 ++NumCmpUses;
1937 }
1938
1939 // If we removed all uses, nuke the cmp.
1940 if (Cmp->use_empty()) {
1941 Cmp->eraseFromParent();
1942 MadeChange = true;
1943 }
1944
1945 return MadeChange;
1946}
1947
1948/// For pattern like:
1949///
1950/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
1951/// ...
1952/// DomBB:
1953/// ...
1954/// br DomCond, TrueBB, CmpBB
1955/// CmpBB: (with DomBB being the single predecessor)
1956/// ...
1957/// Cmp = icmp eq CmpOp0, CmpOp1
1958/// ...
1959///
1960/// This needs two comparisons on targets where icmp sgt/slt is lowered
1961/// differently from icmp eq (PowerPC). This function tries to convert
1962/// 'Cmp = icmp eq CmpOp0, CmpOp1' to 'Cmp = icmp slt/sgt CmpOp0, CmpOp1'.
1963/// After that, DomCond and Cmp can share the same comparison, saving one
1964/// comparison.
1965///
1966/// Return true if any changes are made.
1968 const TargetLowering &TLI) {
1970 return false;
1971
// Only equality compares are candidates for the rewrite.
1972 ICmpInst::Predicate Pred = Cmp->getPredicate();
1973 if (Pred != ICmpInst::ICMP_EQ)
1974 return false;
1975
1976 // If icmp eq has users other than CondBrInst and SelectInst, converting it to
1977 // icmp slt/sgt would introduce more redundant LLVM IR.
1978 for (User *U : Cmp->users()) {
1979 if (isa<CondBrInst>(U))
1980 continue;
// A select only qualifies when Cmp is its condition, not one of its values.
1981 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
1982 continue;
1983 return false;
1984 }
1985
1986 // This is a cheap/incomplete check for dominance - just match a single
1987 // predecessor with a conditional branch.
1988 BasicBlock *CmpBB = Cmp->getParent();
1989 BasicBlock *DomBB = CmpBB->getSinglePredecessor();
1990 if (!DomBB)
1991 return false;
1992
1993 // We want to ensure that the only way control gets to the comparison of
1994 // interest is that a less/greater than comparison on the same operands is
1995 // false.
1996 Value *DomCond;
1997 BasicBlock *TrueBB, *FalseBB;
1998 if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
1999 return false;
2000 if (CmpBB != FalseBB)
2001 return false;
2002
// The dominating condition must be a signed less/greater-than on exactly the
// same operands, in the same order.
2003 Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
2004 CmpPredicate DomPred;
2005 if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1))))
2006 return false;
2007 if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
2008 return false;
2009
2010 // Convert the equality comparison to the opposite of the dominating
2011 // comparison and swap the direction for all branch/select users.
2012 // We have conceptually converted:
2013 // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
2014 // to
2015 // Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
2016 // And similarly for branches.
2017 for (User *U : Cmp->users()) {
2018 if (auto *BI = dyn_cast<CondBrInst>(U)) {
2019 BI->swapSuccessors();
2020 continue;
2021 }
2022 if (auto *SI = dyn_cast<SelectInst>(U)) {
2023 // Swap operands
2024 SI->swapValues();
2025 SI->swapProfMetadata();
2026 continue;
2027 }
// The user scan above already rejected every other kind of user.
2028 llvm_unreachable("Must be a branch or a select");
2029 }
// slt becomes sgt and vice versa; the user swaps above compensate for it.
2030 Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred));
2031 return true;
2032}
2033
2034/// Many architectures use the same instruction for both subtract and cmp. Try
2035/// to swap cmp operands to match subtract operations to allow for CSE.
///
/// Only integer compares of two distinct non-constant operands are considered.
/// Return true if the operands of the compare were swapped.
2037 Value *Op0 = Cmp->getOperand(0);
2038 Value *Op1 = Cmp->getOperand(1);
2039 if (!Op0->getType()->isIntegerTy() || isa<Constant>(Op0) ||
2040 isa<Constant>(Op1) || Op0 == Op1)
2041 return false;
2042
2043 // If a subtract already has the same operands as a compare, swapping would be
2044 // bad. If a subtract has the same operands as a compare but in reverse order,
2045 // then swapping is good.
// GoodToSwap is a net vote: +1 per reversed-order subtract, -1 per same-order
// subtract found among the users of Op0.
2046 int GoodToSwap = 0;
2047 unsigned NumInspected = 0;
2048 for (const User *U : Op0->users()) {
2049 // Avoid walking many users.
2050 if (++NumInspected > 128)
2051 return false;
2052 if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0))))
2053 GoodToSwap++;
2054 else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1))))
2055 GoodToSwap--;
2056 }
2057
// Swap only when reversed-order subtracts strictly outnumber same-order ones.
2058 if (GoodToSwap > 0) {
2059 Cmp->swapOperands();
2060 return true;
2061 }
2062 return false;
2063}
2064
/// Replace an fcmp that fcmpToClassTest recognises as an "infinity" or
/// "infinity or NaN" class test (or the complement of one) with the value
/// built by IRBuilder::createIsFPClass.
///
/// Return true if the uses of Cmp were replaced; false if Cmp is not an FCmpInst
/// or any of the checks below rejects it.
2065static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
2066 const DataLayout &DL) {
2067 FCmpInst *FCmp = dyn_cast<FCmpInst>(Cmp);
2068 if (!FCmp)
2069 return false;
2070
2071 // Don't fold if the target offers free fabs and the predicate is legal.
2072 EVT VT = TLI.getValueType(DL, Cmp->getOperand(0)->getType());
2073 if (TLI.isFAbsFree(VT) &&
2075 VT.getSimpleVT()))
2076 return false;
2077
2078 // Reverse the canonicalization if it is a FP class test
2079 auto ShouldReverseTransform = [](FPClassTest ClassTest) {
2080 return ClassTest == fcInf || ClassTest == (fcInf | fcNan);
2081 };
// A null ClassVal means the compare could not be expressed as a class test.
2082 auto [ClassVal, ClassTest] =
2083 fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(),
2084 FCmp->getOperand(0), FCmp->getOperand(1));
2085 if (!ClassVal)
2086 return false;
2087
// Accept the test itself or its complement.
2088 if (!ShouldReverseTransform(ClassTest) && !ShouldReverseTransform(~ClassTest))
2089 return false;
2090
2091 IRBuilder<> Builder(Cmp);
2092 Value *IsFPClass = Builder.createIsFPClass(ClassVal, ClassTest);
2093 Cmp->replaceAllUsesWith(IsFPClass);
2095 return true;
2096}
2097
/// Check whether Rem is a urem whose dividend is a two-input loop PHI, either
/// used directly or through one `add nuw` with a loop-invariant offset, and
/// whose divisor is loop invariant.
///
/// On success, RemAmtOut receives the divisor, LoopIncrPNOut the PHI, and
/// AddInstOut/AddOffsetOut the intermediate add and its other operand (both
/// null when the PHI feeds the urem directly). The out-parameters are written
/// only when the function returns true.
2099 Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, Value *&AddInstOut,
2100 Value *&AddOffsetOut, PHINode *&LoopIncrPNOut) {
2101 Value *Incr, *RemAmt;
2102 // NB: If RemAmt is a power of 2 it *should* have been transformed by now.
2103 if (!match(Rem, m_URem(m_Value(Incr), m_Value(RemAmt))))
2104 return false;
2105
2106 Value *AddInst, *AddOffset;
2107 // Find out loop increment PHI.
2108 auto *PN = dyn_cast<PHINode>(Incr);
2109 if (PN != nullptr) {
2110 AddInst = nullptr;
2111 AddOffset = nullptr;
2112 } else {
2113 // Search through a NUW add on top of the loop increment.
2114 Value *V0, *V1;
2115 if (!match(Incr, m_NUWAdd(m_Value(V0), m_Value(V1))))
2116 return false;
2117
// Either operand of the add may be the PHI; the other one is the offset.
2118 AddInst = Incr;
2119 PN = dyn_cast<PHINode>(V0);
2120 if (PN != nullptr) {
2121 AddOffset = V1;
2122 } else {
2123 PN = dyn_cast<PHINode>(V1);
2124 AddOffset = V0;
2125 }
2126 }
2127
2128 if (!PN)
2129 return false;
2130
2131 // This isn't strictly necessary, what we really need is one increment and any
2132 // amount of initial values all being the same.
2133 if (PN->getNumIncomingValues() != 2)
2134 return false;
2135
2136 // Only trivially analyzable loops.
2137 Loop *L = LI->getLoopFor(PN->getParent());
2138 if (!L || !L->getLoopPreheader() || !L->getLoopLatch())
2139 return false;
2140
2141 // Require that the remainder is in the loop.
2142 if (!L->contains(Rem))
2143 return false;
2144
2145 // Only works if the remainder amount is loop invariant.
2146 if (!L->isLoopInvariant(RemAmt))
2147 return false;
2148
2149 // Only works if the AddOffset is loop invariant.
2150 if (AddOffset && !L->isLoopInvariant(AddOffset))
2151 return false;
2152
2153 // Is the PHI a loop increment?
2154 auto LoopIncrInfo = getIVIncrement(PN, LI);
2155 if (!LoopIncrInfo)
2156 return false;
2157
2158 // We need remainder_amount % increment_amount to be zero. Increment of one
2159 // satisfies that without any special logic and is overwhelmingly the common
2160 // case.
2161 if (!match(LoopIncrInfo->second, m_One()))
2162 return false;
2163
2164 // Need the increment to not overflow.
2165 if (!match(LoopIncrInfo->first, m_c_NUWAdd(m_Specific(PN), m_Value())))
2166 return false;
2167
2168 // Set output variables.
2169 RemAmtOut = RemAmt;
2170 LoopIncrPNOut = PN;
2171 AddInstOut = AddInst;
2172 AddOffsetOut = AddOffset;
2173
2174 return true;
2175}
2176
2177// Try to transform:
2178//
2179// for(i = Start; i < End; ++i)
2180// Rem = (i nuw+ IncrLoopInvariant) u% RemAmtLoopInvariant;
2181//
2182// ->
2183//
2184// Rem = (Start nuw+ IncrLoopInvariant) % RemAmtLoopInvariant;
2185// for(i = Start; i < End; ++i, ++rem)
2186// Rem = rem == RemAmtLoopInvariant ? 0 : Rem;
//
// The transform is skipped for constant remainder amounts and whenever the
// initial remainder does not simplify. Returns true if Rem was replaced by the
// new remainder PHI and erased.
2188 const LoopInfo *LI,
2190 bool IsHuge) {
2191 Value *AddOffset, *RemAmt, *AddInst;
2192 PHINode *LoopIncrPN;
2193 if (!isRemOfLoopIncrementWithLoopInvariant(Rem, LI, RemAmt, AddInst,
2194 AddOffset, LoopIncrPN))
2195 return false;
2196
2197 // Only non-constant remainder as the extra IV is probably not profitable
2198 // in that case.
2199 //
2200 // Potential TODO(1): `urem` of a const ends up as `mul` + `shift` + `add`. If
2201 // we can rule out register pressure and ensure this `urem` is executed each
2202 // iteration, it's probably profitable to handle the const case as well.
2203 //
2204 // Potential TODO(2): Should we have a check for how "nested" this remainder
2205 // operation is? The new code runs every iteration so if the remainder is
2206 // guarded behind unlikely conditions this might not be worth it.
2207 if (match(RemAmt, m_ImmConstant()))
2208 return false;
2209
2210 Loop *L = LI->getLoopFor(LoopIncrPN->getParent());
2211 Value *Start = LoopIncrPN->getIncomingValueForBlock(L->getLoopPreheader());
2212 // If we have add create initial value for remainder.
2213 // The logic here is:
2214 // (urem (add nuw Start, IncrLoopInvariant), RemAmtLoopInvariant
2215 //
2216 // Only proceed if the expression simplifies (otherwise we can't fully
2217 // optimize out the urem).
2218 if (AddInst) {
2219 assert(AddOffset && "We found an add but missing values");
2220 // Without dom-condition/assumption cache we aren't likely to get much out
2221 // of a context instruction.
2222 Start = simplifyAddInst(Start, AddOffset,
2223 match(AddInst, m_NSWAdd(m_Value(), m_Value())),
2224 /*IsNUW=*/true, *DL);
2225 if (!Start)
2226 return false;
2227 }
2228
2229 // If we can't fully optimize out the `rem`, skip this transform.
2230 Start = simplifyURemInst(Start, RemAmt, *DL);
2231 if (!Start)
2232 return false;
2233
2234 // Create new remainder with induction variable.
2235 Type *Ty = Rem->getType();
2236 IRBuilder<> Builder(Rem->getContext());
2237
// The new remainder PHI is created next to the existing induction PHI.
2238 Builder.SetInsertPoint(LoopIncrPN);
2239 PHINode *NewRem = Builder.CreatePHI(Ty, 2);
2240
// The remainder update is emitted at the instruction that supplies the
// induction PHI's value from the latch.
2241 Builder.SetInsertPoint(cast<Instruction>(
2242 LoopIncrPN->getIncomingValueForBlock(L->getLoopLatch())));
2243 // `(add (urem x, y), 1)` is always nuw.
2244 Value *RemAdd = Builder.CreateNUWAdd(NewRem, ConstantInt::get(Ty, 1));
2245 Value *RemCmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, RemAdd, RemAmt);
// Wrap back to zero once the incremented remainder reaches RemAmt.
2246 Value *RemSel =
2247 Builder.CreateSelect(RemCmp, Constant::getNullValue(Ty), RemAdd);
2248
2249 NewRem->addIncoming(Start, L->getLoopPreheader());
2250 NewRem->addIncoming(RemSel, L->getLoopLatch());
2251
2252 // Insert all touched BBs.
2253 FreshBBs.insert(LoopIncrPN->getParent());
2254 FreshBBs.insert(L->getLoopLatch());
2255 FreshBBs.insert(Rem->getParent());
2256 if (AddInst)
2257 FreshBBs.insert(cast<Instruction>(AddInst)->getParent());
2258 replaceAllUsesWith(Rem, NewRem, FreshBBs, IsHuge);
2259 Rem->eraseFromParent();
// The intermediate add is removed too if the urem was its only user.
2260 if (AddInst && AddInst->use_empty())
2261 cast<Instruction>(AddInst)->eraseFromParent();
2262 return true;
2263}
2264
2265bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
2266 if (foldURemOfLoopIncrement(Rem, DL, LI, FreshBBs, IsHugeFunc))
2267 return true;
2268 return false;
2269}
2270
/// Run the compare-related transforms in a fixed order and stop at the first
/// one that reports a change.
///
/// \param Cmp The compare instruction to optimize.
/// \param ModifiedDT Forwarded to the overflow-intrinsic combines, which may
///   update it.
/// \returns true if any transform reported a change, false otherwise.
2271bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
2272 if (sinkCmpExpression(Cmp, *TLI, *DL))
2273 return true;
2274
2275 if (combineToUAddWithOverflow(Cmp, ModifiedDT))
2276 return true;
2277
2278 if (combineToUSubWithOverflow(Cmp, ModifiedDT))
2279 return true;
2280
2281 if (unfoldPowerOf2Test(Cmp))
2282 return true;
2283
2284 if (foldICmpWithDominatingICmp(Cmp, *TLI))
2285 return true;
2286
2288 return true;
2289
2290 if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
2291 return true;
2292
// None of the transforms applied.
2293 return false;
2294}
2295
2296/// Duplicate and sink the given 'and' instruction into user blocks where it is
2297/// used in a compare to allow isel to generate better code for targets where
2298/// this operation can be combined.
2299///
/// The transform only fires when every user of the 'and' is an icmp against
/// zero and the target reports the folding as beneficial. On success each use
/// gets its own copy of the 'and' and the original instruction is erased.
///
2300/// Return true if any changes are made.
2302 SetOfInstrs &InsertedInsts) {
2303 // Double-check that we're not trying to optimize an instruction that was
2304 // already optimized by some other part of this pass.
2305 assert(!InsertedInsts.count(AndI) &&
2306 "Attempting to optimize already optimized and instruction");
// InsertedInsts is only read by the assert; silence the unused warning when
// asserts are compiled out.
2307 (void)InsertedInsts;
2308
2309 // Nothing to do for single use in same basic block.
2310 if (AndI->hasOneUse() &&
2311 AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
2312 return false;
2313
2314 // Try to avoid cases where sinking/duplicating is likely to increase register
2315 // pressure.
2316 if (!isa<ConstantInt>(AndI->getOperand(0)) &&
2317 !isa<ConstantInt>(AndI->getOperand(1)) &&
2318 AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
2319 return false;
2320
// Bail out unless every user is an icmp whose second operand is zero.
2321 for (auto *U : AndI->users()) {
2323
2324 // Only sink 'and' feeding icmp with 0.
2325 if (!isa<ICmpInst>(User))
2326 return false;
2327
2328 auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
2329 if (!CmpC || !CmpC->isZero())
2330 return false;
2331 }
2332
2333 if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
2334 return false;
2335
2336 LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
2337 LLVM_DEBUG(AndI->getParent()->dump());
2338
2339 // Push the 'and' into the same block as the icmp 0. There should only be
2340 // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
2341 // others, so we don't need to keep track of which BBs we insert into.
2342 for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
2343 UI != E;) {
2344 Use &TheUse = UI.getUse();
2346
2347 // Preincrement use iterator so we don't invalidate it.
2348 ++UI;
2349
2350 LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
2351
2352 // Keep the 'and' in the same place if the use is already in the same block.
2353 Instruction *InsertPt =
2354 User->getParent() == AndI->getParent() ? AndI : User;
2355 Instruction *InsertedAnd = BinaryOperator::Create(
2356 Instruction::And, AndI->getOperand(0), AndI->getOperand(1), "",
2357 InsertPt->getIterator());
2358 // Propagate the debug info.
2359 InsertedAnd->setDebugLoc(AndI->getDebugLoc());
2360
2361 // Replace a use of the 'and' with a use of the new 'and'.
2362 TheUse = InsertedAnd;
2363 ++NumAndUses;
2364 LLVM_DEBUG(User->getParent()->dump());
2365 }
2366
2367 // We removed all uses, nuke the and.
2368 AndI->eraseFromParent();
2369 return true;
2370}
2371
2372/// Check if the candidates could be combined with a shift instruction, which
2373/// includes:
2374/// 1. Truncate instruction
2375/// 2. And instruction and the imm is a mask of the low bits:
2376/// imm & (imm+1) == 0
///
/// Return true if User is one of the two forms above.
2378 if (!isa<TruncInst>(User)) {
2379 if (User->getOpcode() != Instruction::And ||
2381 return false;
2382
2383 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
2384
// A non-zero result means the immediate is not a contiguous low-bit mask.
2385 if ((Cimm & (Cimm + 1)).getBoolValue())
2386 return false;
2387 }
2388 return true;
2389}
2390
2391/// Sink both shift and truncate instruction to the use of truncate's BB.
///
/// For each user of the truncate that lives in a different block, and whose
/// operation passes the legality filter below, a copy of the shift followed by
/// a copy of the truncate is inserted at the start of that user's block.
///
/// Return true if at least one shift/truncate pair was inserted.
2392static bool
2395 const TargetLowering &TLI, const DataLayout &DL) {
2396 BasicBlock *UserBB = User->getParent();
2398 auto *TruncI = cast<TruncInst>(User);
2399 bool MadeChange = false;
2400
2401 for (Value::user_iterator TruncUI = TruncI->user_begin(),
2402 TruncE = TruncI->user_end();
2403 TruncUI != TruncE;) {
2404
2405 Use &TruncTheUse = TruncUI.getUse();
2406 Instruction *TruncUser = cast<Instruction>(*TruncUI);
2407 // Preincrement use iterator so we don't invalidate it.
2408
2409 ++TruncUI;
2410
// Skip users whose opcode has no ISD equivalent.
2411 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
2412 if (!ISDOpcode)
2413 continue;
2414
2415 // If the use is actually a legal node, there will not be an
2416 // implicit truncate.
2417 // FIXME: always querying the result type is just an
2418 // approximation; some nodes' legality is determined by the
2419 // operand or other means. There's no good way to find out though.
2421 ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
2422 continue;
2423
2424 // Don't bother for PHI nodes.
2425 if (isa<PHINode>(TruncUser))
2426 continue;
2427
2428 BasicBlock *TruncUserBB = TruncUser->getParent();
2429
// Users in the truncate's own block need no sinking.
2430 if (UserBB == TruncUserBB)
2431 continue;
2432
2433 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
2434 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
2435
// NOTE(review): the use is redirected only when a fresh pair is created in
// this branch; later users in a block that already has a pair keep using the
// original truncate — confirm that this is intended.
2436 if (!InsertedShift && !InsertedTrunc) {
2437 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
2438 assert(InsertPt != TruncUserBB->end());
2439 // Sink the shift
2440 if (ShiftI->getOpcode() == Instruction::AShr)
2441 InsertedShift =
2442 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2443 else
2444 InsertedShift =
2445 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2446 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2447 InsertedShift->insertBefore(*TruncUserBB, InsertPt);
2448
2449 // Sink the trunc
// The truncate goes one position past the first insertion point.
2450 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
2451 TruncInsertPt++;
2452 // It will go ahead of any debug-info.
2453 TruncInsertPt.setHeadBit(true);
2454 assert(TruncInsertPt != TruncUserBB->end());
2455
2456 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
2457 TruncI->getType(), "");
2458 InsertedTrunc->insertBefore(*TruncUserBB, TruncInsertPt);
2459 InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
2460
2461 MadeChange = true;
2462
2463 TruncTheUse = InsertedTrunc;
2464 }
2465 }
2466 return MadeChange;
2467}
2468
2469/// Sink the shift *right* instruction into user blocks if the uses could
2470/// potentially be combined with this shift instruction and generate BitExtract
2471/// instruction. It will only be applied if the architecture supports BitExtract
2472/// instruction. Here is an example:
2473/// BB1:
2474/// %x.extract.shift = lshr i64 %arg1, 32
2475/// BB2:
2476/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
2477/// ==>
2478///
2479/// BB2:
2480/// %x.extract.shift.1 = lshr i64 %arg1, 32
2481/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
2482///
2483/// CodeGen will recognize the pattern in BB2 and generate BitExtract
2484/// instruction.
///
/// At most one copy of the shift is inserted per user block, and the original
/// shift is erased if it ends up with no uses.
///
2485/// Return true if any changes are made.
2487 const TargetLowering &TLI,
2488 const DataLayout &DL) {
2489 BasicBlock *DefBB = ShiftI->getParent();
2490
2491 /// Only insert instructions in each block once.
2493
2494 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
2495
2496 bool MadeChange = false;
2497 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
2498 UI != E;) {
2499 Use &TheUse = UI.getUse();
2501 // Preincrement use iterator so we don't invalidate it.
2502 ++UI;
2503
2504 // Don't bother for PHI nodes.
2505 if (isa<PHINode>(User))
2506 continue;
2507
2509 continue;
2510
2511 BasicBlock *UserBB = User->getParent();
2512
2513 if (UserBB == DefBB) {
2514 // If the shift and truncate instruction are in the same BB. The use of
2515 // the truncate(TruncUse) may still introduce another truncate if not
2516 // legal. In this case, we would like to sink both shift and truncate
2517 // instruction to the BB of TruncUse.
2518 // for example:
2519 // BB1:
2520 // i64 shift.result = lshr i64 opnd, imm
2521 // trunc.result = trunc shift.result to i16
2522 //
2523 // BB2:
2524 // ----> We will have an implicit truncate here if the architecture does
2525 // not have i16 compare.
2526 // cmp i16 trunc.result, opnd2
2527 //
// NOTE(review): this plain assignment overwrites a 'true' recorded by an
// earlier iteration, so the function can report no change after inserting a
// shift elsewhere — looks like it should accumulate (|=); confirm.
2528 if (isa<TruncInst>(User) &&
2529 shiftIsLegal
2530 // If the type of the truncate is legal, no truncate will be
2531 // introduced in other basic blocks.
2532 && (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
2533 MadeChange =
2534 SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
2535
2536 continue;
2537 }
2538 // If we have already inserted a shift into this block, use it.
2539 BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
2540
2541 if (!InsertedShift) {
2542 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
2543 assert(InsertPt != UserBB->end());
2544
// Recreate the same kind of right shift (arithmetic or logical).
2545 if (ShiftI->getOpcode() == Instruction::AShr)
2546 InsertedShift =
2547 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2548 else
2549 InsertedShift =
2550 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2551 InsertedShift->insertBefore(*UserBB, InsertPt);
2552 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2553
2554 MadeChange = true;
2555 }
2556
2557 // Replace a use of the shift with a use of the new shift.
2558 TheUse = InsertedShift;
2559 }
2560
2561 // If we removed all uses, or there are none, nuke the shift.
2562 if (ShiftI->use_empty()) {
2563 salvageDebugInfo(*ShiftI);
2564 ShiftI->eraseFromParent();
2565 MadeChange = true;
2566 }
2567
2568 return MadeChange;
2569}
2570
2571/// If counting leading or trailing zeros is an expensive operation and a zero
2572/// input is defined, add a check for zero to avoid calling the intrinsic.
2573///
2574/// We want to transform:
2575/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
2576///
2577/// into:
2578/// entry:
2579/// %cmpz = icmp eq i64 %A, 0
2580/// br i1 %cmpz, label %cond.end, label %cond.false
2581/// cond.false:
2582/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
2583/// br label %cond.end
2584/// cond.end:
2585/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
2586///
2587/// If the transform is performed, return true and set ModifiedDT to
/// ModifyDT::ModifyBBDT. Only scalar cttz/ctlz calls are handled.
2588static bool despeculateCountZeros(IntrinsicInst *CountZeros,
2589 DomTreeUpdater *DTU, LoopInfo *LI,
2590 const TargetLowering *TLI,
2591 const DataLayout *DL, ModifyDT &ModifiedDT,
2593 bool IsHugeFunc) {
2594 // If a zero input is undefined, it doesn't make sense to despeculate that.
2595 if (match(CountZeros->getOperand(1), m_One()))
2596 return false;
2597
2598 // If it's cheap to speculate, there's nothing to do.
2599 Type *Ty = CountZeros->getType();
2600 auto IntrinsicID = CountZeros->getIntrinsicID();
2601 if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz(Ty)) ||
2602 (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty)))
2603 return false;
2604
2605 // Only handle scalar cases. Anything else requires too much work.
2606 unsigned SizeInBits = Ty->getScalarSizeInBits();
2607 if (Ty->isVectorTy())
2608 return false;
2609
2610 // Bail if the value is never zero.
2611 Use &Op = CountZeros->getOperandUse(0);
2612 if (isKnownNonZero(Op, *DL))
2613 return false;
2614
2615 // The intrinsic will be sunk behind a compare against zero and branch.
2616 BasicBlock *StartBlock = CountZeros->getParent();
2617 BasicBlock *CallBlock = SplitBlock(StartBlock, CountZeros, DTU, LI,
2618 /* MSSAU */ nullptr, "cond.false");
2619 if (IsHugeFunc)
2620 FreshBBs.insert(CallBlock);
2621
2622 // Create another block after the count zero intrinsic. A PHI will be added
2623 // in this block to select the result of the intrinsic or the bit-width
2624 // constant if the input to the intrinsic is zero.
2625 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(CountZeros));
2626 // Any debug-info after CountZeros should not be included.
2627 SplitPt.setHeadBit(true);
2628 BasicBlock *EndBlock = SplitBlock(CallBlock, &*SplitPt, DTU, LI,
2629 /* MSSAU */ nullptr, "cond.end");
2630 if (IsHugeFunc)
2631 FreshBBs.insert(EndBlock);
2632
2633 // Set up a builder to create a compare, conditional branch, and PHI.
2634 IRBuilder<> Builder(CountZeros->getContext());
2635 Builder.SetInsertPoint(StartBlock->getTerminator());
2636 Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
2637
2638 // Replace the unconditional branch that was created by the first split with
2639 // a compare against zero and a conditional branch.
2640 Value *Zero = Constant::getNullValue(Ty);
2641 // Avoid introducing branch on poison. This also replaces the ctz operand.
2643 Op = Builder.CreateFreeze(Op, Op->getName() + ".fr");
2644 Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz");
2645 Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
2646 StartBlock->getTerminator()->eraseFromParent();
// Record the new StartBlock -> EndBlock edge taken when the input is zero.
2647 DTU->applyUpdates({{DominatorTree::Insert, StartBlock, EndBlock}});
2648
2649 // Create a PHI in the end block to select either the output of the intrinsic
2650 // or the bit width of the operand.
2651 Builder.SetInsertPoint(EndBlock, EndBlock->begin());
2652 PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
// Uses are redirected before the PHI's incoming values are added, so the
// PHI's own reference to CountZeros below is not rewritten.
2653 replaceAllUsesWith(CountZeros, PN, FreshBBs, IsHugeFunc);
2654 Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
2655 PN->addIncoming(BitWidth, StartBlock);
2656 PN->addIncoming(CountZeros, CallBlock);
2657
2658 // We are explicitly handling the zero case, so we can set the intrinsic's
2659 // undefined zero argument to 'true'. This will also prevent reprocessing the
2660 // intrinsic; we only despeculate when a zero input is defined.
2661 CountZeros->setArgOperand(1, Builder.getTrue());
2662 ModifiedDT = ModifyDT::ModifyBBDT;
2663 return true;
2664}
2665
2666bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
2667 BasicBlock *BB = CI->getParent();
2668
2669 // Sink address computing for memory operands into the block.
2670 if (CI->isInlineAsm() && optimizeInlineAsmInst(CI))
2671 return true;
2672
2673 // Align the pointer arguments to this call if the target thinks it's a good
2674 // idea
2675 unsigned MinSize;
2676 Align PrefAlign;
2677 if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
2678 for (auto &Arg : CI->args()) {
2679 // We want to align both objects whose address is used directly and
2680 // objects whose address is used in casts and GEPs, though it only makes
2681 // sense for GEPs if the offset is a multiple of the desired alignment and
2682 // if size - offset meets the size threshold.
2683 if (!Arg->getType()->isPointerTy())
2684 continue;
2685 APInt Offset(DL->getIndexSizeInBits(
2686 cast<PointerType>(Arg->getType())->getAddressSpace()),
2687 0);
2688 Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
2689 uint64_t Offset2 = Offset.getLimitedValue();
2690 if (!isAligned(PrefAlign, Offset2))
2691 continue;
2692 AllocaInst *AI;
2693 if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlign() < PrefAlign) {
2694 std::optional<TypeSize> AllocaSize = AI->getAllocationSize(*DL);
2695 if (AllocaSize && AllocaSize->getKnownMinValue() >= MinSize + Offset2)
2696 AI->setAlignment(PrefAlign);
2697 }
2698 // Global variables can only be aligned if they are defined in this
2699 // object (i.e. they are uniquely initialized in this object), and
2700 // over-aligning global variables that have an explicit section is
2701 // forbidden.
2702 GlobalVariable *GV;
2703 if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
2704 GV->getPointerAlignment(*DL) < PrefAlign &&
2705 GV->getGlobalSize(*DL) >= MinSize + Offset2)
2706 GV->setAlignment(PrefAlign);
2707 }
2708 }
2709 // If this is a memcpy (or similar) then we may be able to improve the
2710 // alignment.
2711 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
2712 Align DestAlign = getKnownAlignment(MI->getDest(), *DL);
2713 MaybeAlign MIDestAlign = MI->getDestAlign();
2714 if (!MIDestAlign || DestAlign > *MIDestAlign)
2715 MI->setDestAlignment(DestAlign);
2716 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
2717 MaybeAlign MTISrcAlign = MTI->getSourceAlign();
2718 Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
2719 if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
2720 MTI->setSourceAlignment(SrcAlign);
2721 }
2722 }
2723
2724 // If we have a cold call site, try to sink addressing computation into the
2725 // cold block. This interacts with our handling for loads and stores to
2726 // ensure that we can fold all uses of a potential addressing computation
2727 // into their uses. TODO: generalize this to work over profiling data
2728 if (CI->hasFnAttr(Attribute::Cold) &&
2729 !llvm::shouldOptimizeForSize(BB, PSI, BFI))
2730 for (auto &Arg : CI->args()) {
2731 if (!Arg->getType()->isPointerTy())
2732 continue;
2733 unsigned AS = Arg->getType()->getPointerAddressSpace();
2734 if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS))
2735 return true;
2736 }
2737
2738 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
2739 if (II) {
2740 switch (II->getIntrinsicID()) {
2741 default:
2742 break;
2743 case Intrinsic::assume:
2744 llvm_unreachable("llvm.assume should have been removed already");
2745 case Intrinsic::allow_runtime_check:
2746 case Intrinsic::allow_ubsan_check:
2747 case Intrinsic::experimental_widenable_condition: {
2748 // Give up on future widening opportunities so that we can fold away dead
2749 // paths and merge blocks before going into block-local instruction
2750 // selection.
2751 if (II->use_empty()) {
2752 II->eraseFromParent();
2753 return true;
2754 }
2755 Constant *RetVal = ConstantInt::getTrue(II->getContext());
2756 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
2757 replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
2758 });
2759 return true;
2760 }
2761 case Intrinsic::objectsize:
2762 llvm_unreachable("llvm.objectsize.* should have been lowered already");
2763 case Intrinsic::is_constant:
2764 llvm_unreachable("llvm.is.constant.* should have been lowered already");
2765 case Intrinsic::aarch64_stlxr:
2766 case Intrinsic::aarch64_stxr: {
2767 ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
2768 if (!ExtVal || !ExtVal->hasOneUse() ||
2769 ExtVal->getParent() == CI->getParent())
2770 return false;
2771 // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
2772 ExtVal->moveBefore(CI->getIterator());
2773 // Mark this instruction as "inserted by CGP", so that other
2774 // optimizations don't touch it.
2775 InsertedInsts.insert(ExtVal);
2776 return true;
2777 }
2778
2779 case Intrinsic::launder_invariant_group:
2780 case Intrinsic::strip_invariant_group: {
2781 Value *ArgVal = II->getArgOperand(0);
2782 auto it = LargeOffsetGEPMap.find(II);
2783 if (it != LargeOffsetGEPMap.end()) {
2784 // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
2785 // Make sure not to have to deal with iterator invalidation
2786 // after possibly adding ArgVal to LargeOffsetGEPMap.
2787 auto GEPs = std::move(it->second);
2788 LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
2789 LargeOffsetGEPMap.erase(II);
2790 }
2791
2792 replaceAllUsesWith(II, ArgVal, FreshBBs, IsHugeFunc);
2793 II->eraseFromParent();
2794 return true;
2795 }
2796 case Intrinsic::cttz:
2797 case Intrinsic::ctlz:
2798 // If counting zeros is expensive, try to avoid it.
2799 return despeculateCountZeros(II, DTU, LI, TLI, DL, ModifiedDT, FreshBBs,
2800 IsHugeFunc);
2801 case Intrinsic::fshl:
2802 case Intrinsic::fshr:
2803 return optimizeFunnelShift(II);
2804 case Intrinsic::masked_gather:
2805 return optimizeGatherScatterInst(II, II->getArgOperand(0));
2806 case Intrinsic::masked_scatter:
2807 return optimizeGatherScatterInst(II, II->getArgOperand(1));
2808 case Intrinsic::masked_load:
2809 // Treat v1X masked load as load X type.
2810 if (auto *VT = dyn_cast<FixedVectorType>(II->getType())) {
2811 if (VT->getNumElements() == 1) {
2812 Value *PtrVal = II->getArgOperand(0);
2813 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2814 if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
2815 return true;
2816 }
2817 }
2818 return false;
2819 case Intrinsic::masked_store:
2820 // Treat v1X masked store as store X type.
2821 if (auto *VT =
2822 dyn_cast<FixedVectorType>(II->getArgOperand(0)->getType())) {
2823 if (VT->getNumElements() == 1) {
2824 Value *PtrVal = II->getArgOperand(1);
2825 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2826 if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
2827 return true;
2828 }
2829 }
2830 return false;
2831 case Intrinsic::umul_with_overflow:
2832 return optimizeMulWithOverflow(II, /*IsSigned=*/false, ModifiedDT);
2833 case Intrinsic::smul_with_overflow:
2834 return optimizeMulWithOverflow(II, /*IsSigned=*/true, ModifiedDT);
2835 }
2836
2837 SmallVector<Value *, 2> PtrOps;
2838 Type *AccessTy;
2839 if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
2840 while (!PtrOps.empty()) {
2841 Value *PtrVal = PtrOps.pop_back_val();
2842 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2843 if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
2844 return true;
2845 }
2846 }
2847
2848 // From here on out we're working with named functions.
2849 auto *Callee = CI->getCalledFunction();
2850 if (!Callee)
2851 return false;
2852
2853 // Lower all default uses of _chk calls. This is very similar
2854 // to what InstCombineCalls does, but here we are only lowering calls
2855 // to fortified library functions (e.g. __memcpy_chk) that have the default
2856 // "don't know" as the objectsize. Anything else should be left alone.
2857 FortifiedLibCallSimplifier Simplifier(TLInfo, true);
2858 IRBuilder<> Builder(CI);
2859 if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
2860 replaceAllUsesWith(CI, V, FreshBBs, IsHugeFunc);
2861 CI->eraseFromParent();
2862 return true;
2863 }
2864
2865 // SCCP may have propagated, among other things, C++ static variables across
2866 // calls. If this happens to be the case, we may want to undo it in order to
2867 // avoid redundant pointer computation of the constant, as the function method
2868 // returning the constant needs to be executed anyways.
2869 auto GetUniformReturnValue = [](const Function *F) -> GlobalVariable * {
2870 if (!F->getReturnType()->isPointerTy())
2871 return nullptr;
2872
2873 GlobalVariable *UniformValue = nullptr;
2874 for (auto &BB : *F) {
2875 if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
2876 if (auto *V = dyn_cast<GlobalVariable>(RI->getReturnValue())) {
2877 if (!UniformValue)
2878 UniformValue = V;
2879 else if (V != UniformValue)
2880 return nullptr;
2881 } else {
2882 return nullptr;
2883 }
2884 }
2885 }
2886
2887 return UniformValue;
2888 };
2889
2890 if (Callee->hasExactDefinition()) {
2891 if (GlobalVariable *RV = GetUniformReturnValue(Callee)) {
2892 bool MadeChange = false;
2893 for (Use &U : make_early_inc_range(RV->uses())) {
2894 auto *I = dyn_cast<Instruction>(U.getUser());
2895 if (!I || I->getParent() != CI->getParent()) {
2896 // Limit to the same basic block to avoid extending the call-site live
2897 // range, which otherwise could increase register pressure.
2898 continue;
2899 }
2900 if (CI->comesBefore(I)) {
2901 U.set(CI);
2902 MadeChange = true;
2903 }
2904 }
2905
2906 return MadeChange;
2907 }
2908 }
2909
2910 return false;
2911}
2912
2914 const CallInst *CI) {
2915 assert(CI && CI->use_empty());
2916
2917 if (const auto *II = dyn_cast<IntrinsicInst>(CI))
2918 switch (II->getIntrinsicID()) {
2919 case Intrinsic::memset:
2920 case Intrinsic::memcpy:
2921 case Intrinsic::memmove:
2922 return true;
2923 default:
2924 return false;
2925 }
2926
2927 LibFunc LF;
2928 Function *Callee = CI->getCalledFunction();
2929 if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF))
2930 switch (LF) {
2931 case LibFunc_strcpy:
2932 case LibFunc_strncpy:
2933 case LibFunc_strcat:
2934 case LibFunc_strncat:
2935 return true;
2936 default:
2937 return false;
2938 }
2939
2940 return false;
2941}
2942
2943/// Look for opportunities to duplicate return instructions to the predecessor
2944/// to enable tail call optimizations. The case it is currently looking for is
2945/// the following one. Known intrinsics or library function that may be tail
2946/// called are taken into account as well.
2947/// @code
2948/// bb0:
2949/// %tmp0 = tail call i32 @f0()
2950/// br label %return
2951/// bb1:
2952/// %tmp1 = tail call i32 @f1()
2953/// br label %return
2954/// bb2:
2955/// %tmp2 = tail call i32 @f2()
2956/// br label %return
2957/// return:
2958/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
2959/// ret i32 %retval
2960/// @endcode
2961///
2962/// =>
2963///
2964/// @code
2965/// bb0:
2966/// %tmp0 = tail call i32 @f0()
2967/// ret i32 %tmp0
2968/// bb1:
2969/// %tmp1 = tail call i32 @f1()
2970/// ret i32 %tmp1
2971/// bb2:
2972/// %tmp2 = tail call i32 @f2()
2973/// ret i32 %tmp2
2974/// @endcode
2975bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
2976 ModifyDT &ModifiedDT) {
2977 if (!BB->getTerminator())
2978 return false;
2979
2980 ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
2981 if (!RetI)
2982 return false;
2983
2984 assert(LI->getLoopFor(BB) == nullptr && "A return block cannot be in a loop");
2985
2986 PHINode *PN = nullptr;
2987 ExtractValueInst *EVI = nullptr;
2988 BitCastInst *BCI = nullptr;
2989 Value *V = RetI->getReturnValue();
2990 if (V) {
2991 BCI = dyn_cast<BitCastInst>(V);
2992 if (BCI)
2993 V = BCI->getOperand(0);
2994
2996 if (EVI) {
2997 V = EVI->getOperand(0);
2998 if (!llvm::all_of(EVI->indices(), equal_to(0)))
2999 return false;
3000 }
3001
3002 PN = dyn_cast<PHINode>(V);
3003 }
3004
3005 if (PN && PN->getParent() != BB)
3006 return false;
3007
3008 auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) {
3009 const BitCastInst *BC = dyn_cast<BitCastInst>(Inst);
3010 if (BC && BC->hasOneUse())
3011 Inst = BC->user_back();
3012
3013 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
3014 return II->getIntrinsicID() == Intrinsic::lifetime_end;
3015 return false;
3016 };
3017
3019
3020 auto isFakeUse = [&FakeUses](const Instruction *Inst) {
3021 if (auto *II = dyn_cast<IntrinsicInst>(Inst);
3022 II && II->getIntrinsicID() == Intrinsic::fake_use) {
3023 // Record the instruction so it can be preserved when the exit block is
3024 // removed. Do not preserve the fake use that uses the result of the
3025 // PHI instruction.
3026 // Do not copy fake uses that use the result of a PHI node.
3027 // FIXME: If we do want to copy the fake use into the return blocks, we
3028 // have to figure out which of the PHI node operands to use for each
3029 // copy.
3030 if (!isa<PHINode>(II->getOperand(0))) {
3031 FakeUses.push_back(II);
3032 }
3033 return true;
3034 }
3035
3036 return false;
3037 };
3038
3039 // Make sure there are no instructions between the first instruction
3040 // and return.
3042 // Skip over pseudo-probes and the bitcast.
3043 while (&*BI == BCI || &*BI == EVI || isa<PseudoProbeInst>(BI) ||
3044 isLifetimeEndOrBitCastFor(&*BI) || isFakeUse(&*BI))
3045 BI = std::next(BI);
3046 if (&*BI != RetI)
3047 return false;
3048
3049 // Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
3050 // call.
3051 auto MayBePermittedAsTailCall = [&](const auto *CI) {
3052 return TLI->mayBeEmittedAsTailCall(CI) &&
3053 attributesPermitTailCall(BB->getParent(), CI, RetI, *TLI);
3054 };
3055
3056 SmallVector<BasicBlock *, 4> TailCallBBs;
3057 // Record the call instructions so we can insert any fake uses
3058 // that need to be preserved before them.
3060 if (PN) {
3061 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
3062 // Look through bitcasts.
3063 Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
3064 CallInst *CI = dyn_cast<CallInst>(IncomingVal);
3065 BasicBlock *PredBB = PN->getIncomingBlock(I);
3066 // Make sure the phi value is indeed produced by the tail call.
3067 if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
3068 MayBePermittedAsTailCall(CI)) {
3069 TailCallBBs.push_back(PredBB);
3070 CallInsts.push_back(CI);
3071 } else {
3072 // Consider the cases in which the phi value is indirectly produced by
3073 // the tail call, for example when encountering memset(), memmove(),
3074 // strcpy(), whose return value may have been optimized out. In such
3075 // cases, the value needs to be the first function argument.
3076 //
3077 // bb0:
3078 // tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
3079 // br label %return
3080 // return:
3081 // %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
3082 if (PredBB && PredBB->getSingleSuccessor() == BB)
3084 PredBB->getTerminator()->getPrevNode());
3085
3086 if (CI && CI->use_empty() &&
3087 isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
3088 IncomingVal == CI->getArgOperand(0) &&
3089 MayBePermittedAsTailCall(CI)) {
3090 TailCallBBs.push_back(PredBB);
3091 CallInsts.push_back(CI);
3092 }
3093 }
3094 }
3095 } else {
3096 SmallPtrSet<BasicBlock *, 4> VisitedBBs;
3097 for (BasicBlock *Pred : predecessors(BB)) {
3098 if (!VisitedBBs.insert(Pred).second)
3099 continue;
3100 if (Instruction *I = Pred->rbegin()->getPrevNode()) {
3101 CallInst *CI = dyn_cast<CallInst>(I);
3102 if (CI && CI->use_empty() && MayBePermittedAsTailCall(CI)) {
3103 // Either we return void or the return value must be the first
3104 // argument of a known intrinsic or library function.
3105 if (!V || isa<UndefValue>(V) ||
3106 (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
3107 V == CI->getArgOperand(0))) {
3108 TailCallBBs.push_back(Pred);
3109 CallInsts.push_back(CI);
3110 }
3111 }
3112 }
3113 }
3114 }
3115
3116 bool Changed = false;
3117 for (auto const &TailCallBB : TailCallBBs) {
3118 // Make sure the call instruction is followed by an unconditional branch to
3119 // the return block.
3120 UncondBrInst *BI = dyn_cast<UncondBrInst>(TailCallBB->getTerminator());
3121 if (!BI || BI->getSuccessor() != BB)
3122 continue;
3123
3124 // Duplicate the return into TailCallBB.
3125 (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB, DTU);
3127 BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB));
3128 BFI->setBlockFreq(BB,
3129 (BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)));
3130 ModifiedDT = ModifyDT::ModifyBBDT;
3131 Changed = true;
3132 ++NumRetsDup;
3133 }
3134
3135 // If we eliminated all predecessors of the block, delete the block now.
3136 if (Changed && !BB->hasAddressTaken() && pred_empty(BB)) {
3137 // Copy the fake uses found in the original return block to all blocks
3138 // that contain tail calls.
3139 for (auto *CI : CallInsts) {
3140 for (auto const *FakeUse : FakeUses) {
3141 auto *ClonedInst = FakeUse->clone();
3142 ClonedInst->insertBefore(CI->getIterator());
3143 }
3144 }
3145 DTU->deleteBB(BB);
3146 }
3147
3148 return Changed;
3149}
3150
3151//===----------------------------------------------------------------------===//
3152// Memory Optimization
3153//===----------------------------------------------------------------------===//
3154
3155namespace {
3156
3157/// This is an extended version of TargetLowering::AddrMode
3158/// which holds actual Value*'s for register values.
3159struct ExtAddrMode : public TargetLowering::AddrMode {
3160 Value *BaseReg = nullptr;
3161 Value *ScaledReg = nullptr;
3162 Value *OriginalValue = nullptr;
3163 bool InBounds = true;
3164
3165 enum FieldName {
3166 NoField = 0x00,
3167 BaseRegField = 0x01,
3168 BaseGVField = 0x02,
3169 BaseOffsField = 0x04,
3170 ScaledRegField = 0x08,
3171 ScaleField = 0x10,
3172 MultipleFields = 0xff
3173 };
3174
3175 ExtAddrMode() = default;
3176
3177 void print(raw_ostream &OS) const;
3178 void dump() const;
3179
3180 // Replace From in ExtAddrMode with To.
3181 // E.g., SExt insts may be promoted and deleted. We should replace them with
3182 // the promoted values.
3183 void replaceWith(Value *From, Value *To) {
3184 if (ScaledReg == From)
3185 ScaledReg = To;
3186 }
3187
3188 FieldName compare(const ExtAddrMode &other) {
3189 // First check that the types are the same on each field, as differing types
3190 // is something we can't cope with later on.
3191 if (BaseReg && other.BaseReg &&
3192 BaseReg->getType() != other.BaseReg->getType())
3193 return MultipleFields;
3194 if (BaseGV && other.BaseGV && BaseGV->getType() != other.BaseGV->getType())
3195 return MultipleFields;
3196 if (ScaledReg && other.ScaledReg &&
3197 ScaledReg->getType() != other.ScaledReg->getType())
3198 return MultipleFields;
3199
3200 // Conservatively reject 'inbounds' mismatches.
3201 if (InBounds != other.InBounds)
3202 return MultipleFields;
3203
3204 // Check each field to see if it differs.
3205 unsigned Result = NoField;
3206 if (BaseReg != other.BaseReg)
3207 Result |= BaseRegField;
3208 if (BaseGV != other.BaseGV)
3209 Result |= BaseGVField;
3210 if (BaseOffs != other.BaseOffs)
3211 Result |= BaseOffsField;
3212 if (ScaledReg != other.ScaledReg)
3213 Result |= ScaledRegField;
3214 // Don't count 0 as being a different scale, because that actually means
3215 // unscaled (which will already be counted by having no ScaledReg).
3216 if (Scale && other.Scale && Scale != other.Scale)
3217 Result |= ScaleField;
3218
3219 if (llvm::popcount(Result) > 1)
3220 return MultipleFields;
3221 else
3222 return static_cast<FieldName>(Result);
3223 }
3224
3225 // An AddrMode is trivial if it involves no calculation i.e. it is just a base
3226 // with no offset.
3227 bool isTrivial() {
3228 // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
3229 // trivial if at most one of these terms is nonzero, except that BaseGV and
3230 // BaseReg both being zero actually means a null pointer value, which we
3231 // consider to be 'non-zero' here.
3232 return !BaseOffs && !Scale && !(BaseGV && BaseReg);
3233 }
3234
3235 Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
3236 switch (Field) {
3237 default:
3238 return nullptr;
3239 case BaseRegField:
3240 return BaseReg;
3241 case BaseGVField:
3242 return BaseGV;
3243 case ScaledRegField:
3244 return ScaledReg;
3245 case BaseOffsField:
3246 return ConstantInt::getSigned(IntPtrTy, BaseOffs);
3247 }
3248 }
3249
3250 void SetCombinedField(FieldName Field, Value *V,
3251 const SmallVectorImpl<ExtAddrMode> &AddrModes) {
3252 switch (Field) {
3253 default:
3254 llvm_unreachable("Unhandled fields are expected to be rejected earlier");
3255 break;
3256 case ExtAddrMode::BaseRegField:
3257 BaseReg = V;
3258 break;
3259 case ExtAddrMode::BaseGVField:
3260 // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
3261 // in the BaseReg field.
3262 assert(BaseReg == nullptr);
3263 BaseReg = V;
3264 BaseGV = nullptr;
3265 break;
3266 case ExtAddrMode::ScaledRegField:
3267 ScaledReg = V;
3268 // If we have a mix of scaled and unscaled addrmodes then we want scale
3269 // to be the scale and not zero.
3270 if (!Scale)
3271 for (const ExtAddrMode &AM : AddrModes)
3272 if (AM.Scale) {
3273 Scale = AM.Scale;
3274 break;
3275 }
3276 break;
3277 case ExtAddrMode::BaseOffsField:
3278 // The offset is no longer a constant, so it goes in ScaledReg with a
3279 // scale of 1.
3280 assert(ScaledReg == nullptr);
3281 ScaledReg = V;
3282 Scale = 1;
3283 BaseOffs = 0;
3284 break;
3285 }
3286 }
3287};
3288
3289#ifndef NDEBUG
3290static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
3291 AM.print(OS);
3292 return OS;
3293}
3294#endif
3295
3296#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3297void ExtAddrMode::print(raw_ostream &OS) const {
3298 bool NeedPlus = false;
3299 OS << "[";
3300 if (InBounds)
3301 OS << "inbounds ";
3302 if (BaseGV) {
3303 OS << "GV:";
3304 BaseGV->printAsOperand(OS, /*PrintType=*/false);
3305 NeedPlus = true;
3306 }
3307
3308 if (BaseOffs) {
3309 OS << (NeedPlus ? " + " : "") << BaseOffs;
3310 NeedPlus = true;
3311 }
3312
3313 if (BaseReg) {
3314 OS << (NeedPlus ? " + " : "") << "Base:";
3315 BaseReg->printAsOperand(OS, /*PrintType=*/false);
3316 NeedPlus = true;
3317 }
3318 if (Scale) {
3319 OS << (NeedPlus ? " + " : "") << Scale << "*";
3320 ScaledReg->printAsOperand(OS, /*PrintType=*/false);
3321 }
3322
3323 OS << ']';
3324}
3325
3326LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
3327 print(dbgs());
3328 dbgs() << '\n';
3329}
3330#endif
3331
3332} // end anonymous namespace
3333
3334namespace {
3335
3336/// This class provides transaction based operation on the IR.
3337/// Every change made through this class is recorded in the internal state and
3338/// can be undone (rollback) until commit is called.
3339/// CGP does not check if instructions could be speculatively executed when
3340/// moved. Preserving the original location would pessimize the debugging
3341/// experience, as well as negatively impact the quality of sample PGO.
3342class TypePromotionTransaction {
3343 /// This represents the common interface of the individual transaction.
3344 /// Each class implements the logic for doing one specific modification on
3345 /// the IR via the TypePromotionTransaction.
3346 class TypePromotionAction {
3347 protected:
3348 /// The Instruction modified.
3349 Instruction *Inst;
3350
3351 public:
3352 /// Constructor of the action.
3353 /// The constructor performs the related action on the IR.
3354 TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
3355
3356 virtual ~TypePromotionAction() = default;
3357
3358 /// Undo the modification done by this action.
3359 /// When this method is called, the IR must be in the same state as it was
3360 /// before this action was applied.
3361 /// \pre Undoing the action works if and only if the IR is in the exact same
3362 /// state as it was directly after this action was applied.
3363 virtual void undo() = 0;
3364
3365 /// Advocate every change made by this action.
3366 /// When the results on the IR of the action are to be kept, it is important
3367 /// to call this function, otherwise hidden information may be kept forever.
3368 virtual void commit() {
3369 // Nothing to be done, this action is not doing anything.
3370 }
3371 };
3372
3373 /// Utility to remember the position of an instruction.
3374 class InsertionHandler {
3375 /// Position of an instruction.
3376 /// Either an instruction:
3377 /// - Is the first in a basic block: BB is used.
3378 /// - Has a previous instruction: PrevInst is used.
3379 struct {
3380 BasicBlock::iterator PrevInst;
3381 BasicBlock *BB;
3382 } Point;
3383 std::optional<DbgRecord::self_iterator> BeforeDbgRecord = std::nullopt;
3384
3385 /// Remember whether or not the instruction had a previous instruction.
3386 bool HasPrevInstruction;
3387
3388 public:
3389 /// Record the position of \p Inst.
3390 InsertionHandler(Instruction *Inst) {
3391 HasPrevInstruction = (Inst != &*(Inst->getParent()->begin()));
3392 BasicBlock *BB = Inst->getParent();
3393
3394 // Record where we would have to re-insert the instruction in the sequence
3395 // of DbgRecords, if we ended up reinserting.
3396 BeforeDbgRecord = Inst->getDbgReinsertionPosition();
3397
3398 if (HasPrevInstruction) {
3399 Point.PrevInst = std::prev(Inst->getIterator());
3400 } else {
3401 Point.BB = BB;
3402 }
3403 }
3404
3405 /// Insert \p Inst at the recorded position.
3406 void insert(Instruction *Inst) {
3407 if (HasPrevInstruction) {
3408 if (Inst->getParent())
3409 Inst->removeFromParent();
3410 Inst->insertAfter(Point.PrevInst);
3411 } else {
3412 BasicBlock::iterator Position = Point.BB->getFirstInsertionPt();
3413 if (Inst->getParent())
3414 Inst->moveBefore(*Point.BB, Position);
3415 else
3416 Inst->insertBefore(*Point.BB, Position);
3417 }
3418
3419 Inst->getParent()->reinsertInstInDbgRecords(Inst, BeforeDbgRecord);
3420 }
3421 };
3422
3423 /// Move an instruction before another.
3424 class InstructionMoveBefore : public TypePromotionAction {
3425 /// Original position of the instruction.
3426 InsertionHandler Position;
3427
3428 public:
3429 /// Move \p Inst before \p Before.
3430 InstructionMoveBefore(Instruction *Inst, BasicBlock::iterator Before)
3431 : TypePromotionAction(Inst), Position(Inst) {
3432 LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before
3433 << "\n");
3434 Inst->moveBefore(Before);
3435 }
3436
3437 /// Move the instruction back to its original position.
3438 void undo() override {
3439 LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
3440 Position.insert(Inst);
3441 }
3442 };
3443
3444 /// Set the operand of an instruction with a new value.
3445 class OperandSetter : public TypePromotionAction {
3446 /// Original operand of the instruction.
3447 Value *Origin;
3448
3449 /// Index of the modified instruction.
3450 unsigned Idx;
3451
3452 public:
3453 /// Set \p Idx operand of \p Inst with \p NewVal.
3454 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
3455 : TypePromotionAction(Inst), Idx(Idx) {
3456 LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
3457 << "for:" << *Inst << "\n"
3458 << "with:" << *NewVal << "\n");
3459 Origin = Inst->getOperand(Idx);
3460 Inst->setOperand(Idx, NewVal);
3461 }
3462
3463 /// Restore the original value of the instruction.
3464 void undo() override {
3465 LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
3466 << "for: " << *Inst << "\n"
3467 << "with: " << *Origin << "\n");
3468 Inst->setOperand(Idx, Origin);
3469 }
3470 };
3471
3472 /// Hide the operands of an instruction.
3473 /// Do as if this instruction was not using any of its operands.
3474 class OperandsHider : public TypePromotionAction {
3475 /// The list of original operands.
3476 SmallVector<Value *, 4> OriginalValues;
3477
3478 public:
3479 /// Remove \p Inst from the uses of the operands of \p Inst.
3480 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
3481 LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
3482 unsigned NumOpnds = Inst->getNumOperands();
3483 OriginalValues.reserve(NumOpnds);
3484 for (unsigned It = 0; It < NumOpnds; ++It) {
3485 // Save the current operand.
3486 Value *Val = Inst->getOperand(It);
3487 OriginalValues.push_back(Val);
3488 // Set a dummy one.
3489 // We could use OperandSetter here, but that would imply an overhead
3490 // that we are not willing to pay.
3491 Inst->setOperand(It, PoisonValue::get(Val->getType()));
3492 }
3493 }
3494
3495 /// Restore the original list of uses.
3496 void undo() override {
3497 LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
3498 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
3499 Inst->setOperand(It, OriginalValues[It]);
3500 }
3501 };
3502
3503 /// Build a truncate instruction.
3504 class TruncBuilder : public TypePromotionAction {
3505 Value *Val;
3506
3507 public:
3508 /// Build a truncate instruction of \p Opnd producing a \p Ty
3509 /// result.
3510 /// trunc Opnd to Ty.
3511 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
3512 IRBuilder<> Builder(Opnd);
3513 Builder.SetCurrentDebugLocation(DebugLoc());
3514 Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
3515 LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
3516 }
3517
3518 /// Get the built value.
3519 Value *getBuiltValue() { return Val; }
3520
3521 /// Remove the built instruction.
3522 void undo() override {
3523 LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
3524 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3525 IVal->eraseFromParent();
3526 }
3527 };
3528
3529 /// Build a sign extension instruction.
3530 class SExtBuilder : public TypePromotionAction {
3531 Value *Val;
3532
3533 public:
3534 /// Build a sign extension instruction of \p Opnd producing a \p Ty
3535 /// result.
3536 /// sext Opnd to Ty.
3537 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3538 : TypePromotionAction(InsertPt) {
3539 IRBuilder<> Builder(InsertPt);
3540 Val = Builder.CreateSExt(Opnd, Ty, "promoted");
3541 LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
3542 }
3543
3544 /// Get the built value.
3545 Value *getBuiltValue() { return Val; }
3546
3547 /// Remove the built instruction.
3548 void undo() override {
3549 LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
3550 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3551 IVal->eraseFromParent();
3552 }
3553 };
3554
3555 /// Build a zero extension instruction.
3556 class ZExtBuilder : public TypePromotionAction {
3557 Value *Val;
3558
3559 public:
3560 /// Build a zero extension instruction of \p Opnd producing a \p Ty
3561 /// result.
3562 /// zext Opnd to Ty.
3563 ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3564 : TypePromotionAction(InsertPt) {
3565 IRBuilder<> Builder(InsertPt);
3566 Builder.SetCurrentDebugLocation(DebugLoc());
3567 Val = Builder.CreateZExt(Opnd, Ty, "promoted");
3568 LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
3569 }
3570
3571 /// Get the built value.
3572 Value *getBuiltValue() { return Val; }
3573
3574 /// Remove the built instruction.
3575 void undo() override {
3576 LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
3577 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3578 IVal->eraseFromParent();
3579 }
3580 };
3581
3582 /// Mutate an instruction to another type.
3583 class TypeMutator : public TypePromotionAction {
3584 /// Record the original type.
3585 Type *OrigTy;
3586
3587 public:
3588 /// Mutate the type of \p Inst into \p NewTy.
3589 TypeMutator(Instruction *Inst, Type *NewTy)
3590 : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
3591 LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
3592 << "\n");
3593 Inst->mutateType(NewTy);
3594 }
3595
3596 /// Mutate the instruction back to its original type.
3597 void undo() override {
3598 LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
3599 << "\n");
3600 Inst->mutateType(OrigTy);
3601 }
3602 };
3603
3604 /// Replace the uses of an instruction by another instruction.
3605 class UsesReplacer : public TypePromotionAction {
3606 /// Helper structure to keep track of the replaced uses.
3607 struct InstructionAndIdx {
3608 /// The instruction using the instruction.
3609 Instruction *Inst;
3610
3611 /// The index where this instruction is used for Inst.
3612 unsigned Idx;
3613
3614 InstructionAndIdx(Instruction *Inst, unsigned Idx)
3615 : Inst(Inst), Idx(Idx) {}
3616 };
3617
3618 /// Keep track of the original uses (pair Instruction, Index).
3620 /// Keep track of the debug users.
3621 SmallVector<DbgVariableRecord *, 1> DbgVariableRecords;
3622
3623 /// Keep track of the new value so that we can undo it by replacing
3624 /// instances of the new value with the original value.
3625 Value *New;
3626
3628
3629 public:
3630 /// Replace all the use of \p Inst by \p New.
3631 UsesReplacer(Instruction *Inst, Value *New)
3632 : TypePromotionAction(Inst), New(New) {
3633 LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
3634 << "\n");
3635 // Record the original uses.
3636 for (Use &U : Inst->uses()) {
3637 Instruction *UserI = cast<Instruction>(U.getUser());
3638 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
3639 }
3640 // Record the debug uses separately. They are not in the instruction's
3641 // use list, but they are replaced by RAUW.
3642 findDbgValues(Inst, DbgVariableRecords);
3643
3644 // Now, we can replace the uses.
3645 Inst->replaceAllUsesWith(New);
3646 }
3647
3648 /// Reassign the original uses of Inst to Inst.
3649 void undo() override {
3650 LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
3651 for (InstructionAndIdx &Use : OriginalUses)
3652 Use.Inst->setOperand(Use.Idx, Inst);
3653 // RAUW has replaced all original uses with references to the new value,
3654 // including the debug uses. Since we are undoing the replacements,
3655 // the original debug uses must also be reinstated to maintain the
3656 // correctness and utility of debug value records.
3657 for (DbgVariableRecord *DVR : DbgVariableRecords)
3658 DVR->replaceVariableLocationOp(New, Inst);
3659 }
3660 };
3661
3662 /// Remove an instruction from the IR.
3663 class InstructionRemover : public TypePromotionAction {
3664 /// Original position of the instruction.
3665 InsertionHandler Inserter;
3666
3667 /// Helper structure to hide all the link to the instruction. In other
3668 /// words, this helps to do as if the instruction was removed.
3669 OperandsHider Hider;
3670
3671 /// Keep track of the uses replaced, if any.
3672 UsesReplacer *Replacer = nullptr;
3673
3674 /// Keep track of instructions removed.
3675 SetOfInstrs &RemovedInsts;
3676
3677 public:
3678 /// Remove all reference of \p Inst and optionally replace all its
3679 /// uses with New.
3680 /// \p RemovedInsts Keep track of the instructions removed by this Action.
3681 /// \pre If !Inst->use_empty(), then New != nullptr
3682 InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
3683 Value *New = nullptr)
3684 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
3685 RemovedInsts(RemovedInsts) {
3686 if (New)
3687 Replacer = new UsesReplacer(Inst, New);
3688 LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
3689 RemovedInsts.insert(Inst);
3690 /// The instructions removed here will be freed after completing
3691 /// optimizeBlock() for all blocks as we need to keep track of the
3692 /// removed instructions during promotion.
3693 Inst->removeFromParent();
3694 }
3695
3696 ~InstructionRemover() override { delete Replacer; }
3697
3698 InstructionRemover &operator=(const InstructionRemover &other) = delete;
3699 InstructionRemover(const InstructionRemover &other) = delete;
3700
3701 /// Resurrect the instruction and reassign it to the proper uses if
3702 /// new value was provided when build this action.
3703 void undo() override {
3704 LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
3705 Inserter.insert(Inst);
3706 if (Replacer)
3707 Replacer->undo();
3708 Hider.undo();
3709 RemovedInsts.erase(Inst);
3710 }
3711 };
3712
3713public:
3714 /// Restoration point.
3715 /// The restoration point is a pointer to an action instead of an iterator
3716 /// because the iterator may be invalidated but not the pointer.
3717 using ConstRestorationPt = const TypePromotionAction *;
3718
3719 TypePromotionTransaction(SetOfInstrs &RemovedInsts)
3720 : RemovedInsts(RemovedInsts) {}
3721
3722 /// Advocate every changes made in that transaction. Return true if any change
3723 /// happen.
3724 bool commit();
3725
3726 /// Undo all the changes made after the given point.
3727 void rollback(ConstRestorationPt Point);
3728
3729 /// Get the current restoration point.
3730 ConstRestorationPt getRestorationPoint() const;
3731
3732 /// \name API for IR modification with state keeping to support rollback.
3733 /// @{
3734 /// Same as Instruction::setOperand.
3735 void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
3736
3737 /// Same as Instruction::eraseFromParent.
3738 void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
3739
3740 /// Same as Value::replaceAllUsesWith.
3741 void replaceAllUsesWith(Instruction *Inst, Value *New);
3742
3743 /// Same as Value::mutateType.
3744 void mutateType(Instruction *Inst, Type *NewTy);
3745
3746 /// Same as IRBuilder::createTrunc.
3747 Value *createTrunc(Instruction *Opnd, Type *Ty);
3748
3749 /// Same as IRBuilder::createSExt.
3750 Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
3751
3752 /// Same as IRBuilder::createZExt.
3753 Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
3754
3755private:
3756 /// The ordered list of actions made so far.
3758
3759 using CommitPt =
3760 SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
3761
3762 SetOfInstrs &RemovedInsts;
3763};
3764
3765} // end anonymous namespace
3766
3767void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
3768 Value *NewVal) {
3769 Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
3770 Inst, Idx, NewVal));
3771}
3772
3773void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
3774 Value *NewVal) {
3775 Actions.push_back(
3776 std::make_unique<TypePromotionTransaction::InstructionRemover>(
3777 Inst, RemovedInsts, NewVal));
3778}
3779
3780void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
3781 Value *New) {
3782 Actions.push_back(
3783 std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
3784}
3785
3786void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
3787 Actions.push_back(
3788 std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
3789}
3790
3791Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, Type *Ty) {
3792 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
3793 Value *Val = Ptr->getBuiltValue();
3794 Actions.push_back(std::move(Ptr));
3795 return Val;
3796}
3797
3798Value *TypePromotionTransaction::createSExt(Instruction *Inst, Value *Opnd,
3799 Type *Ty) {
3800 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
3801 Value *Val = Ptr->getBuiltValue();
3802 Actions.push_back(std::move(Ptr));
3803 return Val;
3804}
3805
3806Value *TypePromotionTransaction::createZExt(Instruction *Inst, Value *Opnd,
3807 Type *Ty) {
3808 std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
3809 Value *Val = Ptr->getBuiltValue();
3810 Actions.push_back(std::move(Ptr));
3811 return Val;
3812}
3813
3814TypePromotionTransaction::ConstRestorationPt
3815TypePromotionTransaction::getRestorationPoint() const {
3816 return !Actions.empty() ? Actions.back().get() : nullptr;
3817}
3818
3819bool TypePromotionTransaction::commit() {
3820 for (std::unique_ptr<TypePromotionAction> &Action : Actions)
3821 Action->commit();
3822 bool Modified = !Actions.empty();
3823 Actions.clear();
3824 return Modified;
3825}
3826
3827void TypePromotionTransaction::rollback(
3828 TypePromotionTransaction::ConstRestorationPt Point) {
3829 while (!Actions.empty() && Point != Actions.back().get()) {
3830 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
3831 Curr->undo();
3832 }
3833}
3834
3835namespace {
3836
3837/// A helper class for matching addressing modes.
3838///
3839/// This encapsulates the logic for matching the target-legal addressing modes.
3840class AddressingModeMatcher {
3841 SmallVectorImpl<Instruction *> &AddrModeInsts;
3842 const TargetLowering &TLI;
3843 const TargetRegisterInfo &TRI;
3844 const DataLayout &DL;
3845 const LoopInfo &LI;
3846 const std::function<const DominatorTree &()> getDTFn;
3847
3848 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
3849 /// the memory instruction that we're computing this address for.
3850 Type *AccessTy;
3851 unsigned AddrSpace;
3852 Instruction *MemoryInst;
3853
3854 /// This is the addressing mode that we're building up. This is
3855 /// part of the return value of this addressing mode matching stuff.
3856 ExtAddrMode &AddrMode;
3857
3858 /// The instructions inserted by other CodeGenPrepare optimizations.
3859 const SetOfInstrs &InsertedInsts;
3860
3861 /// A map from the instructions to their type before promotion.
3862 InstrToOrigTy &PromotedInsts;
3863
3864 /// The ongoing transaction where every action should be registered.
3865 TypePromotionTransaction &TPT;
3866
3867 // A GEP which has too large offset to be folded into the addressing mode.
3868 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
3869
3870 /// This is set to true when we should not do profitability checks.
3871 /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
3872 bool IgnoreProfitability;
3873
3874 /// True if we are optimizing for size.
3875 bool OptSize = false;
3876
3877 ProfileSummaryInfo *PSI;
3878 BlockFrequencyInfo *BFI;
3879
3880 AddressingModeMatcher(
3881 SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
3882 const TargetRegisterInfo &TRI, const LoopInfo &LI,
3883 const std::function<const DominatorTree &()> getDTFn, Type *AT,
3884 unsigned AS, Instruction *MI, ExtAddrMode &AM,
3885 const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
3886 TypePromotionTransaction &TPT,
3887 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3888 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
3889 : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
3890 DL(MI->getDataLayout()), LI(LI), getDTFn(getDTFn),
3891 AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
3892 InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
3893 LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
3894 IgnoreProfitability = false;
3895 }
3896
3897public:
3898 /// Find the maximal addressing mode that a load/store of V can fold,
3899 /// give an access type of AccessTy. This returns a list of involved
3900 /// instructions in AddrModeInsts.
3901 /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
3902 /// optimizations.
3903 /// \p PromotedInsts maps the instructions to their type before promotion.
3904 /// \p The ongoing transaction where every action should be registered.
3905 static ExtAddrMode
3906 Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
3907 SmallVectorImpl<Instruction *> &AddrModeInsts,
3908 const TargetLowering &TLI, const LoopInfo &LI,
3909 const std::function<const DominatorTree &()> getDTFn,
3910 const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts,
3911 InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
3912 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3913 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
3914 ExtAddrMode Result;
3915
3916 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, LI, getDTFn,
3917 AccessTy, AS, MemoryInst, Result,
3918 InsertedInsts, PromotedInsts, TPT,
3919 LargeOffsetGEP, OptSize, PSI, BFI)
3920 .matchAddr(V, 0);
3921 (void)Success;
3922 assert(Success && "Couldn't select *anything*?");
3923 return Result;
3924 }
3925
3926private:
3927 bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
3928 bool matchAddr(Value *Addr, unsigned Depth);
3929 bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
3930 bool *MovedAway = nullptr);
3931 bool isProfitableToFoldIntoAddressingMode(Instruction *I,
3932 ExtAddrMode &AMBefore,
3933 ExtAddrMode &AMAfter);
3934 bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
3935 bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
3936 Value *PromotedOperand) const;
3937};
3938
3939class PhiNodeSet;
3940
3941/// An iterator for PhiNodeSet.
3942class PhiNodeSetIterator {
3943 PhiNodeSet *const Set;
3944 size_t CurrentIndex = 0;
3945
3946public:
3947 /// The constructor. Start should point to either a valid element, or be equal
3948 /// to the size of the underlying SmallVector of the PhiNodeSet.
3949 PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start);
3950 PHINode *operator*() const;
3951 PhiNodeSetIterator &operator++();
3952 bool operator==(const PhiNodeSetIterator &RHS) const;
3953 bool operator!=(const PhiNodeSetIterator &RHS) const;
3954};
3955
3956/// Keeps a set of PHINodes.
3957///
3958/// This is a minimal set implementation for a specific use case:
3959/// It is very fast when there are very few elements, but also provides good
3960/// performance when there are many. It is similar to SmallPtrSet, but also
3961/// provides iteration by insertion order, which is deterministic and stable
3962/// across runs. It is also similar to SmallSetVector, but provides removing
3963/// elements in O(1) time. This is achieved by not actually removing the element
3964/// from the underlying vector, so comes at the cost of using more memory, but
3965/// that is fine, since PhiNodeSets are used as short lived objects.
3966class PhiNodeSet {
3967 friend class PhiNodeSetIterator;
3968
3969 using MapType = SmallDenseMap<PHINode *, size_t, 32>;
3970 using iterator = PhiNodeSetIterator;
3971
3972 /// Keeps the elements in the order of their insertion in the underlying
3973 /// vector. To achieve constant time removal, it never deletes any element.
3975
3976 /// Keeps the elements in the underlying set implementation. This (and not the
3977 /// NodeList defined above) is the source of truth on whether an element
3978 /// is actually in the collection.
3979 MapType NodeMap;
3980
3981 /// Points to the first valid (not deleted) element when the set is not empty
3982 /// and the value is not zero. Equals to the size of the underlying vector
3983 /// when the set is empty. When the value is 0, as in the beginning, the
3984 /// first element may or may not be valid.
3985 size_t FirstValidElement = 0;
3986
3987public:
3988 /// Inserts a new element to the collection.
3989 /// \returns true if the element is actually added, i.e. was not in the
3990 /// collection before the operation.
3991 bool insert(PHINode *Ptr) {
3992 if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
3993 NodeList.push_back(Ptr);
3994 return true;
3995 }
3996 return false;
3997 }
3998
3999 /// Removes the element from the collection.
4000 /// \returns whether the element is actually removed, i.e. was in the
4001 /// collection before the operation.
4002 bool erase(PHINode *Ptr) {
4003 if (NodeMap.erase(Ptr)) {
4004 SkipRemovedElements(FirstValidElement);
4005 return true;
4006 }
4007 return false;
4008 }
4009
4010 /// Removes all elements and clears the collection.
4011 void clear() {
4012 NodeMap.clear();
4013 NodeList.clear();
4014 FirstValidElement = 0;
4015 }
4016
4017 /// \returns an iterator that will iterate the elements in the order of
4018 /// insertion.
4019 iterator begin() {
4020 if (FirstValidElement == 0)
4021 SkipRemovedElements(FirstValidElement);
4022 return PhiNodeSetIterator(this, FirstValidElement);
4023 }
4024
4025 /// \returns an iterator that points to the end of the collection.
4026 iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
4027
4028 /// Returns the number of elements in the collection.
4029 size_t size() const { return NodeMap.size(); }
4030
4031 /// \returns 1 if the given element is in the collection, and 0 if otherwise.
4032 size_t count(PHINode *Ptr) const { return NodeMap.count(Ptr); }
4033
4034private:
4035 /// Updates the CurrentIndex so that it will point to a valid element.
4036 ///
4037 /// If the element of NodeList at CurrentIndex is valid, it does not
4038 /// change it. If there are no more valid elements, it updates CurrentIndex
4039 /// to point to the end of the NodeList.
4040 void SkipRemovedElements(size_t &CurrentIndex) {
4041 while (CurrentIndex < NodeList.size()) {
4042 auto it = NodeMap.find(NodeList[CurrentIndex]);
4043 // If the element has been deleted and added again later, NodeMap will
4044 // point to a different index, so CurrentIndex will still be invalid.
4045 if (it != NodeMap.end() && it->second == CurrentIndex)
4046 break;
4047 ++CurrentIndex;
4048 }
4049 }
4050};
4051
4052PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
4053 : Set(Set), CurrentIndex(Start) {}
4054
4055PHINode *PhiNodeSetIterator::operator*() const {
4056 assert(CurrentIndex < Set->NodeList.size() &&
4057 "PhiNodeSet access out of range");
4058 return Set->NodeList[CurrentIndex];
4059}
4060
4061PhiNodeSetIterator &PhiNodeSetIterator::operator++() {
4062 assert(CurrentIndex < Set->NodeList.size() &&
4063 "PhiNodeSet access out of range");
4064 ++CurrentIndex;
4065 Set->SkipRemovedElements(CurrentIndex);
4066 return *this;
4067}
4068
4069bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
4070 return CurrentIndex == RHS.CurrentIndex;
4071}
4072
4073bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
4074 return !((*this) == RHS);
4075}
4076
4077/// Keep track of simplification of Phi nodes.
4078/// Accept the set of all phi nodes and erase phi node from this set
4079/// if it is simplified.
4080class SimplificationTracker {
4081 DenseMap<Value *, Value *> Storage;
4082 // Tracks newly created Phi nodes. The elements are iterated by insertion
4083 // order.
4084 PhiNodeSet AllPhiNodes;
4085 // Tracks newly created Select nodes.
4086 SmallPtrSet<SelectInst *, 32> AllSelectNodes;
4087
4088public:
4089 Value *Get(Value *V) {
4090 do {
4091 auto SV = Storage.find(V);
4092 if (SV == Storage.end())
4093 return V;
4094 V = SV->second;
4095 } while (true);
4096 }
4097
4098 void Put(Value *From, Value *To) { Storage.insert({From, To}); }
4099
4100 void ReplacePhi(PHINode *From, PHINode *To) {
4101 Value *OldReplacement = Get(From);
4102 while (OldReplacement != From) {
4103 From = To;
4104 To = dyn_cast<PHINode>(OldReplacement);
4105 OldReplacement = Get(From);
4106 }
4107 assert(To && Get(To) == To && "Replacement PHI node is already replaced.");
4108 Put(From, To);
4109 From->replaceAllUsesWith(To);
4110 AllPhiNodes.erase(From);
4111 From->eraseFromParent();
4112 }
4113
4114 PhiNodeSet &newPhiNodes() { return AllPhiNodes; }
4115
4116 void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
4117
4118 void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
4119
4120 unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
4121
4122 unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
4123
4124 void destroyNewNodes(Type *CommonType) {
4125 // For safe erasing, replace the uses with dummy value first.
4126 auto *Dummy = PoisonValue::get(CommonType);
4127 for (auto *I : AllPhiNodes) {
4128 I->replaceAllUsesWith(Dummy);
4129 I->eraseFromParent();
4130 }
4131 AllPhiNodes.clear();
4132 for (auto *I : AllSelectNodes) {
4133 I->replaceAllUsesWith(Dummy);
4134 I->eraseFromParent();
4135 }
4136 AllSelectNodes.clear();
4137 }
4138};
4139
4140/// A helper class for combining addressing modes.
4141class AddressingModeCombiner {
4142 typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
4143 typedef std::pair<PHINode *, PHINode *> PHIPair;
4144
4145private:
4146 /// The addressing modes we've collected.
4148
4149 /// The field in which the AddrModes differ, when we have more than one.
4150 ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
4151
4152 /// Are the AddrModes that we have all just equal to their original values?
4153 bool AllAddrModesTrivial = true;
4154
4155 /// Common Type for all different fields in addressing modes.
4156 Type *CommonType = nullptr;
4157
4158 const DataLayout &DL;
4159
4160 /// Original Address.
4161 Value *Original;
4162
4163 /// Common value among addresses
4164 Value *CommonValue = nullptr;
4165
4166public:
4167 AddressingModeCombiner(const DataLayout &DL, Value *OriginalValue)
4168 : DL(DL), Original(OriginalValue) {}
4169
4170 ~AddressingModeCombiner() { eraseCommonValueIfDead(); }
4171
4172 /// Get the combined AddrMode
4173 const ExtAddrMode &getAddrMode() const { return AddrModes[0]; }
4174
4175 /// Add a new AddrMode if it's compatible with the AddrModes we already
4176 /// have.
4177 /// \return True iff we succeeded in doing so.
4178 bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
4179 // Take note of if we have any non-trivial AddrModes, as we need to detect
4180 // when all AddrModes are trivial as then we would introduce a phi or select
4181 // which just duplicates what's already there.
4182 AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
4183
4184 // If this is the first addrmode then everything is fine.
4185 if (AddrModes.empty()) {
4186 AddrModes.emplace_back(NewAddrMode);
4187 return true;
4188 }
4189
4190 // Figure out how different this is from the other address modes, which we
4191 // can do just by comparing against the first one given that we only care
4192 // about the cumulative difference.
4193 ExtAddrMode::FieldName ThisDifferentField =
4194 AddrModes[0].compare(NewAddrMode);
4195 if (DifferentField == ExtAddrMode::NoField)
4196 DifferentField = ThisDifferentField;
4197 else if (DifferentField != ThisDifferentField)
4198 DifferentField = ExtAddrMode::MultipleFields;
4199
4200 // If NewAddrMode differs in more than one dimension we cannot handle it.
4201 bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
4202
4203 // If Scale Field is different then we reject.
4204 CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
4205
4206 // We also must reject the case when base offset is different and
4207 // scale reg is not null, we cannot handle this case due to merge of
4208 // different offsets will be used as ScaleReg.
4209 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
4210 !NewAddrMode.ScaledReg);
4211
4212 // We also must reject the case when GV is different and BaseReg installed
4213 // due to we want to use base reg as a merge of GV values.
4214 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
4215 !NewAddrMode.HasBaseReg);
4216
4217 // Even if NewAddMode is the same we still need to collect it due to
4218 // original value is different. And later we will need all original values
4219 // as anchors during finding the common Phi node.
4220 if (CanHandle)
4221 AddrModes.emplace_back(NewAddrMode);
4222 else
4223 AddrModes.clear();
4224
4225 return CanHandle;
4226 }
4227
4228 /// Combine the addressing modes we've collected into a single
4229 /// addressing mode.
4230 /// \return True iff we successfully combined them or we only had one so
4231 /// didn't need to combine them anyway.
4232 bool combineAddrModes() {
4233 // If we have no AddrModes then they can't be combined.
4234 if (AddrModes.size() == 0)
4235 return false;
4236
4237 // A single AddrMode can trivially be combined.
4238 if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
4239 return true;
4240
4241 // If the AddrModes we collected are all just equal to the value they are
4242 // derived from then combining them wouldn't do anything useful.
4243 if (AllAddrModesTrivial)
4244 return false;
4245
4246 if (!addrModeCombiningAllowed())
4247 return false;
4248
4249 // Build a map between <original value, basic block where we saw it> to
4250 // value of base register.
4251 // Bail out if there is no common type.
4252 FoldAddrToValueMapping Map;
4253 if (!initializeMap(Map))
4254 return false;
4255
4256 CommonValue = findCommon(Map);
4257 if (CommonValue)
4258 AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
4259 return CommonValue != nullptr;
4260 }
4261
4262private:
4263 /// `CommonValue` may be a placeholder inserted by us.
4264 /// If the placeholder is not used, we should remove this dead instruction.
4265 void eraseCommonValueIfDead() {
4266 if (CommonValue && CommonValue->use_empty())
4267 if (Instruction *CommonInst = dyn_cast<Instruction>(CommonValue))
4268 CommonInst->eraseFromParent();
4269 }
4270
4271 /// Initialize Map with anchor values. For address seen
4272 /// we set the value of different field saw in this address.
4273 /// At the same time we find a common type for different field we will
4274 /// use to create new Phi/Select nodes. Keep it in CommonType field.
4275 /// Return false if there is no common type found.
4276 bool initializeMap(FoldAddrToValueMapping &Map) {
4277 // Keep track of keys where the value is null. We will need to replace it
4278 // with constant null when we know the common type.
4279 SmallVector<Value *, 2> NullValue;
4280 Type *IntPtrTy = DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
4281 for (auto &AM : AddrModes) {
4282 Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
4283 if (DV) {
4284 auto *Type = DV->getType();
4285 if (CommonType && CommonType != Type)
4286 return false;
4287 CommonType = Type;
4288 Map[AM.OriginalValue] = DV;
4289 } else {
4290 NullValue.push_back(AM.OriginalValue);
4291 }
4292 }
4293 assert(CommonType && "At least one non-null value must be!");
4294 for (auto *V : NullValue)
4295 Map[V] = Constant::getNullValue(CommonType);
4296 return true;
4297 }
4298
4299 /// We have mapping between value A and other value B where B was a field in
4300 /// addressing mode represented by A. Also we have an original value C
4301 /// representing an address we start with. Traversing from C through phi and
4302 /// selects we ended up with A's in a map. This utility function tries to find
4303 /// a value V which is a field in addressing mode C and traversing through phi
4304 /// nodes and selects we will end up in corresponded values B in a map.
4305 /// The utility will create a new Phi/Selects if needed.
4306 // The simple example looks as follows:
4307 // BB1:
4308 // p1 = b1 + 40
4309 // br cond BB2, BB3
4310 // BB2:
4311 // p2 = b2 + 40
4312 // br BB3
4313 // BB3:
4314 // p = phi [p1, BB1], [p2, BB2]
4315 // v = load p
4316 // Map is
4317 // p1 -> b1
4318 // p2 -> b2
4319 // Request is
4320 // p -> ?
4321 // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
4322 Value *findCommon(FoldAddrToValueMapping &Map) {
4323 // Tracks the simplification of newly created phi nodes. The reason we use
4324 // this mapping is because we will add new created Phi nodes in AddrToBase.
4325 // Simplification of Phi nodes is recursive, so some Phi node may
4326 // be simplified after we added it to AddrToBase. In reality this
4327 // simplification is possible only if original phi/selects were not
4328 // simplified yet.
4329 // Using this mapping we can find the current value in AddrToBase.
4330 SimplificationTracker ST;
4331
4332 // First step, DFS to create PHI nodes for all intermediate blocks.
4333 // Also fill traverse order for the second step.
4334 SmallVector<Value *, 32> TraverseOrder;
4335 InsertPlaceholders(Map, TraverseOrder, ST);
4336
4337 // Second Step, fill new nodes by merged values and simplify if possible.
4338 FillPlaceholders(Map, TraverseOrder, ST);
4339
4340 if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
4341 ST.destroyNewNodes(CommonType);
4342 return nullptr;
4343 }
4344
4345 // Now we'd like to match New Phi nodes to existed ones.
4346 unsigned PhiNotMatchedCount = 0;
4347 if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
4348 ST.destroyNewNodes(CommonType);
4349 return nullptr;
4350 }
4351
4352 auto *Result = ST.Get(Map.find(Original)->second);
4353 if (Result) {
4354 NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
4355 NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
4356 }
4357 return Result;
4358 }
4359
4360 /// Try to match PHI node to Candidate.
4361 /// Matcher tracks the matched Phi nodes.
4362 bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
4363 SmallSetVector<PHIPair, 8> &Matcher,
4364 PhiNodeSet &PhiNodesToMatch) {
4365 SmallVector<PHIPair, 8> WorkList;
4366 Matcher.insert({PHI, Candidate});
4367 SmallPtrSet<PHINode *, 8> MatchedPHIs;
4368 MatchedPHIs.insert(PHI);
4369 WorkList.push_back({PHI, Candidate});
4370 SmallSet<PHIPair, 8> Visited;
4371 while (!WorkList.empty()) {
4372 auto Item = WorkList.pop_back_val();
4373 if (!Visited.insert(Item).second)
4374 continue;
4375 // We iterate over all incoming values to Phi to compare them.
4376 // If values are different and both of them Phi and the first one is a
4377 // Phi we added (subject to match) and both of them is in the same basic
4378 // block then we can match our pair if values match. So we state that
4379 // these values match and add it to work list to verify that.
4380 for (auto *B : Item.first->blocks()) {
4381 Value *FirstValue = Item.first->getIncomingValueForBlock(B);
4382 Value *SecondValue = Item.second->getIncomingValueForBlock(B);
4383 if (FirstValue == SecondValue)
4384 continue;
4385
4386 PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
4387 PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
4388
4389 // One of them is not Phi or
4390 // The first one is not Phi node from the set we'd like to match or
4391 // Phi nodes from different basic blocks then
4392 // we will not be able to match.
4393 if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
4394 FirstPhi->getParent() != SecondPhi->getParent())
4395 return false;
4396
4397 // If we already matched them then continue.
4398 if (Matcher.count({FirstPhi, SecondPhi}))
4399 continue;
4400 // So the values are different and does not match. So we need them to
4401 // match. (But we register no more than one match per PHI node, so that
4402 // we won't later try to replace them twice.)
4403 if (MatchedPHIs.insert(FirstPhi).second)
4404 Matcher.insert({FirstPhi, SecondPhi});
4405 // But me must check it.
4406 WorkList.push_back({FirstPhi, SecondPhi});
4407 }
4408 }
4409 return true;
4410 }
4411
4412 /// For the given set of PHI nodes (in the SimplificationTracker) try
4413 /// to find their equivalents.
4414 /// Returns false if this matching fails and creation of new Phi is disabled.
4415 bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
4416 unsigned &PhiNotMatchedCount) {
4417 // Matched and PhiNodesToMatch iterate their elements in a deterministic
4418 // order, so the replacements (ReplacePhi) are also done in a deterministic
4419 // order.
4420 SmallSetVector<PHIPair, 8> Matched;
4421 SmallPtrSet<PHINode *, 8> WillNotMatch;
4422 PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
4423 while (PhiNodesToMatch.size()) {
4424 PHINode *PHI = *PhiNodesToMatch.begin();
4425
4426 // Add us, if no Phi nodes in the basic block we do not match.
4427 WillNotMatch.clear();
4428 WillNotMatch.insert(PHI);
4429
4430 // Traverse all Phis until we found equivalent or fail to do that.
4431 bool IsMatched = false;
4432 for (auto &P : PHI->getParent()->phis()) {
4433 // Skip new Phi nodes.
4434 if (PhiNodesToMatch.count(&P))
4435 continue;
4436 if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
4437 break;
4438 // If it does not match, collect all Phi nodes from matcher.
4439 // if we end up with no match, them all these Phi nodes will not match
4440 // later.
4441 WillNotMatch.insert_range(llvm::make_first_range(Matched));
4442 Matched.clear();
4443 }
4444 if (IsMatched) {
4445 // Replace all matched values and erase them.
4446 for (auto MV : Matched)
4447 ST.ReplacePhi(MV.first, MV.second);
4448 Matched.clear();
4449 continue;
4450 }
4451 // If we are not allowed to create new nodes then bail out.
4452 if (!AllowNewPhiNodes)
4453 return false;
4454 // Just remove all seen values in matcher. They will not match anything.
4455 PhiNotMatchedCount += WillNotMatch.size();
4456 for (auto *P : WillNotMatch)
4457 PhiNodesToMatch.erase(P);
4458 }
4459 return true;
4460 }
4461 /// Fill the placeholders with values from predecessors and simplify them.
4462 void FillPlaceholders(FoldAddrToValueMapping &Map,
4463 SmallVectorImpl<Value *> &TraverseOrder,
4464 SimplificationTracker &ST) {
4465 while (!TraverseOrder.empty()) {
4466 Value *Current = TraverseOrder.pop_back_val();
4467 assert(Map.contains(Current) && "No node to fill!!!");
4468 Value *V = Map[Current];
4469
4470 if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
4471 // CurrentValue also must be Select.
4472 auto *CurrentSelect = cast<SelectInst>(Current);
4473 auto *TrueValue = CurrentSelect->getTrueValue();
4474 assert(Map.contains(TrueValue) && "No True Value!");
4475 Select->setTrueValue(ST.Get(Map[TrueValue]));
4476 auto *FalseValue = CurrentSelect->getFalseValue();
4477 assert(Map.contains(FalseValue) && "No False Value!");
4478 Select->setFalseValue(ST.Get(Map[FalseValue]));
4479 } else {
4480 // Must be a Phi node then.
4481 auto *PHI = cast<PHINode>(V);
4482 // Fill the Phi node with values from predecessors.
4483 for (auto *B : predecessors(PHI->getParent())) {
4484 Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
4485 assert(Map.contains(PV) && "No predecessor Value!");
4486 PHI->addIncoming(ST.Get(Map[PV]), B);
4487 }
4488 }
4489 }
4490 }
4491
4492 /// Starting from original value recursively iterates over def-use chain up to
4493 /// known ending values represented in a map. For each traversed phi/select
4494 /// inserts a placeholder Phi or Select.
4495 /// Reports all new created Phi/Select nodes by adding them to set.
4496 /// Also reports and order in what values have been traversed.
4497 void InsertPlaceholders(FoldAddrToValueMapping &Map,
4498 SmallVectorImpl<Value *> &TraverseOrder,
4499 SimplificationTracker &ST) {
4500 SmallVector<Value *, 32> Worklist;
4501 assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
4502 "Address must be a Phi or Select node");
4503 auto *Dummy = PoisonValue::get(CommonType);
4504 Worklist.push_back(Original);
4505 while (!Worklist.empty()) {
4506 Value *Current = Worklist.pop_back_val();
4507 // if it is already visited or it is an ending value then skip it.
4508 if (Map.contains(Current))
4509 continue;
4510 TraverseOrder.push_back(Current);
4511
4512 // CurrentValue must be a Phi node or select. All others must be covered
4513 // by anchors.
4514 if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
4515 // Is it OK to get metadata from OrigSelect?!
4516 // Create a Select placeholder with dummy value.
4517 SelectInst *Select =
4518 SelectInst::Create(CurrentSelect->getCondition(), Dummy, Dummy,
4519 CurrentSelect->getName(),
4520 CurrentSelect->getIterator(), CurrentSelect);
4521 Map[Current] = Select;
4522 ST.insertNewSelect(Select);
4523 // We are interested in True and False values.
4524 Worklist.push_back(CurrentSelect->getTrueValue());
4525 Worklist.push_back(CurrentSelect->getFalseValue());
4526 } else {
4527 // It must be a Phi node then.
4528 PHINode *CurrentPhi = cast<PHINode>(Current);
4529 unsigned PredCount = CurrentPhi->getNumIncomingValues();
4530 PHINode *PHI =
4531 PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi->getIterator());
4532 Map[Current] = PHI;
4533 ST.insertNewPhi(PHI);
4534 append_range(Worklist, CurrentPhi->incoming_values());
4535 }
4536 }
4537 }
4538
4539 bool addrModeCombiningAllowed() {
4541 return false;
4542 switch (DifferentField) {
4543 default:
4544 return false;
4545 case ExtAddrMode::BaseRegField:
4547 case ExtAddrMode::BaseGVField:
4548 return AddrSinkCombineBaseGV;
4549 case ExtAddrMode::BaseOffsField:
4551 case ExtAddrMode::ScaledRegField:
4553 }
4554 }
4555};
4556} // end anonymous namespace
4557
4558/// Try adding ScaleReg*Scale to the current addressing mode.
4559/// Return true and update AddrMode if this addr mode is legal for the target,
4560/// false if not.
4561bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
4562 unsigned Depth) {
4563 // If Scale is 1, then this is the same as adding ScaleReg to the addressing
4564 // mode. Just process that directly.
4565 if (Scale == 1)
4566 return matchAddr(ScaleReg, Depth);
4567
4568 // If the scale is 0, it takes nothing to add this.
4569 if (Scale == 0)
4570 return true;
4571
4572 // If we already have a scale of this value, we can add to it, otherwise, we
4573 // need an available scale field.
4574 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
4575 return false;
4576
4577 ExtAddrMode TestAddrMode = AddrMode;
4578
4579 // Add scale to turn X*4+X*3 -> X*7. This could also do things like
4580 // [A+B + A*7] -> [B+A*8].
4581 TestAddrMode.Scale += Scale;
4582 TestAddrMode.ScaledReg = ScaleReg;
4583
4584 // If the new address isn't legal, bail out.
4585 if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
4586 return false;
4587
4588 // It was legal, so commit it.
4589 AddrMode = TestAddrMode;
4590
4591 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
4592 // to see if ScaleReg is actually X+C. If so, we can turn this into adding
4593 // X*Scale + C*Scale to addr mode. If we found available IV increment, do not
4594 // go any further: we can reuse it and cannot eliminate it.
4595 ConstantInt *CI = nullptr;
4596 Value *AddLHS = nullptr;
4597 if (isa<Instruction>(ScaleReg) && // not a constant expr.
4598 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) &&
4599 !isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) {
4600 TestAddrMode.InBounds = false;
4601 TestAddrMode.ScaledReg = AddLHS;
4602 TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale;
4603
4604 // If this addressing mode is legal, commit it and remember that we folded
4605 // this instruction.
4606 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
4607 AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
4608 AddrMode = TestAddrMode;
4609 return true;
4610 }
4611 // Restore status quo.
4612 TestAddrMode = AddrMode;
4613 }
4614
4615 // If this is an add recurrence with a constant step, return the increment
4616 // instruction and the canonicalized step.
4617 auto GetConstantStep =
4618 [this](const Value *V) -> std::optional<std::pair<Instruction *, APInt>> {
4619 auto *PN = dyn_cast<PHINode>(V);
4620 if (!PN)
4621 return std::nullopt;
4622 auto IVInc = getIVIncrement(PN, &LI);
4623 if (!IVInc)
4624 return std::nullopt;
4625 // TODO: The result of the intrinsics above is two-complement. However when
4626 // IV inc is expressed as add or sub, iv.next is potentially a poison value.
4627 // If it has nuw or nsw flags, we need to make sure that these flags are
4628 // inferrable at the point of memory instruction. Otherwise we are replacing
4629 // well-defined two-complement computation with poison. Currently, to avoid
4630 // potentially complex analysis needed to prove this, we reject such cases.
4631 if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
4632 if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
4633 return std::nullopt;
4634 if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
4635 return std::make_pair(IVInc->first, ConstantStep->getValue());
4636 return std::nullopt;
4637 };
4638
4639 // Try to account for the following special case:
4640 // 1. ScaleReg is an inductive variable;
4641 // 2. We use it with non-zero offset;
4642 // 3. IV's increment is available at the point of memory instruction.
4643 //
4644 // In this case, we may reuse the IV increment instead of the IV Phi to
4645 // achieve the following advantages:
4646 // 1. If IV step matches the offset, we will have no need in the offset;
4647 // 2. Even if they don't match, we will reduce the overlap of living IV
4648 // and IV increment, that will potentially lead to better register
4649 // assignment.
4650 if (AddrMode.BaseOffs) {
4651 if (auto IVStep = GetConstantStep(ScaleReg)) {
4652 Instruction *IVInc = IVStep->first;
4653 // The following assert is important to ensure a lack of infinite loops.
4654 // This transforms is (intentionally) the inverse of the one just above.
4655 // If they don't agree on the definition of an increment, we'd alternate
4656 // back and forth indefinitely.
4657 assert(isIVIncrement(IVInc, &LI) && "implied by GetConstantStep");
4658 APInt Step = IVStep->second;
4659 APInt Offset = Step * AddrMode.Scale;
4660 if (Offset.isSignedIntN(64)) {
4661 TestAddrMode.InBounds = false;
4662 TestAddrMode.ScaledReg = IVInc;
4663 TestAddrMode.BaseOffs -= Offset.getLimitedValue();
4664 // If this addressing mode is legal, commit it..
4665 // (Note that we defer the (expensive) domtree base legality check
4666 // to the very last possible point.)
4667 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) &&
4668 getDTFn().dominates(IVInc, MemoryInst)) {
4669 AddrModeInsts.push_back(cast<Instruction>(IVInc));
4670 AddrMode = TestAddrMode;
4671 return true;
4672 }
4673 // Restore status quo.
4674 TestAddrMode = AddrMode;
4675 }
4676 }
4677 }
4678
4679 // Otherwise, just return what we have.
4680 return true;
4681}
4682
4683/// This is a little filter, which returns true if an addressing computation
4684/// involving I might be folded into a load/store accessing it.
4685/// This doesn't need to be perfect, but needs to accept at least
4686/// the set of instructions that MatchOperationAddr can.
4688 switch (I->getOpcode()) {
4689 case Instruction::BitCast:
4690 case Instruction::AddrSpaceCast:
4691 // Don't touch identity bitcasts.
4692 if (I->getType() == I->getOperand(0)->getType())
4693 return false;
4694 return I->getType()->isIntOrPtrTy();
4695 case Instruction::PtrToInt:
4696 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4697 return true;
4698 case Instruction::IntToPtr:
4699 // We know the input is intptr_t, so this is foldable.
4700 return true;
4701 case Instruction::Add:
4702 return true;
4703 case Instruction::Mul:
4704 case Instruction::Shl:
4705 // Can only handle X*C and X << C.
4706 return isa<ConstantInt>(I->getOperand(1));
4707 case Instruction::GetElementPtr:
4708 return true;
4709 default:
4710 return false;
4711 }
4712}
4713
4714/// Check whether or not \p Val is a legal instruction for \p TLI.
4715/// \note \p Val is assumed to be the product of some type promotion.
4716/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
4717/// to be legal, as the non-promoted value would have had the same state.
4719 const DataLayout &DL, Value *Val) {
4720 Instruction *PromotedInst = dyn_cast<Instruction>(Val);
4721 if (!PromotedInst)
4722 return false;
4723 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
4724 // If the ISDOpcode is undefined, it was undefined before the promotion.
4725 if (!ISDOpcode)
4726 return true;
4727 // Otherwise, check if the promoted instruction is legal or not.
4728 return TLI.isOperationLegalOrCustom(
4729 ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
4730}
4731
4732namespace {
4733
4734/// Hepler class to perform type promotion.
4735class TypePromotionHelper {
4736 /// Utility function to add a promoted instruction \p ExtOpnd to
4737 /// \p PromotedInsts and record the type of extension we have seen.
4738 static void addPromotedInst(InstrToOrigTy &PromotedInsts,
4739 Instruction *ExtOpnd, bool IsSExt) {
4740 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4741 auto [It, Inserted] = PromotedInsts.try_emplace(ExtOpnd);
4742 if (!Inserted) {
4743 // If the new extension is same as original, the information in
4744 // PromotedInsts[ExtOpnd] is still correct.
4745 if (It->second.getInt() == ExtTy)
4746 return;
4747
4748 // Now the new extension is different from old extension, we make
4749 // the type information invalid by setting extension type to
4750 // BothExtension.
4751 ExtTy = BothExtension;
4752 }
4753 It->second = TypeIsSExt(ExtOpnd->getType(), ExtTy);
4754 }
4755
4756 /// Utility function to query the original type of instruction \p Opnd
4757 /// with a matched extension type. If the extension doesn't match, we
4758 /// cannot use the information we had on the original type.
4759 /// BothExtension doesn't match any extension type.
4760 static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
4761 Instruction *Opnd, bool IsSExt) {
4762 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4763 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
4764 if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
4765 return It->second.getPointer();
4766 return nullptr;
4767 }
4768
4769 /// Utility function to check whether or not a sign or zero extension
4770 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
4771 /// either using the operands of \p Inst or promoting \p Inst.
4772 /// The type of the extension is defined by \p IsSExt.
4773 /// In other words, check if:
4774 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
4775 /// #1 Promotion applies:
4776 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
4777 /// #2 Operand reuses:
4778 /// ext opnd1 to ConsideredExtType.
4779 /// \p PromotedInsts maps the instructions to their type before promotion.
4780 static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
4781 const InstrToOrigTy &PromotedInsts, bool IsSExt);
4782
4783 /// Utility function to determine if \p OpIdx should be promoted when
4784 /// promoting \p Inst.
4785 static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
4786 return !(isa<SelectInst>(Inst) && OpIdx == 0);
4787 }
4788
4789 /// Utility function to promote the operand of \p Ext when this
4790 /// operand is a promotable trunc or sext or zext.
4791 /// \p PromotedInsts maps the instructions to their type before promotion.
4792 /// \p CreatedInstsCost[out] contains the cost of all instructions
4793 /// created to promote the operand of Ext.
4794 /// Newly added extensions are inserted in \p Exts.
4795 /// Newly added truncates are inserted in \p Truncs.
4796 /// Should never be called directly.
4797 /// \return The promoted value which is used instead of Ext.
4798 static Value *promoteOperandForTruncAndAnyExt(
4799 Instruction *Ext, TypePromotionTransaction &TPT,
4800 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4801 SmallVectorImpl<Instruction *> *Exts,
4802 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
4803
4804 /// Utility function to promote the operand of \p Ext when this
4805 /// operand is promotable and is not a supported trunc or sext.
4806 /// \p PromotedInsts maps the instructions to their type before promotion.
4807 /// \p CreatedInstsCost[out] contains the cost of all the instructions
4808 /// created to promote the operand of Ext.
4809 /// Newly added extensions are inserted in \p Exts.
4810 /// Newly added truncates are inserted in \p Truncs.
4811 /// Should never be called directly.
4812 /// \return The promoted value which is used instead of Ext.
4813 static Value *promoteOperandForOther(Instruction *Ext,
4814 TypePromotionTransaction &TPT,
4815 InstrToOrigTy &PromotedInsts,
4816 unsigned &CreatedInstsCost,
4817 SmallVectorImpl<Instruction *> *Exts,
4818 SmallVectorImpl<Instruction *> *Truncs,
4819 const TargetLowering &TLI, bool IsSExt);
4820
4821 /// \see promoteOperandForOther.
4822 static Value *signExtendOperandForOther(
4823 Instruction *Ext, TypePromotionTransaction &TPT,
4824 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4825 SmallVectorImpl<Instruction *> *Exts,
4826 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4827 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4828 Exts, Truncs, TLI, true);
4829 }
4830
4831 /// \see promoteOperandForOther.
4832 static Value *zeroExtendOperandForOther(
4833 Instruction *Ext, TypePromotionTransaction &TPT,
4834 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4835 SmallVectorImpl<Instruction *> *Exts,
4836 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4837 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4838 Exts, Truncs, TLI, false);
4839 }
4840
4841public:
4842 /// Type for the utility function that promotes the operand of Ext.
4843 using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
4844 InstrToOrigTy &PromotedInsts,
4845 unsigned &CreatedInstsCost,
4846 SmallVectorImpl<Instruction *> *Exts,
4847 SmallVectorImpl<Instruction *> *Truncs,
4848 const TargetLowering &TLI);
4849
4850 /// Given a sign/zero extend instruction \p Ext, return the appropriate
4851 /// action to promote the operand of \p Ext instead of using Ext.
4852 /// \return NULL if no promotable action is possible with the current
4853 /// sign extension.
4854 /// \p InsertedInsts keeps track of all the instructions inserted by the
4855 /// other CodeGenPrepare optimizations. This information is important
4856 /// because we do not want to promote these instructions as CodeGenPrepare
4857 /// will reinsert them later. Thus creating an infinite loop: create/remove.
4858 /// \p PromotedInsts maps the instructions to their type before promotion.
4859 static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
4860 const TargetLowering &TLI,
4861 const InstrToOrigTy &PromotedInsts);
4862};
4863
4864} // end anonymous namespace
4865
4866bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
4867 Type *ConsideredExtType,
4868 const InstrToOrigTy &PromotedInsts,
4869 bool IsSExt) {
4870 // The promotion helper does not know how to deal with vector types yet.
4871 // To be able to fix that, we would need to fix the places where we
4872 // statically extend, e.g., constants and such.
4873 if (Inst->getType()->isVectorTy())
4874 return false;
4875
4876 // We can always get through zext.
4877 if (isa<ZExtInst>(Inst))
4878 return true;
4879
4880 // sext(sext) is ok too.
4881 if (IsSExt && isa<SExtInst>(Inst))
4882 return true;
4883
4884 // We can get through binary operator, if it is legal. In other words, the
4885 // binary operator must have a nuw or nsw flag.
4886 if (const auto *BinOp = dyn_cast<BinaryOperator>(Inst))
4887 if (isa<OverflowingBinaryOperator>(BinOp) &&
4888 ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
4889 (IsSExt && BinOp->hasNoSignedWrap())))
4890 return true;
4891
4892 // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
4893 if ((Inst->getOpcode() == Instruction::And ||
4894 Inst->getOpcode() == Instruction::Or))
4895 return true;
4896
4897 // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
4898 if (Inst->getOpcode() == Instruction::Xor) {
4899 // Make sure it is not a NOT.
4900 if (const auto *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)))
4901 if (!Cst->getValue().isAllOnes())
4902 return true;
4903 }
4904
4905 // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
4906 // It may change a poisoned value into a regular value, like
4907 // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12
4908 // poisoned value regular value
4909 // It should be OK since undef covers valid value.
4910 if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
4911 return true;
4912
4913 // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
4914 // It may change a poisoned value into a regular value, like
4915 // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
4916 // poisoned value regular value
4917 // It should be OK since undef covers valid value.
4918 if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
4919 const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());
4920 if (ExtInst->hasOneUse()) {
4921 const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
4922 if (AndInst && AndInst->getOpcode() == Instruction::And) {
4923 const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
4924 if (Cst &&
4925 Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
4926 return true;
4927 }
4928 }
4929 }
4930
4931 // Check if we can do the following simplification.
4932 // ext(trunc(opnd)) --> ext(opnd)
4933 if (!isa<TruncInst>(Inst))
4934 return false;
4935
4936 Value *OpndVal = Inst->getOperand(0);
4937 // Check if we can use this operand in the extension.
4938 // If the type is larger than the result type of the extension, we cannot.
4939 if (!OpndVal->getType()->isIntegerTy() ||
4940 OpndVal->getType()->getIntegerBitWidth() >
4941 ConsideredExtType->getIntegerBitWidth())
4942 return false;
4943
4944 // If the operand of the truncate is not an instruction, we will not have
4945 // any information on the dropped bits.
4946 // (Actually we could for constant but it is not worth the extra logic).
4947 Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
4948 if (!Opnd)
4949 return false;
4950
4951 // Check if the source of the type is narrow enough.
4952 // I.e., check that trunc just drops extended bits of the same kind of
4953 // the extension.
4954 // #1 get the type of the operand and check the kind of the extended bits.
4955 const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
4956 if (OpndType)
4957 ;
4958 else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
4959 OpndType = Opnd->getOperand(0)->getType();
4960 else
4961 return false;
4962
4963 // #2 check that the truncate just drops extended bits.
4964 return Inst->getType()->getIntegerBitWidth() >=
4965 OpndType->getIntegerBitWidth();
4966}
4967
4968TypePromotionHelper::Action TypePromotionHelper::getAction(
4969 Instruction *Ext, const SetOfInstrs &InsertedInsts,
4970 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
4971 assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
4972 "Unexpected instruction type");
4973 Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
4974 Type *ExtTy = Ext->getType();
4975 bool IsSExt = isa<SExtInst>(Ext);
4976 // If the operand of the extension is not an instruction, we cannot
4977 // get through.
4978 // If it, check we can get through.
4979 if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
4980 return nullptr;
4981
4982 // Do not promote if the operand has been added by codegenprepare.
4983 // Otherwise, it means we are undoing an optimization that is likely to be
4984 // redone, thus causing potential infinite loop.
4985 if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
4986 return nullptr;
4987
4988 // SExt or Trunc instructions.
4989 // Return the related handler.
4990 if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
4991 isa<ZExtInst>(ExtOpnd))
4992 return promoteOperandForTruncAndAnyExt;
4993
4994 // Regular instruction.
4995 // Abort early if we will have to insert non-free instructions.
4996 if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
4997 return nullptr;
4998 return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
4999}
5000
5001Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
5002 Instruction *SExt, TypePromotionTransaction &TPT,
5003 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
5004 SmallVectorImpl<Instruction *> *Exts,
5005 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
5006 // By construction, the operand of SExt is an instruction. Otherwise we cannot
5007 // get through it and this method should not be called.
5008 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
5009 Value *ExtVal = SExt;
5010 bool HasMergedNonFreeExt = false;
5011 if (isa<ZExtInst>(SExtOpnd)) {
5012 // Replace s|zext(zext(opnd))
5013 // => zext(opnd).
5014 HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
5015 Value *ZExt =
5016 TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
5017 TPT.replaceAllUsesWith(SExt, ZExt);
5018 TPT.eraseInstruction(SExt);
5019 ExtVal = ZExt;
5020 } else {
5021 // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
5022 // => z|sext(opnd).
5023 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
5024 }
5025 CreatedInstsCost = 0;
5026
5027 // Remove dead code.
5028 if (SExtOpnd->use_empty())
5029 TPT.eraseInstruction(SExtOpnd);
5030
5031 // Check if the extension is still needed.
5032 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
5033 if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
5034 if (ExtInst) {
5035 if (Exts)
5036 Exts->push_back(ExtInst);
5037 CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
5038 }
5039 return ExtVal;
5040 }
5041
5042 // At this point we have: ext ty opnd to ty.
5043 // Reassign the uses of ExtInst to the opnd and remove ExtInst.
5044 Value *NextVal = ExtInst->getOperand(0);
5045 TPT.eraseInstruction(ExtInst, NextVal);
5046 return NextVal;
5047}
5048
5049Value *TypePromotionHelper::promoteOperandForOther(
5050 Instruction *Ext, TypePromotionTransaction &TPT,
5051 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
5052 SmallVectorImpl<Instruction *> *Exts,
5053 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
5054 bool IsSExt) {
5055 // By construction, the operand of Ext is an instruction. Otherwise we cannot
5056 // get through it and this method should not be called.
5057 Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
5058 CreatedInstsCost = 0;
5059 if (!ExtOpnd->hasOneUse()) {
5060 // ExtOpnd will be promoted.
5061 // All its uses, but Ext, will need to use a truncated value of the
5062 // promoted version.
5063 // Create the truncate now.
5064 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
5065 if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
5066 // Insert it just after the definition.
5067 ITrunc->moveAfter(ExtOpnd);
5068 if (Truncs)
5069 Truncs->push_back(ITrunc);
5070 }
5071
5072 TPT.replaceAllUsesWith(ExtOpnd, Trunc);
5073 // Restore the operand of Ext (which has been replaced by the previous call
5074 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
5075 TPT.setOperand(Ext, 0, ExtOpnd);
5076 }
5077
5078 // Get through the Instruction:
5079 // 1. Update its type.
5080 // 2. Replace the uses of Ext by Inst.
5081 // 3. Extend each operand that needs to be extended.
5082
5083 // Remember the original type of the instruction before promotion.
5084 // This is useful to know that the high bits are sign extended bits.
5085 addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
5086 // Step #1.
5087 TPT.mutateType(ExtOpnd, Ext->getType());
5088 // Step #2.
5089 TPT.replaceAllUsesWith(Ext, ExtOpnd);
5090 // Step #3.
5091 LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
5092 for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
5093 ++OpIdx) {
5094 LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
5095 if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
5096 !shouldExtOperand(ExtOpnd, OpIdx)) {
5097 LLVM_DEBUG(dbgs() << "No need to propagate\n");
5098 continue;
5099 }
5100 // Check if we can statically extend the operand.
5101 Value *Opnd = ExtOpnd->getOperand(OpIdx);
5102 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
5103 LLVM_DEBUG(dbgs() << "Statically extend\n");
5104 unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
5105 APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
5106 : Cst->getValue().zext(BitWidth);
5107 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
5108 continue;
5109 }
5110 // UndefValue are typed, so we have to statically sign extend them.
5111 if (isa<UndefValue>(Opnd)) {
5112 LLVM_DEBUG(dbgs() << "Statically extend\n");
5113 TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
5114 continue;
5115 }
5116
5117 // Otherwise we have to explicitly sign extend the operand.
5118 Value *ValForExtOpnd = IsSExt
5119 ? TPT.createSExt(ExtOpnd, Opnd, Ext->getType())
5120 : TPT.createZExt(ExtOpnd, Opnd, Ext->getType());
5121 TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
5122 Instruction *InstForExtOpnd = dyn_cast<Instruction>(ValForExtOpnd);
5123 if (!InstForExtOpnd)
5124 continue;
5125
5126 if (Exts)
5127 Exts->push_back(InstForExtOpnd);
5128
5129 CreatedInstsCost += !TLI.isExtFree(InstForExtOpnd);
5130 }
5131 LLVM_DEBUG(dbgs() << "Extension is useless now\n");
5132 TPT.eraseInstruction(Ext);
5133 return ExtOpnd;
5134}
5135
5136/// Check whether or not promoting an instruction to a wider type is profitable.
5137/// \p NewCost gives the cost of extension instructions created by the
5138/// promotion.
5139/// \p OldCost gives the cost of extension instructions before the promotion
5140/// plus the number of instructions that have been
5141/// matched in the addressing mode the promotion.
5142/// \p PromotedOperand is the value that has been promoted.
5143/// \return True if the promotion is profitable, false otherwise.
5144bool AddressingModeMatcher::isPromotionProfitable(
5145 unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
5146 LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
5147 << '\n');
5148 // The cost of the new extensions is greater than the cost of the
5149 // old extension plus what we folded.
5150 // This is not profitable.
5151 if (NewCost > OldCost)
5152 return false;
5153 if (NewCost < OldCost)
5154 return true;
5155 // The promotion is neutral but it may help folding the sign extension in
5156 // loads for instance.
5157 // Check that we did not create an illegal instruction.
5158 return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
5159}
5160
5161/// Given an instruction or constant expr, see if we can fold the operation
5162/// into the addressing mode. If so, update the addressing mode and return
5163/// true, otherwise return false without modifying AddrMode.
5164/// If \p MovedAway is not NULL, it contains the information of whether or
5165/// not AddrInst has to be folded into the addressing mode on success.
5166/// If \p MovedAway == true, \p AddrInst will not be part of the addressing
5167/// because it has been moved away.
5168/// Thus AddrInst must not be added in the matched instructions.
5169/// This state can happen when AddrInst is a sext, since it may be moved away.
5170/// Therefore, AddrInst may not be valid when MovedAway is true and it must
5171/// not be referenced anymore.
5172bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
5173 unsigned Depth,
5174 bool *MovedAway) {
5175 // Avoid exponential behavior on extremely deep expression trees.
5176 if (Depth >= 5)
5177 return false;
5178
5179 // By default, all matched instructions stay in place.
5180 if (MovedAway)
5181 *MovedAway = false;
5182
5183 switch (Opcode) {
5184 case Instruction::PtrToInt:
5185 // PtrToInt is always a noop, as we know that the int type is pointer sized.
5186 return matchAddr(AddrInst->getOperand(0), Depth);
5187 case Instruction::IntToPtr: {
5188 auto AS = AddrInst->getType()->getPointerAddressSpace();
5189 auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
5190 // This inttoptr is a no-op if the integer type is pointer sized.
5191 if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
5192 return matchAddr(AddrInst->getOperand(0), Depth);
5193 return false;
5194 }
5195 case Instruction::BitCast:
5196 // BitCast is always a noop, and we can handle it as long as it is
5197 // int->int or pointer->pointer (we don't want int<->fp or something).
5198 if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
5199 // Don't touch identity bitcasts. These were probably put here by LSR,
5200 // and we don't want to mess around with them. Assume it knows what it
5201 // is doing.
5202 AddrInst->getOperand(0)->getType() != AddrInst->getType())
5203 return matchAddr(AddrInst->getOperand(0), Depth);
5204 return false;
5205 case Instruction::AddrSpaceCast: {
5206 unsigned SrcAS =
5207 AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
5208 unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
5209 if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS))
5210 return matchAddr(AddrInst->getOperand(0), Depth);
5211 return false;
5212 }
5213 case Instruction::Add: {
5214 // Check to see if we can merge in one operand, then the other. If so, we
5215 // win.
5216 ExtAddrMode BackupAddrMode = AddrMode;
5217 unsigned OldSize = AddrModeInsts.size();
5218 // Start a transaction at this point.
5219 // The LHS may match but not the RHS.
5220 // Therefore, we need a higher level restoration point to undo partially
5221 // matched operation.
5222 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5223 TPT.getRestorationPoint();
5224
5225 // Try to match an integer constant second to increase its chance of ending
5226 // up in `BaseOffs`, resp. decrease its chance of ending up in `BaseReg`.
5227 int First = 0, Second = 1;
5228 if (isa<ConstantInt>(AddrInst->getOperand(First))
5229 && !isa<ConstantInt>(AddrInst->getOperand(Second)))
5230 std::swap(First, Second);
5231 AddrMode.InBounds = false;
5232 if (matchAddr(AddrInst->getOperand(First), Depth + 1) &&
5233 matchAddr(AddrInst->getOperand(Second), Depth + 1))
5234 return true;
5235
5236 // Restore the old addr mode info.
5237 AddrMode = BackupAddrMode;
5238 AddrModeInsts.resize(OldSize);
5239 TPT.rollback(LastKnownGood);
5240
5241 // Otherwise this was over-aggressive. Try merging operands in the opposite
5242 // order.
5243 if (matchAddr(AddrInst->getOperand(Second), Depth + 1) &&
5244 matchAddr(AddrInst->getOperand(First), Depth + 1))
5245 return true;
5246
5247 // Otherwise we definitely can't merge the ADD in.
5248 AddrMode = BackupAddrMode;
5249 AddrModeInsts.resize(OldSize);
5250 TPT.rollback(LastKnownGood);
5251 break;
5252 }
5253 // case Instruction::Or:
5254 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
5255 // break;
5256 case Instruction::Mul:
5257 case Instruction::Shl: {
5258 // Can only handle X*C and X << C.
5259 AddrMode.InBounds = false;
5260 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
5261 if (!RHS || RHS->getBitWidth() > 64)
5262 return false;
5263 int64_t Scale = Opcode == Instruction::Shl
5264 ? 1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1)
5265 : RHS->getSExtValue();
5266
5267 return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
5268 }
5269 case Instruction::GetElementPtr: {
5270 // Scan the GEP. We check it if it contains constant offsets and at most
5271 // one variable offset.
5272 int VariableOperand = -1;
5273 unsigned VariableScale = 0;
5274
5275 int64_t ConstantOffset = 0;
5276 gep_type_iterator GTI = gep_type_begin(AddrInst);
5277 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
5278 if (StructType *STy = GTI.getStructTypeOrNull()) {
5279 const StructLayout *SL = DL.getStructLayout(STy);
5280 unsigned Idx =
5281 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
5282 ConstantOffset += SL->getElementOffset(Idx);
5283 } else {
5284 TypeSize TS = GTI.getSequentialElementStride(DL);
5285 if (TS.isNonZero()) {
5286 // The optimisations below currently only work for fixed offsets.
5287 if (TS.isScalable())
5288 return false;
5289 int64_t TypeSize = TS.getFixedValue();
5290 if (ConstantInt *CI =
5291 dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
5292 const APInt &CVal = CI->getValue();
5293 if (CVal.getSignificantBits() <= 64) {
5294 ConstantOffset += CVal.getSExtValue() * TypeSize;
5295 continue;
5296 }
5297 }
5298 // We only allow one variable index at the moment.
5299 if (VariableOperand != -1)
5300 return false;
5301
5302 // Remember the variable index.
5303 VariableOperand = i;
5304 VariableScale = TypeSize;
5305 }
5306 }
5307 }
5308
5309 // A common case is for the GEP to only do a constant offset. In this case,
5310 // just add it to the disp field and check validity.
5311 if (VariableOperand == -1) {
5312 AddrMode.BaseOffs += ConstantOffset;
5313 if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5314 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5315 AddrMode.InBounds = false;
5316 return true;
5317 }
5318 AddrMode.BaseOffs -= ConstantOffset;
5319
5321 TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
5322 ConstantOffset > 0) {
5323 // Record GEPs with non-zero offsets as candidates for splitting in
5324 // the event that the offset cannot fit into the r+i addressing mode.
5325 // Simple and common case that only one GEP is used in calculating the
5326 // address for the memory access.
5327 Value *Base = AddrInst->getOperand(0);
5328 auto *BaseI = dyn_cast<Instruction>(Base);
5329 auto *GEP = cast<GetElementPtrInst>(AddrInst);
5331 (BaseI && !isa<CastInst>(BaseI) &&
5332 !isa<GetElementPtrInst>(BaseI))) {
5333 // Make sure the parent block allows inserting non-PHI instructions
5334 // before the terminator.
5335 BasicBlock *Parent = BaseI ? BaseI->getParent()
5336 : &GEP->getFunction()->getEntryBlock();
5337 if (!Parent->getTerminator()->isEHPad())
5338 LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
5339 }
5340 }
5341
5342 return false;
5343 }
5344
5345 // Save the valid addressing mode in case we can't match.
5346 ExtAddrMode BackupAddrMode = AddrMode;
5347 unsigned OldSize = AddrModeInsts.size();
5348
5349 // See if the scale and offset amount is valid for this target.
5350 AddrMode.BaseOffs += ConstantOffset;
5351 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5352 AddrMode.InBounds = false;
5353
5354 // Match the base operand of the GEP.
5355 if (!matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5356 // If it couldn't be matched, just stuff the value in a register.
5357 if (AddrMode.HasBaseReg) {
5358 AddrMode = BackupAddrMode;
5359 AddrModeInsts.resize(OldSize);
5360 return false;
5361 }
5362 AddrMode.HasBaseReg = true;
5363 AddrMode.BaseReg = AddrInst->getOperand(0);
5364 }
5365
5366 // Match the remaining variable portion of the GEP.
5367 if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
5368 Depth)) {
5369 // If it couldn't be matched, try stuffing the base into a register
5370 // instead of matching it, and retrying the match of the scale.
5371 AddrMode = BackupAddrMode;
5372 AddrModeInsts.resize(OldSize);
5373 if (AddrMode.HasBaseReg)
5374 return false;
5375 AddrMode.HasBaseReg = true;
5376 AddrMode.BaseReg = AddrInst->getOperand(0);
5377 AddrMode.BaseOffs += ConstantOffset;
5378 if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
5379 VariableScale, Depth)) {
5380 // If even that didn't work, bail.
5381 AddrMode = BackupAddrMode;
5382 AddrModeInsts.resize(OldSize);
5383 return false;
5384 }
5385 }
5386
5387 return true;
5388 }
5389 case Instruction::SExt:
5390 case Instruction::ZExt: {
5391 Instruction *Ext = dyn_cast<Instruction>(AddrInst);
5392 if (!Ext)
5393 return false;
5394
5395 // Try to move this ext out of the way of the addressing mode.
5396 // Ask for a method for doing so.
5397 TypePromotionHelper::Action TPH =
5398 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
5399 if (!TPH)
5400 return false;
5401
5402 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5403 TPT.getRestorationPoint();
5404 unsigned CreatedInstsCost = 0;
5405 unsigned ExtCost = !TLI.isExtFree(Ext);
5406 Value *PromotedOperand =
5407 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
5408 // SExt has been moved away.
5409 // Thus either it will be rematched later in the recursive calls or it is
5410 // gone. Anyway, we must not fold it into the addressing mode at this point.
5411 // E.g.,
5412 // op = add opnd, 1
5413 // idx = ext op
5414 // addr = gep base, idx
5415 // is now:
5416 // promotedOpnd = ext opnd <- no match here
5417 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
5418 // addr = gep base, op <- match
5419 if (MovedAway)
5420 *MovedAway = true;
5421
5422 assert(PromotedOperand &&
5423 "TypePromotionHelper should have filtered out those cases");
5424
5425 ExtAddrMode BackupAddrMode = AddrMode;
5426 unsigned OldSize = AddrModeInsts.size();
5427
5428 if (!matchAddr(PromotedOperand, Depth) ||
5429 // The total of the new cost is equal to the cost of the created
5430 // instructions.
5431 // The total of the old cost is equal to the cost of the extension plus
5432 // what we have saved in the addressing mode.
5433 !isPromotionProfitable(CreatedInstsCost,
5434 ExtCost + (AddrModeInsts.size() - OldSize),
5435 PromotedOperand)) {
5436 AddrMode = BackupAddrMode;
5437 AddrModeInsts.resize(OldSize);
5438 LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
5439 TPT.rollback(LastKnownGood);
5440 return false;
5441 }
5442
5443 // SExt has been deleted. Make sure it is not referenced by the AddrMode.
5444 AddrMode.replaceWith(Ext, PromotedOperand);
5445 return true;
5446 }
5447 case Instruction::Call:
5448 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(AddrInst)) {
5449 if (II->getIntrinsicID() == Intrinsic::threadlocal_address) {
5450 GlobalValue &GV = cast<GlobalValue>(*II->getArgOperand(0));
5451 if (TLI.addressingModeSupportsTLS(GV))
5452 return matchAddr(AddrInst->getOperand(0), Depth);
5453 }
5454 }
5455 break;
5456 }
5457 return false;
5458}
5459
5460/// If we can, try to add the value of 'Addr' into the current addressing mode.
5461/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
5462/// unmodified. This assumes that Addr is either a pointer type or intptr_t
5463/// for the target.
5464///
5465bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
5466 // Start a transaction at this point that we will rollback if the matching
5467 // fails.
5468 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5469 TPT.getRestorationPoint();
5470 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
5471 if (CI->getValue().isSignedIntN(64)) {
5472 // Check if the addition would result in a signed overflow.
5473 int64_t Result;
5474 bool Overflow =
5475 AddOverflow(AddrMode.BaseOffs, CI->getSExtValue(), Result);
5476 if (!Overflow) {
5477 // Fold in immediates if legal for the target.
5478 AddrMode.BaseOffs = Result;
5479 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5480 return true;
5481 AddrMode.BaseOffs -= CI->getSExtValue();
5482 }
5483 }
5484 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
5485 // If this is a global variable, try to fold it into the addressing mode.
5486 if (!AddrMode.BaseGV) {
5487 AddrMode.BaseGV = GV;
5488 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5489 return true;
5490 AddrMode.BaseGV = nullptr;
5491 }
5492 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
5493 ExtAddrMode BackupAddrMode = AddrMode;
5494 unsigned OldSize = AddrModeInsts.size();
5495
5496 // Check to see if it is possible to fold this operation.
5497 bool MovedAway = false;
5498 if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
5499 // This instruction may have been moved away. If so, there is nothing
5500 // to check here.
5501 if (MovedAway)
5502 return true;
5503 // Okay, it's possible to fold this. Check to see if it is actually
5504 // *profitable* to do so. We use a simple cost model to avoid increasing
5505 // register pressure too much.
5506 if (I->hasOneUse() ||
5507 isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
5508 AddrModeInsts.push_back(I);
5509 return true;
5510 }
5511
5512 // It isn't profitable to do this, roll back.
5513 AddrMode = BackupAddrMode;
5514 AddrModeInsts.resize(OldSize);
5515 TPT.rollback(LastKnownGood);
5516 }
5517 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
5518 if (matchOperationAddr(CE, CE->getOpcode(), Depth))
5519 return true;
5520 TPT.rollback(LastKnownGood);
5521 } else if (isa<ConstantPointerNull>(Addr)) {
5522 // Null pointer gets folded without affecting the addressing mode.
5523 return true;
5524 }
5525
5526 // Worse case, the target should support [reg] addressing modes. :)
5527 if (!AddrMode.HasBaseReg) {
5528 AddrMode.HasBaseReg = true;
5529 AddrMode.BaseReg = Addr;
5530 // Still check for legality in case the target supports [imm] but not [i+r].
5531 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5532 return true;
5533 AddrMode.HasBaseReg = false;
5534 AddrMode.BaseReg = nullptr;
5535 }
5536
5537 // If the base register is already taken, see if we can do [r+r].
5538 if (AddrMode.Scale == 0) {
5539 AddrMode.Scale = 1;
5540 AddrMode.ScaledReg = Addr;
5541 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5542 return true;
5543 AddrMode.Scale = 0;
5544 AddrMode.ScaledReg = nullptr;
5545 }
5546 // Couldn't match.
5547 TPT.rollback(LastKnownGood);
5548 return false;
5549}
5550
5551/// Check to see if all uses of OpVal by the specified inline asm call are due
5552/// to memory operands. If so, return true, otherwise return false.
5554 const TargetLowering &TLI,
5555 const TargetRegisterInfo &TRI) {
5556 const Function *F = CI->getFunction();
5557 TargetLowering::AsmOperandInfoVector TargetConstraints =
5558 TLI.ParseConstraints(F->getDataLayout(), &TRI, *CI);
5559
5560 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
5561 // Compute the constraint code and ConstraintType to use.
5562 TLI.ComputeConstraintToUse(OpInfo, SDValue());
5563
5564 // If this asm operand is our Value*, and if it isn't an indirect memory
5565 // operand, we can't fold it! TODO: Also handle C_Address?
5566 if (OpInfo.CallOperandVal == OpVal &&
5567 (OpInfo.ConstraintType != TargetLowering::C_Memory ||
5568 !OpInfo.isIndirect))
5569 return false;
5570 }
5571
5572 return true;
5573}
5574
5575/// Recursively walk all the uses of I until we find a memory use.
5576/// If we find an obviously non-foldable instruction, return true.
5577/// Add accessed addresses and types to MemoryUses.
5579 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5580 SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
5581 const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
5582 BlockFrequencyInfo *BFI, unsigned &SeenInsts) {
5583 // If we already considered this instruction, we're done.
5584 if (!ConsideredInsts.insert(I).second)
5585 return false;
5586
5587 // If this is an obviously unfoldable instruction, bail out.
5588 if (!MightBeFoldableInst(I))
5589 return true;
5590
5591 // Loop over all the uses, recursively processing them.
5592 for (Use &U : I->uses()) {
5593 // Conservatively return true if we're seeing a large number or a deep chain
5594 // of users. This avoids excessive compilation times in pathological cases.
5595 if (SeenInsts++ >= MaxAddressUsersToScan)
5596 return true;
5597
5598 Instruction *UserI = cast<Instruction>(U.getUser());
5599 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
5600 MemoryUses.push_back({&U, LI->getType()});
5601 continue;
5602 }
5603
5604 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
5605 if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
5606 return true; // Storing addr, not into addr.
5607 MemoryUses.push_back({&U, SI->getValueOperand()->getType()});
5608 continue;
5609 }
5610
5611 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
5612 if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex())
5613 return true; // Storing addr, not into addr.
5614 MemoryUses.push_back({&U, RMW->getValOperand()->getType()});
5615 continue;
5616 }
5617
5619 if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex())
5620 return true; // Storing addr, not into addr.
5621 MemoryUses.push_back({&U, CmpX->getCompareOperand()->getType()});
5622 continue;
5623 }
5624
5627 Type *AccessTy;
5628 if (!TLI.getAddrModeArguments(II, PtrOps, AccessTy))
5629 return true;
5630
5631 if (!find(PtrOps, U.get()))
5632 return true;
5633
5634 MemoryUses.push_back({&U, AccessTy});
5635 continue;
5636 }
5637
5638 if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
5639 if (CI->hasFnAttr(Attribute::Cold)) {
5640 // If this is a cold call, we can sink the addressing calculation into
5641 // the cold path. See optimizeCallInst
5642 if (!llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI))
5643 continue;
5644 }
5645
5646 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand());
5647 if (!IA)
5648 return true;
5649
5650 // If this is a memory operand, we're cool, otherwise bail out.
5651 if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
5652 return true;
5653 continue;
5654 }
5655
5656 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5657 PSI, BFI, SeenInsts))
5658 return true;
5659 }
5660
5661 return false;
5662}
5663
5665 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5666 const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize,
5668 unsigned SeenInsts = 0;
5669 SmallPtrSet<Instruction *, 16> ConsideredInsts;
5670 return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5671 PSI, BFI, SeenInsts);
5672}
5673
5674
5675/// Return true if Val is already known to be live at the use site that we're
5676/// folding it into. If so, there is no cost to include it in the addressing
5677/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
5678/// instruction already.
5679bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,
5680 Value *KnownLive1,
5681 Value *KnownLive2) {
5682 // If Val is either of the known-live values, we know it is live!
5683 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
5684 return true;
5685
5686 // All values other than instructions and arguments (e.g. constants) are live.
5687 if (!isa<Instruction>(Val) && !isa<Argument>(Val))
5688 return true;
5689
5690 // If Val is a constant sized alloca in the entry block, it is live, this is
5691 // true because it is just a reference to the stack/frame pointer, which is
5692 // live for the whole function.
5693 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
5694 if (AI->isStaticAlloca())
5695 return true;
5696
5697 // Check to see if this value is already used in the memory instruction's
5698 // block. If so, it's already live into the block at the very least, so we
5699 // can reasonably fold it.
5700 return Val->isUsedInBasicBlock(MemoryInst->getParent());
5701}
5702
5703/// It is possible for the addressing mode of the machine to fold the specified
5704/// instruction into a load or store that ultimately uses it.
5705/// However, the specified instruction has multiple uses.
5706/// Given this, it may actually increase register pressure to fold it
5707/// into the load. For example, consider this code:
5708///
5709/// X = ...
5710/// Y = X+1
5711/// use(Y) -> nonload/store
5712/// Z = Y+1
5713/// load Z
5714///
5715/// In this case, Y has multiple uses, and can be folded into the load of Z
5716/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
5717/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
5718/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
5719/// number of computations either.
5720///
5721/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
5722/// X was live across 'load Z' for other reasons, we actually *would* want to
5723/// fold the addressing mode in the Z case. This would make Y die earlier.
5724bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
5725 Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter) {
5726 if (IgnoreProfitability)
5727 return true;
5728
5729 // AMBefore is the addressing mode before this instruction was folded into it,
5730 // and AMAfter is the addressing mode after the instruction was folded. Get
5731 // the set of registers referenced by AMAfter and subtract out those
5732 // referenced by AMBefore: this is the set of values which folding in this
5733 // address extends the lifetime of.
5734 //
5735 // Note that there are only two potential values being referenced here,
5736 // BaseReg and ScaleReg (global addresses are always available, as are any
5737 // folded immediates).
5738 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
5739
5740 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
5741 // lifetime wasn't extended by adding this instruction.
5742 if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5743 BaseReg = nullptr;
5744 if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5745 ScaledReg = nullptr;
5746
5747 // If folding this instruction (and it's subexprs) didn't extend any live
5748 // ranges, we're ok with it.
5749 if (!BaseReg && !ScaledReg)
5750 return true;
5751
5752 // If all uses of this instruction can have the address mode sunk into them,
5753 // we can remove the addressing mode and effectively trade one live register
5754 // for another (at worst.) In this context, folding an addressing mode into
5755 // the use is just a particularly nice way of sinking it.
5757 if (FindAllMemoryUses(I, MemoryUses, TLI, TRI, OptSize, PSI, BFI))
5758 return false; // Has a non-memory, non-foldable use!
5759
5760 // Now that we know that all uses of this instruction are part of a chain of
5761 // computation involving only operations that could theoretically be folded
5762 // into a memory use, loop over each of these memory operation uses and see
5763 // if they could *actually* fold the instruction. The assumption is that
5764 // addressing modes are cheap and that duplicating the computation involved
5765 // many times is worthwhile, even on a fastpath. For sinking candidates
5766 // (i.e. cold call sites), this serves as a way to prevent excessive code
5767 // growth since most architectures have some reasonable small and fast way to
5768 // compute an effective address. (i.e LEA on x86)
5769 SmallVector<Instruction *, 32> MatchedAddrModeInsts;
5770 for (const std::pair<Use *, Type *> &Pair : MemoryUses) {
5771 Value *Address = Pair.first->get();
5772 Instruction *UserI = cast<Instruction>(Pair.first->getUser());
5773 Type *AddressAccessTy = Pair.second;
5774 unsigned AS = Address->getType()->getPointerAddressSpace();
5775
5776 // Do a match against the root of this address, ignoring profitability. This
5777 // will tell us if the addressing mode for the memory operation will
5778 // *actually* cover the shared instruction.
5779 ExtAddrMode Result;
5780 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5781 0);
5782 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5783 TPT.getRestorationPoint();
5784 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
5785 AddressAccessTy, AS, UserI, Result,
5786 InsertedInsts, PromotedInsts, TPT,
5787 LargeOffsetGEP, OptSize, PSI, BFI);
5788 Matcher.IgnoreProfitability = true;
5789 bool Success = Matcher.matchAddr(Address, 0);
5790 (void)Success;
5791 assert(Success && "Couldn't select *anything*?");
5792
5793 // The match was to check the profitability, the changes made are not
5794 // part of the original matcher. Therefore, they should be dropped
5795 // otherwise the original matcher will not present the right state.
5796 TPT.rollback(LastKnownGood);
5797
5798 // If the match didn't cover I, then it won't be shared by it.
5799 if (!is_contained(MatchedAddrModeInsts, I))
5800 return false;
5801
5802 MatchedAddrModeInsts.clear();
5803 }
5804
5805 return true;
5806}
5807
5808/// Return true if the specified values are defined in a
5809/// different basic block than BB.
5810static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
5812 return I->getParent() != BB;
5813 return false;
5814}
5815
5816// Find an insert position of Addr for MemoryInst. We can't guarantee MemoryInst
5817// is the first instruction that will use Addr. So we need to find the first
5818// user of Addr in current BB.
5820 Value *SunkAddr) {
5821 if (Addr->hasOneUse())
5822 return MemoryInst->getIterator();
5823
5824 // We already have a SunkAddr in current BB, but we may need to insert cast
5825 // instruction after it.
5826 if (SunkAddr) {
5827 if (Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr))
5828 return std::next(AddrInst->getIterator());
5829 }
5830
5831 // Find the first user of Addr in current BB.
5832 Instruction *Earliest = MemoryInst;
5833 for (User *U : Addr->users()) {
5834 Instruction *UserInst = dyn_cast<Instruction>(U);
5835 if (UserInst && UserInst->getParent() == MemoryInst->getParent()) {
5836 if (isa<PHINode>(UserInst) || UserInst->isDebugOrPseudoInst())
5837 continue;
5838 if (UserInst->comesBefore(Earliest))
5839 Earliest = UserInst;
5840 }
5841 }
5842 return Earliest->getIterator();
5843}
5844
5845/// Sink addressing mode computation immediately before MemoryInst if doing so
5846/// can be done without increasing register pressure. The need for the
5847/// register pressure constraint means this can end up being an all or nothing
5848/// decision for all uses of the same addressing computation.
5849///
5850/// Load and Store Instructions often have addressing modes that can do
5851/// significant amounts of computation. As such, instruction selection will try
5852/// to get the load or store to do as much computation as possible for the
5853/// program. The problem is that isel can only see within a single block. As
5854/// such, we sink as much legal addressing mode work into the block as possible.
5855///
5856/// This method is used to optimize both load/store and inline asms with memory
5857/// operands. It's also used to sink addressing computations feeding into cold
5858/// call sites into their (cold) basic block.
5859///
5860/// The motivation for handling sinking into cold blocks is that doing so can
5861/// both enable other address mode sinking (by satisfying the register pressure
5862/// constraint above), and reduce register pressure globally (by removing the
5863/// addressing mode computation from the fast path entirely.).
5864bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
5865 Type *AccessTy, unsigned AddrSpace) {
5866 Value *Repl = Addr;
5867
5868 // Try to collapse single-value PHI nodes. This is necessary to undo
5869 // unprofitable PRE transformations.
5870 SmallVector<Value *, 8> worklist;
5871 SmallPtrSet<Value *, 16> Visited;
5872 worklist.push_back(Addr);
5873
5874 // Use a worklist to iteratively look through PHI and select nodes, and
5875 // ensure that the addressing mode obtained from the non-PHI/select roots of
5876 // the graph are compatible.
5877 bool PhiOrSelectSeen = false;
5878 SmallVector<Instruction *, 16> AddrModeInsts;
5879 AddressingModeCombiner AddrModes(*DL, Addr);
5880 TypePromotionTransaction TPT(RemovedInsts);
5881 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5882 TPT.getRestorationPoint();
5883 while (!worklist.empty()) {
5884 Value *V = worklist.pop_back_val();
5885
5886 // We allow traversing cyclic Phi nodes.
5887 // In case of success after this loop we ensure that traversing through
5888 // Phi nodes ends up with all cases to compute address of the form
5889 // BaseGV + Base + Scale * Index + Offset
5890 // where Scale and Offset are constants and BaseGV, Base and Index
5891 // are exactly the same Values in all cases.
5892 // It means that BaseGV, Scale and Offset dominate our memory instruction
5893 // and have the same value as they had in address computation represented
5894 // as Phi. So we can safely sink address computation to memory instruction.
5895 if (!Visited.insert(V).second)
5896 continue;
5897
5898 // For a PHI node, push all of its incoming values.
5899 if (PHINode *P = dyn_cast<PHINode>(V)) {
5900 append_range(worklist, P->incoming_values());
5901 PhiOrSelectSeen = true;
5902 continue;
5903 }
5904 // Similar for select.
5905 if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
5906 worklist.push_back(SI->getFalseValue());
5907 worklist.push_back(SI->getTrueValue());
5908 PhiOrSelectSeen = true;
5909 continue;
5910 }
5911
5912 // For non-PHIs, determine the addressing mode being computed. Note that
5913 // the result may differ depending on what other uses our candidate
5914 // addressing instructions might have.
5915 AddrModeInsts.clear();
5916 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5917 0);
5918 // Defer the query (and possible computation of) the dom tree to point of
5919 // actual use. It's expected that most address matches don't actually need
5920 // the domtree.
5921 auto getDTFn = [this]() -> const DominatorTree & { return getDT(); };
5922 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
5923 V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn,
5924 *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
5925 BFI);
5926
5927 GetElementPtrInst *GEP = LargeOffsetGEP.first;
5928 if (GEP && !NewGEPBases.count(GEP)) {
5929 // If splitting the underlying data structure can reduce the offset of a
5930 // GEP, collect the GEP. Skip the GEPs that are the new bases of
5931 // previously split data structures.
5932 LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
5933 LargeOffsetGEPID.insert(std::make_pair(GEP, LargeOffsetGEPID.size()));
5934 }
5935
5936 NewAddrMode.OriginalValue = V;
5937 if (!AddrModes.addNewAddrMode(NewAddrMode))
5938 break;
5939 }
5940
5941 // Try to combine the AddrModes we've collected. If we couldn't collect any,
5942 // or we have multiple but either couldn't combine them or combining them
5943 // wouldn't do anything useful, bail out now.
5944 if (!AddrModes.combineAddrModes()) {
5945 TPT.rollback(LastKnownGood);
5946 return false;
5947 }
5948 bool Modified = TPT.commit();
5949
5950 // Get the combined AddrMode (or the only AddrMode, if we only had one).
5951 ExtAddrMode AddrMode = AddrModes.getAddrMode();
5952
5953 // If all the instructions matched are already in this BB, don't do anything.
5954 // If we saw a Phi node then it is not local definitely, and if we saw a
5955 // select then we want to push the address calculation past it even if it's
5956 // already in this BB.
5957 if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
5958 return IsNonLocalValue(V, MemoryInst->getParent());
5959 })) {
5960 LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
5961 << "\n");
5962 return Modified;
5963 }
5964
5965 // Now that we determined the addressing expression we want to use and know
5966 // that we have to sink it into this block. Check to see if we have already
5967 // done this for some other load/store instr in this block. If so, reuse
5968 // the computation. Before attempting reuse, check if the address is valid
5969 // as it may have been erased.
5970
5971 WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
5972
5973 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
5974 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
5975
5976 // The current BB may be optimized multiple times, we can't guarantee the
5977 // reuse of Addr happens later, call findInsertPos to find an appropriate
5978 // insert position.
5979 auto InsertPos = findInsertPos(Addr, MemoryInst, SunkAddr);
5980
5981 // TODO: Adjust insert point considering (Base|Scaled)Reg if possible.
5982 if (!SunkAddr) {
5983 auto &DT = getDT();
5984 if ((AddrMode.BaseReg && !DT.dominates(AddrMode.BaseReg, &*InsertPos)) ||
5985 (AddrMode.ScaledReg && !DT.dominates(AddrMode.ScaledReg, &*InsertPos)))
5986 return Modified;
5987 }
5988
5989 IRBuilder<> Builder(MemoryInst->getParent(), InsertPos);
5990
5991 if (SunkAddr) {
5992 LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
5993 << " for " << *MemoryInst << "\n");
5994 if (SunkAddr->getType() != Addr->getType()) {
5995 if (SunkAddr->getType()->getPointerAddressSpace() !=
5996 Addr->getType()->getPointerAddressSpace() &&
5997 !DL->isNonIntegralPointerType(Addr->getType())) {
5998 // There are two reasons the address spaces might not match: a no-op
5999 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
6000 // ptrtoint/inttoptr pair to ensure we match the original semantics.
6001 // TODO: allow bitcast between different address space pointers with the
6002 // same size.
6003 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
6004 SunkAddr =
6005 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
6006 } else
6007 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
6008 }
6010 SubtargetInfo->addrSinkUsingGEPs())) {
6011 // By default, we use the GEP-based method when AA is used later. This
6012 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
6013 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
6014 << " for " << *MemoryInst << "\n");
6015 Value *ResultPtr = nullptr, *ResultIndex = nullptr;
6016
6017 // First, find the pointer.
6018 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
6019 ResultPtr = AddrMode.BaseReg;
6020 AddrMode.BaseReg = nullptr;
6021 }
6022
6023 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
6024 // We can't add more than one pointer together, nor can we scale a
6025 // pointer (both of which seem meaningless).
6026 if (ResultPtr || AddrMode.Scale != 1)
6027 return Modified;
6028
6029 ResultPtr = AddrMode.ScaledReg;
6030 AddrMode.Scale = 0;
6031 }
6032
6033 // It is only safe to sign extend the BaseReg if we know that the math
6034 // required to create it did not overflow before we extend it. Since
6035 // the original IR value was tossed in favor of a constant back when
6036 // the AddrMode was created we need to bail out gracefully if widths
6037 // do not match instead of extending it.
6038 //
6039 // (See below for code to add the scale.)
6040 if (AddrMode.Scale) {
6041 Type *ScaledRegTy = AddrMode.ScaledReg->getType();
6042 if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
6043 cast<IntegerType>(ScaledRegTy)->getBitWidth())
6044 return Modified;
6045 }
6046
6047 GlobalValue *BaseGV = AddrMode.BaseGV;
6048 if (BaseGV != nullptr) {
6049 if (ResultPtr)
6050 return Modified;
6051
6052 if (BaseGV->isThreadLocal()) {
6053 ResultPtr = Builder.CreateThreadLocalAddress(BaseGV);
6054 } else {
6055 ResultPtr = BaseGV;
6056 }
6057 }
6058
6059 // If the real base value actually came from an inttoptr, then the matcher
6060 // will look through it and provide only the integer value. In that case,
6061 // use it here.
6062 if (!DL->isNonIntegralPointerType(Addr->getType())) {
6063 if (!ResultPtr && AddrMode.BaseReg) {
6064 ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
6065 "sunkaddr");
6066 AddrMode.BaseReg = nullptr;
6067 } else if (!ResultPtr && AddrMode.Scale == 1) {
6068 ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
6069 "sunkaddr");
6070 AddrMode.Scale = 0;
6071 }
6072 }
6073
6074 if (!ResultPtr && !AddrMode.BaseReg && !AddrMode.Scale &&
6075 !AddrMode.BaseOffs) {
6076 SunkAddr = Constant::getNullValue(Addr->getType());
6077 } else if (!ResultPtr) {
6078 return Modified;
6079 } else {
6080 Type *I8PtrTy =
6081 Builder.getPtrTy(Addr->getType()->getPointerAddressSpace());
6082
6083 // Start with the base register. Do this first so that subsequent address
6084 // matching finds it last, which will prevent it from trying to match it
6085 // as the scaled value in case it happens to be a mul. That would be
6086 // problematic if we've sunk a different mul for the scale, because then
6087 // we'd end up sinking both muls.
6088 if (AddrMode.BaseReg) {
6089 Value *V = AddrMode.BaseReg;
6090 if (V->getType() != IntPtrTy)
6091 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
6092
6093 ResultIndex = V;
6094 }
6095
6096 // Add the scale value.
6097 if (AddrMode.Scale) {
6098 Value *V = AddrMode.ScaledReg;
6099 if (V->getType() == IntPtrTy) {
6100 // done.
6101 } else {
6102 assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
6103 cast<IntegerType>(V->getType())->getBitWidth() &&
6104 "We can't transform if ScaledReg is too narrow");
6105 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
6106 }
6107
6108 if (AddrMode.Scale != 1)
6109 V = Builder.CreateMul(
6110 V, ConstantInt::getSigned(IntPtrTy, AddrMode.Scale), "sunkaddr");
6111 if (ResultIndex)
6112 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
6113 else
6114 ResultIndex = V;
6115 }
6116
6117 // Add in the Base Offset if present.
6118 if (AddrMode.BaseOffs) {
6119 Value *V = ConstantInt::getSigned(IntPtrTy, AddrMode.BaseOffs);
6120 if (ResultIndex) {
6121 // We need to add this separately from the scale above to help with
6122 // SDAG consecutive load/store merging.
6123 if (ResultPtr->getType() != I8PtrTy)
6124 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
6125 ResultPtr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
6126 AddrMode.InBounds);
6127 }
6128
6129 ResultIndex = V;
6130 }
6131
6132 if (!ResultIndex) {
6133 auto PtrInst = dyn_cast<Instruction>(ResultPtr);
6134 // We know that we have a pointer without any offsets. If this pointer
6135 // originates from a different basic block than the current one, we
6136 // must be able to recreate it in the current basic block.
6137 // We do not support the recreation of any instructions yet.
6138 if (PtrInst && PtrInst->getParent() != MemoryInst->getParent())
6139 return Modified;
6140 SunkAddr = ResultPtr;
6141 } else {
6142 if (ResultPtr->getType() != I8PtrTy)
6143 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
6144 SunkAddr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
6145 AddrMode.InBounds);
6146 }
6147
6148 if (SunkAddr->getType() != Addr->getType()) {
6149 if (SunkAddr->getType()->getPointerAddressSpace() !=
6150 Addr->getType()->getPointerAddressSpace() &&
6151 !DL->isNonIntegralPointerType(Addr->getType())) {
6152 // There are two reasons the address spaces might not match: a no-op
6153 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
6154 // ptrtoint/inttoptr pair to ensure we match the original semantics.
6155 // TODO: allow bitcast between different address space pointers with
6156 // the same size.
6157 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
6158 SunkAddr =
6159 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
6160 } else
6161 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
6162 }
6163 }
6164 } else {
6165 // We'd require a ptrtoint/inttoptr down the line, which we can't do for
6166 // non-integral pointers, so in that case bail out now.
6167 Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
6168 Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
6169 PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
6170 PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
6171 if (DL->isNonIntegralPointerType(Addr->getType()) ||
6172 (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
6173 (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
6174 (AddrMode.BaseGV &&
6175 DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
6176 return Modified;
6177
6178 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
6179 << " for " << *MemoryInst << "\n");
6180 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
6181 Value *Result = nullptr;
6182
6183 // Start with the base register. Do this first so that subsequent address
6184 // matching finds it last, which will prevent it from trying to match it
6185 // as the scaled value in case it happens to be a mul. That would be
6186 // problematic if we've sunk a different mul for the scale, because then
6187 // we'd end up sinking both muls.
6188 if (AddrMode.BaseReg) {
6189 Value *V = AddrMode.BaseReg;
6190 if (V->getType()->isPointerTy())
6191 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
6192 if (V->getType() != IntPtrTy)
6193 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
6194 Result = V;
6195 }
6196
6197 // Add the scale value.
6198 if (AddrMode.Scale) {
6199 Value *V = AddrMode.ScaledReg;
6200 if (V->getType() == IntPtrTy) {
6201 // done.
6202 } else if (V->getType()->isPointerTy()) {
6203 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
6204 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
6205 cast<IntegerType>(V->getType())->getBitWidth()) {
6206 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
6207 } else {
6208 // It is only safe to sign extend the BaseReg if we know that the math
6209 // required to create it did not overflow before we extend it. Since
6210 // the original IR value was tossed in favor of a constant back when
6211 // the AddrMode was created we need to bail out gracefully if widths
6212 // do not match instead of extending it.
6214 if (I && (Result != AddrMode.BaseReg))
6215 I->eraseFromParent();
6216 return Modified;
6217 }
6218 if (AddrMode.Scale != 1)
6219 V = Builder.CreateMul(
6220 V, ConstantInt::getSigned(IntPtrTy, AddrMode.Scale), "sunkaddr");
6221 if (Result)
6222 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6223 else
6224 Result = V;
6225 }
6226
6227 // Add in the BaseGV if present.
6228 GlobalValue *BaseGV = AddrMode.BaseGV;
6229 if (BaseGV != nullptr) {
6230 Value *BaseGVPtr;
6231 if (BaseGV->isThreadLocal()) {
6232 BaseGVPtr = Builder.CreateThreadLocalAddress(BaseGV);
6233 } else {
6234 BaseGVPtr = BaseGV;
6235 }
6236 Value *V = Builder.CreatePtrToInt(BaseGVPtr, IntPtrTy, "sunkaddr");
6237 if (Result)
6238 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6239 else
6240 Result = V;
6241 }
6242
6243 // Add in the Base Offset if present.
6244 if (AddrMode.BaseOffs) {
6245 Value *V = ConstantInt::getSigned(IntPtrTy, AddrMode.BaseOffs);
6246 if (Result)
6247 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6248 else
6249 Result = V;
6250 }
6251
6252 if (!Result)
6253 SunkAddr = Constant::getNullValue(Addr->getType());
6254 else
6255 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
6256 }
6257
6258 MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
6259 // Store the newly computed address into the cache. In the case we reused a
6260 // value, this should be idempotent.
6261 SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
6262
6263 // If we have no uses, recursively delete the value and all dead instructions
6264 // using it.
6265 if (Repl->use_empty()) {
6266 resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() {
6267 RecursivelyDeleteTriviallyDeadInstructions(
6268 Repl, TLInfo, nullptr,
6269 [&](Value *V) { removeAllAssertingVHReferences(V); });
6270 });
6271 }
6272 ++NumMemoryInsts;
6273 return true;
6274}
6275
6276/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find
6277/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can
6278/// only handle a 2 operand GEP in the same basic block or a splat constant
6279/// vector. The 2 operands to the GEP must have a scalar pointer and a vector
6280/// index.
6281///
6282/// If the existing GEP has a vector base pointer that is splat, we can look
6283/// through the splat to find the scalar pointer. If we can't find a scalar
6284/// pointer there's nothing we can do.
6285///
6286/// If we have a GEP with more than 2 indices where the middle indices are all
6287/// zeroes, we can replace it with 2 GEPs where the second has 2 operands.
6288///
6289/// If the final index isn't a vector or is a splat, we can emit a scalar GEP
6290/// followed by a GEP with an all zeroes vector index. This will enable
6291/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a
6292/// zero index.
6293bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
6294 Value *Ptr) {
6295 Value *NewAddr;
6296
6297 if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
6298 // Don't optimize GEPs that don't have indices.
6299 if (!GEP->hasIndices())
6300 return false;
6301
6302 // If the GEP and the gather/scatter aren't in the same BB, don't optimize.
6303 // FIXME: We should support this by sinking the GEP.
6304 if (MemoryInst->getParent() != GEP->getParent())
6305 return false;
6306
6307 SmallVector<Value *, 2> Ops(GEP->operands());
6308
6309 bool RewriteGEP = false;
6310
6311 if (Ops[0]->getType()->isVectorTy()) {
6312 Ops[0] = getSplatValue(Ops[0]);
6313 if (!Ops[0])
6314 return false;
6315 RewriteGEP = true;
6316 }
6317
6318 unsigned FinalIndex = Ops.size() - 1;
6319
6320 // Ensure all but the last index is 0.
6321 // FIXME: This isn't strictly required. All that's required is that they are
6322 // all scalars or splats.
6323 for (unsigned i = 1; i < FinalIndex; ++i) {
6324 auto *C = dyn_cast<Constant>(Ops[i]);
6325 if (!C)
6326 return false;
6327 if (isa<VectorType>(C->getType()))
6328 C = C->getSplatValue();
6329 auto *CI = dyn_cast_or_null<ConstantInt>(C);
6330 if (!CI || !CI->isZero())
6331 return false;
6332 // Scalarize the index if needed.
6333 Ops[i] = CI;
6334 }
6335
6336 // Try to scalarize the final index.
6337 if (Ops[FinalIndex]->getType()->isVectorTy()) {
6338 if (Value *V = getSplatValue(Ops[FinalIndex])) {
6339 auto *C = dyn_cast<ConstantInt>(V);
6340 // Don't scalarize all zeros vector.
6341 if (!C || !C->isZero()) {
6342 Ops[FinalIndex] = V;
6343 RewriteGEP = true;
6344 }
6345 }
6346 }
6347
6348 // If we made any changes or the we have extra operands, we need to generate
6349 // new instructions.
6350 if (!RewriteGEP && Ops.size() == 2)
6351 return false;
6352
6353 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6354
6355 IRBuilder<> Builder(MemoryInst);
6356
6357 Type *SourceTy = GEP->getSourceElementType();
6358 Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());
6359
6360 // If the final index isn't a vector, emit a scalar GEP containing all ops
6361 // and a vector GEP with all zeroes final index.
6362 if (!Ops[FinalIndex]->getType()->isVectorTy()) {
6363 NewAddr = Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front());
6364 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6365 auto *SecondTy = GetElementPtrInst::getIndexedType(
6366 SourceTy, ArrayRef(Ops).drop_front());
6367 NewAddr =
6368 Builder.CreateGEP(SecondTy, NewAddr, Constant::getNullValue(IndexTy));
6369 } else {
6370 Value *Base = Ops[0];
6371 Value *Index = Ops[FinalIndex];
6372
6373 // Create a scalar GEP if there are more than 2 operands.
6374 if (Ops.size() != 2) {
6375 // Replace the last index with 0.
6376 Ops[FinalIndex] =
6377 Constant::getNullValue(Ops[FinalIndex]->getType()->getScalarType());
6378 Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front());
6380 SourceTy, ArrayRef(Ops).drop_front());
6381 }
6382
6383 // Now create the GEP with scalar pointer and vector index.
6384 NewAddr = Builder.CreateGEP(SourceTy, Base, Index);
6385 }
6386 } else if (!isa<Constant>(Ptr)) {
6387 // Not a GEP, maybe its a splat and we can create a GEP to enable
6388 // SelectionDAGBuilder to use it as a uniform base.
6389 Value *V = getSplatValue(Ptr);
6390 if (!V)
6391 return false;
6392
6393 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6394
6395 IRBuilder<> Builder(MemoryInst);
6396
6397 // Emit a vector GEP with a scalar pointer and all 0s vector index.
6398 Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType());
6399 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6400 Type *ScalarTy;
6401 if (cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6402 Intrinsic::masked_gather) {
6403 ScalarTy = MemoryInst->getType()->getScalarType();
6404 } else {
6405 assert(cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6406 Intrinsic::masked_scatter);
6407 ScalarTy = MemoryInst->getOperand(0)->getType()->getScalarType();
6408 }
6409 NewAddr = Builder.CreateGEP(ScalarTy, V, Constant::getNullValue(IndexTy));
6410 } else {
6411 // Constant, SelectionDAGBuilder knows to check if its a splat.
6412 return false;
6413 }
6414
6415 MemoryInst->replaceUsesOfWith(Ptr, NewAddr);
6416
6417 // If we have no uses, recursively delete the value and all dead instructions
6418 // using it.
6419 if (Ptr->use_empty())
6421 Ptr, TLInfo, nullptr,
6422 [&](Value *V) { removeAllAssertingVHReferences(V); });
6423
6424 return true;
6425}
6426
6427// This is a helper for CodeGenPrepare::optimizeMulWithOverflow.
6428// Check the pattern we are interested in where there are maximum 2 uses
6429// of the intrinsic which are the extract instructions.
6431 ExtractValueInst *&OverflowExtract) {
6432 // Bail out if it's more than 2 users:
6433 if (I->hasNUsesOrMore(3))
6434 return false;
6435
6436 for (User *U : I->users()) {
6437 auto *Extract = dyn_cast<ExtractValueInst>(U);
6438 if (!Extract || Extract->getNumIndices() != 1)
6439 return false;
6440
6441 unsigned Index = Extract->getIndices()[0];
6442 if (Index == 0)
6443 MulExtract = Extract;
6444 else if (Index == 1)
6445 OverflowExtract = Extract;
6446 else
6447 return false;
6448 }
6449 return true;
6450}
6451
6452// Rewrite the mul_with_overflow intrinsic by checking if both of the
6453// operands' value ranges are within the legal type. If so, we can optimize the
6454// multiplication algorithm. This code is supposed to be written during the step
6455// of type legalization, but given that we need to reconstruct the IR which is
6456// not doable there, we do it here.
6457// The IR after the optimization will look like:
6458// entry:
6459// if signed:
6460// ( (lhs_lo>>BW-1) ^ lhs_hi) || ( (rhs_lo>>BW-1) ^ rhs_hi) ? overflow,
6461// overflow_no
6462// else:
6463// (lhs_hi != 0) || (rhs_hi != 0) ? overflow, overflow_no
6464// overflow_no:
6465// overflow:
6466// overflow.res:
6467// \returns true if optimization was applied
6468// TODO: This optimization can be further improved to optimize branching on
6469// overflow where the 'overflow_no' BB can branch directly to the false
6470// successor of overflow, but that would add additional complexity so we leave
6471// it for future work.
6472bool CodeGenPrepare::optimizeMulWithOverflow(Instruction *I, bool IsSigned,
6473 ModifyDT &ModifiedDT) {
6474 // Check if target supports this optimization.
6476 I->getContext(),
6477 TLI->getValueType(*DL, I->getType()->getContainedType(0))))
6478 return false;
6479
6480 ExtractValueInst *MulExtract = nullptr, *OverflowExtract = nullptr;
6481 if (!matchOverflowPattern(I, MulExtract, OverflowExtract))
6482 return false;
6483
6484 // Keep track of the instruction to stop reoptimizing it again.
6485 InsertedInsts.insert(I);
6486
6487 Value *LHS = I->getOperand(0);
6488 Value *RHS = I->getOperand(1);
6489 Type *Ty = LHS->getType();
6490 unsigned VTHalfBitWidth = Ty->getScalarSizeInBits() / 2;
6491 Type *LegalTy = Ty->getWithNewBitWidth(VTHalfBitWidth);
6492
6493 // New BBs:
6494 BasicBlock *OverflowEntryBB =
6495 splitBlockBefore(I->getParent(), I, DTU, LI, nullptr, "");
6496 OverflowEntryBB->takeName(I->getParent());
6497 // Keep the 'br' instruction that is generated as a result of the split to be
6498 // erased/replaced later.
6499 Instruction *OldTerminator = OverflowEntryBB->getTerminator();
6500 BasicBlock *NoOverflowBB =
6501 BasicBlock::Create(I->getContext(), "overflow.no", I->getFunction());
6502 NoOverflowBB->moveAfter(OverflowEntryBB);
6503 BasicBlock *OverflowBB =
6504 BasicBlock::Create(I->getContext(), "overflow", I->getFunction());
6505 OverflowBB->moveAfter(NoOverflowBB);
6506
6507 // BB overflow.entry:
6508 IRBuilder<> Builder(OverflowEntryBB);
6509 // Extract low and high halves of LHS:
6510 Value *LoLHS = Builder.CreateTrunc(LHS, LegalTy, "lo.lhs");
6511 Value *HiLHS = Builder.CreateLShr(LHS, VTHalfBitWidth, "lhs.lsr");
6512 HiLHS = Builder.CreateTrunc(HiLHS, LegalTy, "hi.lhs");
6513
6514 // Extract low and high halves of RHS:
6515 Value *LoRHS = Builder.CreateTrunc(RHS, LegalTy, "lo.rhs");
6516 Value *HiRHS = Builder.CreateLShr(RHS, VTHalfBitWidth, "rhs.lsr");
6517 HiRHS = Builder.CreateTrunc(HiRHS, LegalTy, "hi.rhs");
6518
6519 Value *IsAnyBitTrue;
6520 if (IsSigned) {
6521 Value *SignLoLHS =
6522 Builder.CreateAShr(LoLHS, VTHalfBitWidth - 1, "sign.lo.lhs");
6523 Value *SignLoRHS =
6524 Builder.CreateAShr(LoRHS, VTHalfBitWidth - 1, "sign.lo.rhs");
6525 Value *XorLHS = Builder.CreateXor(HiLHS, SignLoLHS);
6526 Value *XorRHS = Builder.CreateXor(HiRHS, SignLoRHS);
6527 Value *Or = Builder.CreateOr(XorLHS, XorRHS, "or.lhs.rhs");
6528 IsAnyBitTrue = Builder.CreateCmp(ICmpInst::ICMP_NE, Or,
6529 ConstantInt::getNullValue(Or->getType()));
6530 } else {
6531 Value *CmpLHS = Builder.CreateCmp(ICmpInst::ICMP_NE, HiLHS,
6532 ConstantInt::getNullValue(LegalTy));
6533 Value *CmpRHS = Builder.CreateCmp(ICmpInst::ICMP_NE, HiRHS,
6534 ConstantInt::getNullValue(LegalTy));
6535 IsAnyBitTrue = Builder.CreateOr(CmpLHS, CmpRHS, "or.lhs.rhs");
6536 }
6537 Builder.CreateCondBr(IsAnyBitTrue, OverflowBB, NoOverflowBB);
6538
6539 // BB overflow.no:
6540 Builder.SetInsertPoint(NoOverflowBB);
6541 Value *ExtLoLHS, *ExtLoRHS;
6542 if (IsSigned) {
6543 ExtLoLHS = Builder.CreateSExt(LoLHS, Ty, "lo.lhs.ext");
6544 ExtLoRHS = Builder.CreateSExt(LoRHS, Ty, "lo.rhs.ext");
6545 } else {
6546 ExtLoLHS = Builder.CreateZExt(LoLHS, Ty, "lo.lhs.ext");
6547 ExtLoRHS = Builder.CreateZExt(LoRHS, Ty, "lo.rhs.ext");
6548 }
6549
6550 Value *Mul = Builder.CreateMul(ExtLoLHS, ExtLoRHS, "mul.overflow.no");
6551
6552 // Create the 'overflow.res' BB to merge the results of
6553 // the two paths:
6554 BasicBlock *OverflowResBB = I->getParent();
6555 OverflowResBB->setName("overflow.res");
6556
6557 // BB overflow.no: jump to overflow.res BB
6558 Builder.CreateBr(OverflowResBB);
6559 // No we don't need the old terminator in overflow.entry BB, erase it:
6560 OldTerminator->eraseFromParent();
6561
6562 // BB overflow.res:
6563 Builder.SetInsertPoint(OverflowResBB, OverflowResBB->getFirstInsertionPt());
6564 // Create PHI nodes to merge results from no.overflow BB and overflow BB to
6565 // replace the extract instructions.
6566 PHINode *OverflowResPHI = Builder.CreatePHI(Ty, 2),
6567 *OverflowFlagPHI =
6568 Builder.CreatePHI(IntegerType::getInt1Ty(I->getContext()), 2);
6569
6570 // Add the incoming values from no.overflow BB and later from overflow BB.
6571 OverflowResPHI->addIncoming(Mul, NoOverflowBB);
6572 OverflowFlagPHI->addIncoming(ConstantInt::getFalse(I->getContext()),
6573 NoOverflowBB);
6574
6575 // Replace all users of MulExtract and OverflowExtract to use the PHI nodes.
6576 if (MulExtract) {
6577 MulExtract->replaceAllUsesWith(OverflowResPHI);
6578 MulExtract->eraseFromParent();
6579 }
6580 if (OverflowExtract) {
6581 OverflowExtract->replaceAllUsesWith(OverflowFlagPHI);
6582 OverflowExtract->eraseFromParent();
6583 }
6584
6585 // Remove the intrinsic from parent (overflow.res BB) as it will be part of
6586 // overflow BB
6587 I->removeFromParent();
6588 // BB overflow:
6589 I->insertInto(OverflowBB, OverflowBB->end());
6590 Builder.SetInsertPoint(OverflowBB, OverflowBB->end());
6591 Value *MulOverflow = Builder.CreateExtractValue(I, {0}, "mul.overflow");
6592 Value *OverflowFlag = Builder.CreateExtractValue(I, {1}, "overflow.flag");
6593 Builder.CreateBr(OverflowResBB);
6594
6595 // Add The Extracted values to the PHINodes in the overflow.res BB.
6596 OverflowResPHI->addIncoming(MulOverflow, OverflowBB);
6597 OverflowFlagPHI->addIncoming(OverflowFlag, OverflowBB);
6598
6599 DTU->applyUpdates({{DominatorTree::Insert, OverflowEntryBB, OverflowBB},
6600 {DominatorTree::Insert, OverflowEntryBB, NoOverflowBB},
6601 {DominatorTree::Insert, NoOverflowBB, OverflowResBB},
6602 {DominatorTree::Delete, OverflowEntryBB, OverflowResBB},
6603 {DominatorTree::Insert, OverflowBB, OverflowResBB}});
6604
6605 ModifiedDT = ModifyDT::ModifyBBDT;
6606 return true;
6607}
6608
6609/// If there are any memory operands, use OptimizeMemoryInst to sink their
6610/// address computing into the block when possible / profitable.
6611bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
6612 bool MadeChange = false;
6613
6614 const TargetRegisterInfo *TRI =
6616 TargetLowering::AsmOperandInfoVector TargetConstraints =
6617 TLI->ParseConstraints(*DL, TRI, *CS);
6618 unsigned ArgNo = 0;
6619 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
6620 // Compute the constraint code and ConstraintType to use.
6621 TLI->ComputeConstraintToUse(OpInfo, SDValue());
6622
6623 // TODO: Also handle C_Address?
6624 if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
6625 OpInfo.isIndirect) {
6626 Value *OpVal = CS->getArgOperand(ArgNo++);
6627 MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
6628 } else if (OpInfo.Type == InlineAsm::isInput)
6629 ArgNo++;
6630 }
6631
6632 return MadeChange;
6633}
6634
6635/// Check if all the uses of \p Val are equivalent (or free) zero or
6636/// sign extensions.
6637static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
6638 assert(!Val->use_empty() && "Input must have at least one use");
6639 const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
6640 bool IsSExt = isa<SExtInst>(FirstUser);
6641 Type *ExtTy = FirstUser->getType();
6642 for (const User *U : Val->users()) {
6643 const Instruction *UI = cast<Instruction>(U);
6644 if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
6645 return false;
6646 Type *CurTy = UI->getType();
6647 // Same input and output types: Same instruction after CSE.
6648 if (CurTy == ExtTy)
6649 continue;
6650
6651 // If IsSExt is true, we are in this situation:
6652 // a = Val
6653 // b = sext ty1 a to ty2
6654 // c = sext ty1 a to ty3
6655 // Assuming ty2 is shorter than ty3, this could be turned into:
6656 // a = Val
6657 // b = sext ty1 a to ty2
6658 // c = sext ty2 b to ty3
6659 // However, the last sext is not free.
6660 if (IsSExt)
6661 return false;
6662
6663 // This is a ZExt, maybe this is free to extend from one type to another.
6664 // In that case, we would not account for a different use.
6665 Type *NarrowTy;
6666 Type *LargeTy;
6667 if (ExtTy->getScalarType()->getIntegerBitWidth() >
6668 CurTy->getScalarType()->getIntegerBitWidth()) {
6669 NarrowTy = CurTy;
6670 LargeTy = ExtTy;
6671 } else {
6672 NarrowTy = ExtTy;
6673 LargeTy = CurTy;
6674 }
6675
6676 if (!TLI.isZExtFree(NarrowTy, LargeTy))
6677 return false;
6678 }
6679 // All uses are the same or can be derived from one another for free.
6680 return true;
6681}
6682
6683/// Try to speculatively promote extensions in \p Exts and continue
6684/// promoting through newly promoted operands recursively as far as doing so is
6685/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
6686/// When some promotion happened, \p TPT contains the proper state to revert
6687/// them.
6688///
6689/// \return true if some promotion happened, false otherwise.
6690bool CodeGenPrepare::tryToPromoteExts(
6691 TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
6692 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
6693 unsigned CreatedInstsCost) {
6694 bool Promoted = false;
6695
6696 // Iterate over all the extensions to try to promote them.
6697 for (auto *I : Exts) {
6698 // Early check if we directly have ext(load).
6699 if (isa<LoadInst>(I->getOperand(0))) {
6700 ProfitablyMovedExts.push_back(I);
6701 continue;
6702 }
6703
6704 // Check whether or not we want to do any promotion. The reason we have
6705 // this check inside the for loop is to catch the case where an extension
6706 // is directly fed by a load because in such case the extension can be moved
6707 // up without any promotion on its operands.
6709 return false;
6710
6711 // Get the action to perform the promotion.
6712 TypePromotionHelper::Action TPH =
6713 TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
6714 // Check if we can promote.
6715 if (!TPH) {
6716 // Save the current extension as we cannot move up through its operand.
6717 ProfitablyMovedExts.push_back(I);
6718 continue;
6719 }
6720
6721 // Save the current state.
6722 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6723 TPT.getRestorationPoint();
6724 SmallVector<Instruction *, 4> NewExts;
6725 unsigned NewCreatedInstsCost = 0;
6726 unsigned ExtCost = !TLI->isExtFree(I);
6727 // Promote.
6728 Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
6729 &NewExts, nullptr, *TLI);
6730 assert(PromotedVal &&
6731 "TypePromotionHelper should have filtered out those cases");
6732
6733 // We would be able to merge only one extension in a load.
6734 // Therefore, if we have more than 1 new extension we heuristically
6735 // cut this search path, because it means we degrade the code quality.
6736 // With exactly 2, the transformation is neutral, because we will merge
6737 // one extension but leave one. However, we optimistically keep going,
6738 // because the new extension may be removed too. Also avoid replacing a
6739 // single free extension with multiple extensions, as this increases the
6740 // number of IR instructions while not providing any savings.
6741 long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
6742 // FIXME: It would be possible to propagate a negative value instead of
6743 // conservatively ceiling it to 0.
6744 TotalCreatedInstsCost =
6745 std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
6746 if (!StressExtLdPromotion &&
6747 (TotalCreatedInstsCost > 1 ||
6748 !isPromotedInstructionLegal(*TLI, *DL, PromotedVal) ||
6749 (ExtCost == 0 && NewExts.size() > 1))) {
6750 // This promotion is not profitable, rollback to the previous state, and
6751 // save the current extension in ProfitablyMovedExts as the latest
6752 // speculative promotion turned out to be unprofitable.
6753 TPT.rollback(LastKnownGood);
6754 ProfitablyMovedExts.push_back(I);
6755 continue;
6756 }
6757 // Continue promoting NewExts as far as doing so is profitable.
6758 SmallVector<Instruction *, 2> NewlyMovedExts;
6759 (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
6760 bool NewPromoted = false;
6761 for (auto *ExtInst : NewlyMovedExts) {
6762 Instruction *MovedExt = cast<Instruction>(ExtInst);
6763 Value *ExtOperand = MovedExt->getOperand(0);
6764 // If we have reached to a load, we need this extra profitability check
6765 // as it could potentially be merged into an ext(load).
6766 if (isa<LoadInst>(ExtOperand) &&
6767 !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
6768 (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
6769 continue;
6770
6771 ProfitablyMovedExts.push_back(MovedExt);
6772 NewPromoted = true;
6773 }
6774
6775 // If none of speculative promotions for NewExts is profitable, rollback
6776 // and save the current extension (I) as the last profitable extension.
6777 if (!NewPromoted) {
6778 TPT.rollback(LastKnownGood);
6779 ProfitablyMovedExts.push_back(I);
6780 continue;
6781 }
6782 // The promotion is profitable.
6783 Promoted = true;
6784 }
6785 return Promoted;
6786}
6787
6788/// Merging redundant sexts when one is dominating the other.
6789bool CodeGenPrepare::mergeSExts(Function &F) {
6790 bool Changed = false;
6791 for (auto &Entry : ValToSExtendedUses) {
6792 SExts &Insts = Entry.second;
6793 SExts CurPts;
6794 for (Instruction *Inst : Insts) {
6795 if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
6796 Inst->getOperand(0) != Entry.first)
6797 continue;
6798 bool inserted = false;
6799 for (auto &Pt : CurPts) {
6800 if (getDT().dominates(Inst, Pt)) {
6801 replaceAllUsesWith(Pt, Inst, FreshBBs, IsHugeFunc);
6802 RemovedInsts.insert(Pt);
6803 Pt->removeFromParent();
6804 Pt = Inst;
6805 inserted = true;
6806 Changed = true;
6807 break;
6808 }
6809 if (!getDT().dominates(Pt, Inst))
6810 // Give up if we need to merge in a common dominator as the
6811 // experiments show it is not profitable.
6812 continue;
6813 replaceAllUsesWith(Inst, Pt, FreshBBs, IsHugeFunc);
6814 RemovedInsts.insert(Inst);
6815 Inst->removeFromParent();
6816 inserted = true;
6817 Changed = true;
6818 break;
6819 }
6820 if (!inserted)
6821 CurPts.push_back(Inst);
6822 }
6823 }
6824 return Changed;
6825}
6826
6827// Splitting large data structures so that the GEPs accessing them can have
6828// smaller offsets so that they can be sunk to the same blocks as their users.
6829// For example, a large struct starting from %base is split into two parts
6830// where the second part starts from %new_base.
6831//
6832// Before:
6833// BB0:
6834// %base =
6835//
6836// BB1:
6837// %gep0 = gep %base, off0
6838// %gep1 = gep %base, off1
6839// %gep2 = gep %base, off2
6840//
6841// BB2:
6842// %load1 = load %gep0
6843// %load2 = load %gep1
6844// %load3 = load %gep2
6845//
6846// After:
6847// BB0:
6848// %base =
6849// %new_base = gep %base, off0
6850//
6851// BB1:
6852// %new_gep0 = %new_base
6853// %new_gep1 = gep %new_base, off1 - off0
6854// %new_gep2 = gep %new_base, off2 - off0
6855//
6856// BB2:
6857// %load1 = load i32, i32* %new_gep0
6858// %load2 = load i32, i32* %new_gep1
6859// %load3 = load i32, i32* %new_gep2
6860//
6861// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
6862// their offsets are smaller enough to fit into the addressing mode.
6863bool CodeGenPrepare::splitLargeGEPOffsets() {
6864 bool Changed = false;
6865 for (auto &Entry : LargeOffsetGEPMap) {
6866 Value *OldBase = Entry.first;
6867 SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
6868 &LargeOffsetGEPs = Entry.second;
6869 auto compareGEPOffset =
6870 [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
6871 const std::pair<GetElementPtrInst *, int64_t> &RHS) {
6872 if (LHS.first == RHS.first)
6873 return false;
6874 if (LHS.second != RHS.second)
6875 return LHS.second < RHS.second;
6876 return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
6877 };
6878 // Sorting all the GEPs of the same data structures based on the offsets.
6879 llvm::sort(LargeOffsetGEPs, compareGEPOffset);
6880 LargeOffsetGEPs.erase(llvm::unique(LargeOffsetGEPs), LargeOffsetGEPs.end());
6881 // Skip if all the GEPs have the same offsets.
6882 if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
6883 continue;
6884 GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
6885 int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
6886 Value *NewBaseGEP = nullptr;
6887
6888 auto createNewBase = [&](int64_t BaseOffset, Value *OldBase,
6889 GetElementPtrInst *GEP) {
6890 LLVMContext &Ctx = GEP->getContext();
6891 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6892 Type *I8PtrTy =
6893 PointerType::get(Ctx, GEP->getType()->getPointerAddressSpace());
6894
6895 BasicBlock::iterator NewBaseInsertPt;
6896 BasicBlock *NewBaseInsertBB;
6897 if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
6898 // If the base of the struct is an instruction, the new base will be
6899 // inserted close to it.
6900 NewBaseInsertBB = BaseI->getParent();
6901 if (isa<PHINode>(BaseI))
6902 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6903 else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
6904 NewBaseInsertBB =
6905 SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), &getDT(), LI);
6906 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6907 } else
6908 NewBaseInsertPt = std::next(BaseI->getIterator());
6909 } else {
6910 // If the current base is an argument or global value, the new base
6911 // will be inserted to the entry block.
6912 NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
6913 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6914 }
6915 IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
6916 // Create a new base.
6917 // TODO: Avoid implicit trunc?
6918 // See https://github.com/llvm/llvm-project/issues/112510.
6919 Value *BaseIndex =
6920 ConstantInt::getSigned(PtrIdxTy, BaseOffset, /*ImplicitTrunc=*/true);
6921 NewBaseGEP = OldBase;
6922 if (NewBaseGEP->getType() != I8PtrTy)
6923 NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
6924 NewBaseGEP =
6925 NewBaseBuilder.CreatePtrAdd(NewBaseGEP, BaseIndex, "splitgep");
6926 NewGEPBases.insert(NewBaseGEP);
6927 return;
6928 };
6929
6930 // Check whether all the offsets can be encoded with prefered common base.
6931 if (int64_t PreferBase = TLI->getPreferredLargeGEPBaseOffset(
6932 LargeOffsetGEPs.front().second, LargeOffsetGEPs.back().second)) {
6933 BaseOffset = PreferBase;
6934 // Create a new base if the offset of the BaseGEP can be decoded with one
6935 // instruction.
6936 createNewBase(BaseOffset, OldBase, BaseGEP);
6937 }
6938
6939 auto *LargeOffsetGEP = LargeOffsetGEPs.begin();
6940 while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
6941 GetElementPtrInst *GEP = LargeOffsetGEP->first;
6942 int64_t Offset = LargeOffsetGEP->second;
6943 if (Offset != BaseOffset) {
6944 TargetLowering::AddrMode AddrMode;
6945 AddrMode.HasBaseReg = true;
6946 AddrMode.BaseOffs = Offset - BaseOffset;
6947 // The result type of the GEP might not be the type of the memory
6948 // access.
6949 if (!TLI->isLegalAddressingMode(*DL, AddrMode,
6950 GEP->getResultElementType(),
6951 GEP->getAddressSpace())) {
6952 // We need to create a new base if the offset to the current base is
6953 // too large to fit into the addressing mode. So, a very large struct
6954 // may be split into several parts.
6955 BaseGEP = GEP;
6956 BaseOffset = Offset;
6957 NewBaseGEP = nullptr;
6958 }
6959 }
6960
6961 // Generate a new GEP to replace the current one.
6962 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6963
6964 if (!NewBaseGEP) {
6965 // Create a new base if we don't have one yet. Find the insertion
6966 // pointer for the new base first.
6967 createNewBase(BaseOffset, OldBase, GEP);
6968 }
6969
6970 IRBuilder<> Builder(GEP);
6971 Value *NewGEP = NewBaseGEP;
6972 if (Offset != BaseOffset) {
6973 // Calculate the new offset for the new GEP.
6974 Value *Index = ConstantInt::get(PtrIdxTy, Offset - BaseOffset);
6975 NewGEP = Builder.CreatePtrAdd(NewBaseGEP, Index);
6976 }
6977 replaceAllUsesWith(GEP, NewGEP, FreshBBs, IsHugeFunc);
6978 LargeOffsetGEPID.erase(GEP);
6979 LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
6980 GEP->eraseFromParent();
6981 Changed = true;
6982 }
6983 }
6984 return Changed;
6985}
6986
6987bool CodeGenPrepare::optimizePhiType(
6988 PHINode *I, SmallPtrSetImpl<PHINode *> &Visited,
6989 SmallPtrSetImpl<Instruction *> &DeletedInstrs) {
6990 // We are looking for a collection on interconnected phi nodes that together
6991 // only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts
6992 // are of the same type. Convert the whole set of nodes to the type of the
6993 // bitcast.
6994 Type *PhiTy = I->getType();
6995 Type *ConvertTy = nullptr;
6996 if (Visited.count(I) ||
6997 (!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy()))
6998 return false;
6999
7000 SmallVector<Instruction *, 4> Worklist;
7001 Worklist.push_back(cast<Instruction>(I));
7002 SmallPtrSet<PHINode *, 4> PhiNodes;
7003 SmallPtrSet<ConstantData *, 4> Constants;
7004 PhiNodes.insert(I);
7005 Visited.insert(I);
7006 SmallPtrSet<Instruction *, 4> Defs;
7007 SmallPtrSet<Instruction *, 4> Uses;
7008 // This works by adding extra bitcasts between load/stores and removing
7009 // existing bitcasts. If we have a phi(bitcast(load)) or a store(bitcast(phi))
7010 // we can get in the situation where we remove a bitcast in one iteration
7011 // just to add it again in the next. We need to ensure that at least one
7012 // bitcast we remove are anchored to something that will not change back.
7013 bool AnyAnchored = false;
7014
7015 while (!Worklist.empty()) {
7016 Instruction *II = Worklist.pop_back_val();
7017
7018 if (auto *Phi = dyn_cast<PHINode>(II)) {
7019 // Handle Defs, which might also be PHI's
7020 for (Value *V : Phi->incoming_values()) {
7021 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
7022 if (!PhiNodes.count(OpPhi)) {
7023 if (!Visited.insert(OpPhi).second)
7024 return false;
7025 PhiNodes.insert(OpPhi);
7026 Worklist.push_back(OpPhi);
7027 }
7028 } else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
7029 if (!OpLoad->isSimple())
7030 return false;
7031 if (Defs.insert(OpLoad).second)
7032 Worklist.push_back(OpLoad);
7033 } else if (auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
7034 if (Defs.insert(OpEx).second)
7035 Worklist.push_back(OpEx);
7036 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
7037 if (!ConvertTy)
7038 ConvertTy = OpBC->getOperand(0)->getType();
7039 if (OpBC->getOperand(0)->getType() != ConvertTy)
7040 return false;
7041 if (Defs.insert(OpBC).second) {
7042 Worklist.push_back(OpBC);
7043 AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
7044 !isa<ExtractElementInst>(OpBC->getOperand(0));
7045 }
7046 } else if (auto *OpC = dyn_cast<ConstantData>(V))
7047 Constants.insert(OpC);
7048 else
7049 return false;
7050 }
7051 }
7052
7053 // Handle uses which might also be phi's
7054 for (User *V : II->users()) {
7055 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
7056 if (!PhiNodes.count(OpPhi)) {
7057 if (Visited.count(OpPhi))
7058 return false;
7059 PhiNodes.insert(OpPhi);
7060 Visited.insert(OpPhi);
7061 Worklist.push_back(OpPhi);
7062 }
7063 } else if (auto *OpStore = dyn_cast<StoreInst>(V)) {
7064 if (!OpStore->isSimple() || OpStore->getOperand(0) != II)
7065 return false;
7066 Uses.insert(OpStore);
7067 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
7068 if (!ConvertTy)
7069 ConvertTy = OpBC->getType();
7070 if (OpBC->getType() != ConvertTy)
7071 return false;
7072 Uses.insert(OpBC);
7073 AnyAnchored |=
7074 any_of(OpBC->users(), [](User *U) { return !isa<StoreInst>(U); });
7075 } else {
7076 return false;
7077 }
7078 }
7079 }
7080
7081 if (!ConvertTy || !AnyAnchored || PhiTy == ConvertTy ||
7082 !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
7083 return false;
7084
7085 LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to "
7086 << *ConvertTy << "\n");
7087
7088 // Create all the new phi nodes of the new type, and bitcast any loads to the
7089 // correct type.
7090 ValueToValueMap ValMap;
7091 for (ConstantData *C : Constants)
7092 ValMap[C] = ConstantExpr::getBitCast(C, ConvertTy);
7093 for (Instruction *D : Defs) {
7094 if (isa<BitCastInst>(D)) {
7095 ValMap[D] = D->getOperand(0);
7096 DeletedInstrs.insert(D);
7097 } else {
7098 BasicBlock::iterator insertPt = std::next(D->getIterator());
7099 ValMap[D] = new BitCastInst(D, ConvertTy, D->getName() + ".bc", insertPt);
7100 }
7101 }
7102 for (PHINode *Phi : PhiNodes)
7103 ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(),
7104 Phi->getName() + ".tc", Phi->getIterator());
7105 // Pipe together all the PhiNodes.
7106 for (PHINode *Phi : PhiNodes) {
7107 PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
7108 for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++)
7109 NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)],
7110 Phi->getIncomingBlock(i));
7111 Visited.insert(NewPhi);
7112 }
7113 // And finally pipe up the stores and bitcasts
7114 for (Instruction *U : Uses) {
7115 if (isa<BitCastInst>(U)) {
7116 DeletedInstrs.insert(U);
7117 replaceAllUsesWith(U, ValMap[U->getOperand(0)], FreshBBs, IsHugeFunc);
7118 } else {
7119 U->setOperand(0, new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc",
7120 U->getIterator()));
7121 }
7122 }
7123
7124 // Save the removed phis to be deleted later.
7125 DeletedInstrs.insert_range(PhiNodes);
7126 return true;
7127}
7128
7129bool CodeGenPrepare::optimizePhiTypes(Function &F) {
7130 if (!OptimizePhiTypes)
7131 return false;
7132
7133 bool Changed = false;
7134 SmallPtrSet<PHINode *, 4> Visited;
7135 SmallPtrSet<Instruction *, 4> DeletedInstrs;
7136
7137 // Attempt to optimize all the phis in the functions to the correct type.
7138 for (auto &BB : F)
7139 for (auto &Phi : BB.phis())
7140 Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs);
7141
7142 // Remove any old phi's that have been converted.
7143 for (auto *I : DeletedInstrs) {
7144 replaceAllUsesWith(I, PoisonValue::get(I->getType()), FreshBBs, IsHugeFunc);
7145 I->eraseFromParent();
7146 }
7147
7148 return Changed;
7149}
7150
7151/// Return true, if an ext(load) can be formed from an extension in
7152/// \p MovedExts.
7153bool CodeGenPrepare::canFormExtLd(
7154 const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
7155 Instruction *&Inst, bool HasPromoted) {
7156 for (auto *MovedExtInst : MovedExts) {
7157 if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
7158 LI = cast<LoadInst>(MovedExtInst->getOperand(0));
7159 Inst = MovedExtInst;
7160 break;
7161 }
7162 }
7163 if (!LI)
7164 return false;
7165
7166 // If they're already in the same block, there's nothing to do.
7167 // Make the cheap checks first if we did not promote.
7168 // If we promoted, we need to check if it is indeed profitable.
7169 if (!HasPromoted && LI->getParent() == Inst->getParent())
7170 return false;
7171
7172 return TLI->isExtLoad(LI, Inst, *DL);
7173}
7174
7175/// Move a zext or sext fed by a load into the same basic block as the load,
7176/// unless conditions are unfavorable. This allows SelectionDAG to fold the
7177/// extend into the load.
7178///
7179/// E.g.,
7180/// \code
7181/// %ld = load i32* %addr
7182/// %add = add nuw i32 %ld, 4
7183/// %zext = zext i32 %add to i64
7184// \endcode
7185/// =>
7186/// \code
7187/// %ld = load i32* %addr
7188/// %zext = zext i32 %ld to i64
7189/// %add = add nuw i64 %zext, 4
7190/// \encode
7191/// Note that the promotion in %add to i64 is done in tryToPromoteExts(), which
7192/// allow us to match zext(load i32*) to i64.
7193///
7194/// Also, try to promote the computations used to obtain a sign extended
7195/// value used into memory accesses.
7196/// E.g.,
7197/// \code
7198/// a = add nsw i32 b, 3
7199/// d = sext i32 a to i64
7200/// e = getelementptr ..., i64 d
7201/// \endcode
7202/// =>
7203/// \code
7204/// f = sext i32 b to i64
7205/// a = add nsw i64 f, 3
7206/// e = getelementptr ..., i64 a
7207/// \endcode
7208///
7209/// \p Inst[in/out] the extension may be modified during the process if some
7210/// promotions apply.
7211bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
7212 bool AllowPromotionWithoutCommonHeader = false;
7213 /// See if it is an interesting sext operations for the address type
7214 /// promotion before trying to promote it, e.g., the ones with the right
7215 /// type and used in memory accesses.
7216 bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
7217 *Inst, AllowPromotionWithoutCommonHeader);
7218 TypePromotionTransaction TPT(RemovedInsts);
7219 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
7220 TPT.getRestorationPoint();
7222 SmallVector<Instruction *, 2> SpeculativelyMovedExts;
7223 Exts.push_back(Inst);
7224
7225 bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
7226
7227 // Look for a load being extended.
7228 LoadInst *LI = nullptr;
7229 Instruction *ExtFedByLoad;
7230
7231 // Try to promote a chain of computation if it allows to form an extended
7232 // load.
7233 if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
7234 assert(LI && ExtFedByLoad && "Expect a valid load and extension");
7235 TPT.commit();
7236 // Move the extend into the same block as the load.
7237 ExtFedByLoad->moveAfter(LI);
7238 ++NumExtsMoved;
7239 Inst = ExtFedByLoad;
7240 return true;
7241 }
7242
7243 // Continue promoting SExts if known as considerable depending on targets.
7244 if (ATPConsiderable &&
7245 performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
7246 HasPromoted, TPT, SpeculativelyMovedExts))
7247 return true;
7248
7249 TPT.rollback(LastKnownGood);
7250 return false;
7251}
7252
7253// Perform address type promotion if doing so is profitable.
7254// If AllowPromotionWithoutCommonHeader == false, we should find other sext
7255// instructions that sign extended the same initial value. However, if
7256// AllowPromotionWithoutCommonHeader == true, we expect promoting the
7257// extension is just profitable.
7258bool CodeGenPrepare::performAddressTypePromotion(
7259 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
7260 bool HasPromoted, TypePromotionTransaction &TPT,
7261 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
7262 bool Promoted = false;
7263 SmallPtrSet<Instruction *, 1> UnhandledExts;
7264 bool AllSeenFirst = true;
7265 for (auto *I : SpeculativelyMovedExts) {
7266 Value *HeadOfChain = I->getOperand(0);
7267 DenseMap<Value *, Instruction *>::iterator AlreadySeen =
7268 SeenChainsForSExt.find(HeadOfChain);
7269 // If there is an unhandled SExt which has the same header, try to promote
7270 // it as well.
7271 if (AlreadySeen != SeenChainsForSExt.end()) {
7272 if (AlreadySeen->second != nullptr)
7273 UnhandledExts.insert(AlreadySeen->second);
7274 AllSeenFirst = false;
7275 }
7276 }
7277
7278 if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
7279 SpeculativelyMovedExts.size() == 1)) {
7280 TPT.commit();
7281 if (HasPromoted)
7282 Promoted = true;
7283 for (auto *I : SpeculativelyMovedExts) {
7284 Value *HeadOfChain = I->getOperand(0);
7285 SeenChainsForSExt[HeadOfChain] = nullptr;
7286 ValToSExtendedUses[HeadOfChain].push_back(I);
7287 }
7288 // Update Inst as promotion happen.
7289 Inst = SpeculativelyMovedExts.pop_back_val();
7290 } else {
7291 // This is the first chain visited from the header, keep the current chain
7292 // as unhandled. Defer to promote this until we encounter another SExt
7293 // chain derived from the same header.
7294 for (auto *I : SpeculativelyMovedExts) {
7295 Value *HeadOfChain = I->getOperand(0);
7296 SeenChainsForSExt[HeadOfChain] = Inst;
7297 }
7298 return false;
7299 }
7300
7301 if (!AllSeenFirst && !UnhandledExts.empty())
7302 for (auto *VisitedSExt : UnhandledExts) {
7303 if (RemovedInsts.count(VisitedSExt))
7304 continue;
7305 TypePromotionTransaction TPT(RemovedInsts);
7307 SmallVector<Instruction *, 2> Chains;
7308 Exts.push_back(VisitedSExt);
7309 bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
7310 TPT.commit();
7311 if (HasPromoted)
7312 Promoted = true;
7313 for (auto *I : Chains) {
7314 Value *HeadOfChain = I->getOperand(0);
7315 // Mark this as handled.
7316 SeenChainsForSExt[HeadOfChain] = nullptr;
7317 ValToSExtendedUses[HeadOfChain].push_back(I);
7318 }
7319 }
7320 return Promoted;
7321}
7322
7323bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
7324 BasicBlock *DefBB = I->getParent();
7325
7326 // If the result of a {s|z}ext and its source are both live out, rewrite all
7327 // other uses of the source with result of extension.
7328 Value *Src = I->getOperand(0);
7329 if (Src->hasOneUse())
7330 return false;
7331
7332 // Only do this xform if truncating is free.
7333 if (!TLI->isTruncateFree(I->getType(), Src->getType()))
7334 return false;
7335
7336 // Only safe to perform the optimization if the source is also defined in
7337 // this block.
7338 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
7339 return false;
7340
7341 bool DefIsLiveOut = false;
7342 for (User *U : I->users()) {
7344
7345 // Figure out which BB this ext is used in.
7346 BasicBlock *UserBB = UI->getParent();
7347 if (UserBB == DefBB)
7348 continue;
7349 DefIsLiveOut = true;
7350 break;
7351 }
7352 if (!DefIsLiveOut)
7353 return false;
7354
7355 // Make sure none of the uses are PHI nodes.
7356 for (User *U : Src->users()) {
7358 BasicBlock *UserBB = UI->getParent();
7359 if (UserBB == DefBB)
7360 continue;
7361 // Be conservative. We don't want this xform to end up introducing
7362 // reloads just before load / store instructions.
7363 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
7364 return false;
7365 }
7366
7367 // InsertedTruncs - Only insert one trunc in each block once.
7368 DenseMap<BasicBlock *, Instruction *> InsertedTruncs;
7369
7370 bool MadeChange = false;
7371 for (Use &U : Src->uses()) {
7372 Instruction *User = cast<Instruction>(U.getUser());
7373
7374 // Figure out which BB this ext is used in.
7375 BasicBlock *UserBB = User->getParent();
7376 if (UserBB == DefBB)
7377 continue;
7378
7379 // Both src and def are live in this block. Rewrite the use.
7380 Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
7381
7382 if (!InsertedTrunc) {
7383 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
7384 assert(InsertPt != UserBB->end());
7385 InsertedTrunc = new TruncInst(I, Src->getType(), "");
7386 InsertedTrunc->insertBefore(*UserBB, InsertPt);
7387 InsertedInsts.insert(InsertedTrunc);
7388 }
7389
7390 // Replace a use of the {s|z}ext source with a use of the result.
7391 U = InsertedTrunc;
7392 ++NumExtUses;
7393 MadeChange = true;
7394 }
7395
7396 return MadeChange;
7397}
7398
7399// Find loads whose uses only use some of the loaded value's bits. Add an "and"
7400// just after the load if the target can fold this into one extload instruction,
7401// with the hope of eliminating some of the other later "and" instructions using
7402// the loaded value. "and"s that are made trivially redundant by the insertion
7403// of the new "and" are removed by this function, while others (e.g. those whose
7404// path from the load goes through a phi) are left for isel to potentially
7405// remove.
7406//
7407// For example:
7408//
7409// b0:
7410// x = load i32
7411// ...
7412// b1:
7413// y = and x, 0xff
7414// z = use y
7415//
7416// becomes:
7417//
7418// b0:
7419// x = load i32
7420// x' = and x, 0xff
7421// ...
7422// b1:
7423// z = use x'
7424//
7425// whereas:
7426//
7427// b0:
7428// x1 = load i32
7429// ...
7430// b1:
7431// x2 = load i32
7432// ...
7433// b2:
7434// x = phi x1, x2
7435// y = and x, 0xff
7436//
7437// becomes (after a call to optimizeLoadExt for each load):
7438//
7439// b0:
7440// x1 = load i32
7441// x1' = and x1, 0xff
7442// ...
7443// b1:
7444// x2 = load i32
7445// x2' = and x2, 0xff
7446// ...
7447// b2:
7448// x = phi x1', x2'
7449// y = and x, 0xff
7450bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
7451 if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy())
7452 return false;
7453
7454 // Skip loads we've already transformed.
7455 if (Load->hasOneUse() &&
7456 InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
7457 return false;
7458
7459 // Look at all uses of Load, looking through phis, to determine how many bits
7460 // of the loaded value are needed.
7461 SmallVector<Instruction *, 8> WorkList;
7462 SmallPtrSet<Instruction *, 16> Visited;
7463 SmallVector<Instruction *, 8> AndsToMaybeRemove;
7464 SmallVector<Instruction *, 8> DropFlags;
7465 for (auto *U : Load->users())
7466 WorkList.push_back(cast<Instruction>(U));
7467
7468 EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
7469 unsigned BitWidth = LoadResultVT.getSizeInBits();
7470 // If the BitWidth is 0, do not try to optimize the type
7471 if (BitWidth == 0)
7472 return false;
7473
7474 APInt DemandBits(BitWidth, 0);
7475 APInt WidestAndBits(BitWidth, 0);
7476
7477 while (!WorkList.empty()) {
7478 Instruction *I = WorkList.pop_back_val();
7479
7480 // Break use-def graph loops.
7481 if (!Visited.insert(I).second)
7482 continue;
7483
7484 // For a PHI node, push all of its users.
7485 if (auto *Phi = dyn_cast<PHINode>(I)) {
7486 for (auto *U : Phi->users())
7487 WorkList.push_back(cast<Instruction>(U));
7488 continue;
7489 }
7490
7491 switch (I->getOpcode()) {
7492 case Instruction::And: {
7493 auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
7494 if (!AndC)
7495 return false;
7496 APInt AndBits = AndC->getValue();
7497 DemandBits |= AndBits;
7498 // Keep track of the widest and mask we see.
7499 if (AndBits.ugt(WidestAndBits))
7500 WidestAndBits = AndBits;
7501 if (AndBits == WidestAndBits && I->getOperand(0) == Load)
7502 AndsToMaybeRemove.push_back(I);
7503 break;
7504 }
7505
7506 case Instruction::Shl: {
7507 auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
7508 if (!ShlC)
7509 return false;
7510 uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
7511 DemandBits.setLowBits(BitWidth - ShiftAmt);
7512 DropFlags.push_back(I);
7513 break;
7514 }
7515
7516 case Instruction::Trunc: {
7517 EVT TruncVT = TLI->getValueType(*DL, I->getType());
7518 unsigned TruncBitWidth = TruncVT.getSizeInBits();
7519 DemandBits.setLowBits(TruncBitWidth);
7520 DropFlags.push_back(I);
7521 break;
7522 }
7523
7524 default:
7525 return false;
7526 }
7527 }
7528
7529 uint32_t ActiveBits = DemandBits.getActiveBits();
7530 // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
7531 // target even if isLoadLegal says an i1 EXTLOAD is valid. For example,
7532 // for the AArch64 target isLoadLegal(i32, i1, ..., ZEXTLOAD, false) returns
7533 // true, but (and (load x) 1) is not matched as a single instruction, rather
7534 // as a LDR followed by an AND.
7535 // TODO: Look into removing this restriction by fixing backends to either
7536 // return false for isLoadLegal for i1 or have them select this pattern to
7537 // a single instruction.
7538 //
7539 // Also avoid hoisting if we didn't see any ands with the exact DemandBits
7540 // mask, since these are the only ands that will be removed by isel.
7541 if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
7542 WidestAndBits != DemandBits)
7543 return false;
7544
7545 LLVMContext &Ctx = Load->getType()->getContext();
7546 Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
7547 EVT TruncVT = TLI->getValueType(*DL, TruncTy);
7548
7549 // Reject cases that won't be matched as extloads.
7550 if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
7551 !TLI->isLoadLegal(LoadResultVT, TruncVT, Load->getAlign(),
7552 Load->getPointerAddressSpace(), ISD::ZEXTLOAD, false))
7553 return false;
7554
7555 IRBuilder<> Builder(Load->getNextNode());
7556 auto *NewAnd = cast<Instruction>(
7557 Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
7558 // Mark this instruction as "inserted by CGP", so that other
7559 // optimizations don't touch it.
7560 InsertedInsts.insert(NewAnd);
7561
7562 // Replace all uses of load with new and (except for the use of load in the
7563 // new and itself).
7564 replaceAllUsesWith(Load, NewAnd, FreshBBs, IsHugeFunc);
7565 NewAnd->setOperand(0, Load);
7566
7567 // Remove any and instructions that are now redundant.
7568 for (auto *And : AndsToMaybeRemove)
7569 // Check that the and mask is the same as the one we decided to put on the
7570 // new and.
7571 if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
7572 replaceAllUsesWith(And, NewAnd, FreshBBs, IsHugeFunc);
7573 if (&*CurInstIterator == And)
7574 CurInstIterator = std::next(And->getIterator());
7575 And->eraseFromParent();
7576 ++NumAndUses;
7577 }
7578
7579 // NSW flags may not longer hold.
7580 for (auto *Inst : DropFlags)
7581 Inst->setHasNoSignedWrap(false);
7582
7583 ++NumAndsAdded;
7584 return true;
7585}
7586
7587/// Check if V (an operand of a select instruction) is an expensive instruction
7588/// that is only used once.
7590 auto *I = dyn_cast<Instruction>(V);
7591 // If it's safe to speculatively execute, then it should not have side
7592 // effects; therefore, it's safe to sink and possibly *not* execute.
7593 return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
7594 TTI->isExpensiveToSpeculativelyExecute(I);
7595}
7596
7597/// Returns true if a SelectInst should be turned into an explicit branch.
7599 const TargetLowering *TLI,
7600 SelectInst *SI) {
7601 // If even a predictable select is cheap, then a branch can't be cheaper.
7602 if (!TLI->isPredictableSelectExpensive())
7603 return false;
7604
7605 // FIXME: This should use the same heuristics as IfConversion to determine
7606 // whether a select is better represented as a branch.
7607
7608 // If metadata tells us that the select condition is obviously predictable,
7609 // then we want to replace the select with a branch.
7610 uint64_t TrueWeight, FalseWeight;
7611 if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
7612 uint64_t Max = std::max(TrueWeight, FalseWeight);
7613 uint64_t Sum = TrueWeight + FalseWeight;
7614 if (Sum != 0) {
7615 auto Probability = BranchProbability::getBranchProbability(Max, Sum);
7616 if (Probability > TTI->getPredictableBranchThreshold())
7617 return true;
7618 }
7619 }
7620
7621 CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
7622
7623 // If a branch is predictable, an out-of-order CPU can avoid blocking on its
7624 // comparison condition. If the compare has more than one use, there's
7625 // probably another cmov or setcc around, so it's not worth emitting a branch.
7626 if (!Cmp || !Cmp->hasOneUse())
7627 return false;
7628
7629 // If either operand of the select is expensive and only needed on one side
7630 // of the select, we should form a branch.
7631 if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
7632 sinkSelectOperand(TTI, SI->getFalseValue()))
7633 return true;
7634
7635 return false;
7636}
7637
7638/// If \p isTrue is true, return the true value of \p SI, otherwise return
7639/// false value of \p SI. If the true/false value of \p SI is defined by any
7640/// select instructions in \p Selects, look through the defining select
7641/// instruction until the true/false value is not defined in \p Selects.
7642static Value *
7644 const SmallPtrSet<const Instruction *, 2> &Selects) {
7645 Value *V = nullptr;
7646
7647 for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
7648 DefSI = dyn_cast<SelectInst>(V)) {
7649 assert(DefSI->getCondition() == SI->getCondition() &&
7650 "The condition of DefSI does not match with SI");
7651 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
7652 }
7653
7654 assert(V && "Failed to get select true/false value");
7655 return V;
7656}
7657
7658bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
7659 assert(Shift->isShift() && "Expected a shift");
7660
7661 // If this is (1) a vector shift, (2) shifts by scalars are cheaper than
7662 // general vector shifts, and (3) the shift amount is a select-of-splatted
7663 // values, hoist the shifts before the select:
7664 // shift Op0, (select Cond, TVal, FVal) -->
7665 // select Cond, (shift Op0, TVal), (shift Op0, FVal)
7666 //
7667 // This is inverting a generic IR transform when we know that the cost of a
7668 // general vector shift is more than the cost of 2 shift-by-scalars.
7669 // We can't do this effectively in SDAG because we may not be able to
7670 // determine if the select operands are splats from within a basic block.
7671 Type *Ty = Shift->getType();
7672 if (!Ty->isVectorTy() || !TTI->isVectorShiftByScalarCheap(Ty))
7673 return false;
7674 Value *Cond, *TVal, *FVal;
7675 if (!match(Shift->getOperand(1),
7676 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7677 return false;
7678 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7679 return false;
7680
7681 IRBuilder<> Builder(Shift);
7682 BinaryOperator::BinaryOps Opcode = Shift->getOpcode();
7683 Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
7684 Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
7685 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7686 replaceAllUsesWith(Shift, NewSel, FreshBBs, IsHugeFunc);
7687 Shift->eraseFromParent();
7688 return true;
7689}
7690
7691bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
7692 Intrinsic::ID Opcode = Fsh->getIntrinsicID();
7693 assert((Opcode == Intrinsic::fshl || Opcode == Intrinsic::fshr) &&
7694 "Expected a funnel shift");
7695
7696 // If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper
7697 // than general vector shifts, and (3) the shift amount is select-of-splatted
7698 // values, hoist the funnel shifts before the select:
7699 // fsh Op0, Op1, (select Cond, TVal, FVal) -->
7700 // select Cond, (fsh Op0, Op1, TVal), (fsh Op0, Op1, FVal)
7701 //
7702 // This is inverting a generic IR transform when we know that the cost of a
7703 // general vector shift is more than the cost of 2 shift-by-scalars.
7704 // We can't do this effectively in SDAG because we may not be able to
7705 // determine if the select operands are splats from within a basic block.
7706 Type *Ty = Fsh->getType();
7707 if (!Ty->isVectorTy() || !TTI->isVectorShiftByScalarCheap(Ty))
7708 return false;
7709 Value *Cond, *TVal, *FVal;
7710 if (!match(Fsh->getOperand(2),
7711 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7712 return false;
7713 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7714 return false;
7715
7716 IRBuilder<> Builder(Fsh);
7717 Value *X = Fsh->getOperand(0), *Y = Fsh->getOperand(1);
7718 Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, TVal});
7719 Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, FVal});
7720 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7721 replaceAllUsesWith(Fsh, NewSel, FreshBBs, IsHugeFunc);
7722 Fsh->eraseFromParent();
7723 return true;
7724}
7725
7726/// If we have a SelectInst that will likely profit from branch prediction,
7727/// turn it into a branch.
///
/// Selects that directly follow \p SI in its block and use the same condition
/// are collected and treated as one group together with \p SI: the group is
/// either left untouched or replaced as a whole by one conditional branch on
/// the frozen condition plus one PHI node per select.
/// \returns true if the group was lowered to control flow, false otherwise.
7728bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
 // NOTE(review): the condition guarding this early return (listing line 7729)
 // is missing from this copy of the source.
7730 return false;
7731
7732 // If the SelectOptimize pass is enabled, selects have already been optimized.
 // NOTE(review): the guarding condition (listing line 7733) is missing from
 // this copy of the source.
7734 return false;
7735
7736 // Find all consecutive select instructions that share the same condition.
 // NOTE(review): the declaration of ASI (listing line 7737) and the head of
 // the for loop (listing line 7739) are missing from this copy of the source.
7738 ASI.push_back(SI);
7740 It != SI->getParent()->end(); ++It) {
7741 SelectInst *I = dyn_cast<SelectInst>(&*It);
7742 if (I && SI->getCondition() == I->getCondition()) {
7743 ASI.push_back(I);
7744 } else {
7745 break;
7746 }
7747 }
7748
7749 SelectInst *LastSI = ASI.back();
7750 // Increment the current iterator to skip all the rest of select instructions
7751 // because they will be either "not lowered" or "all lowered" to branch.
7752 CurInstIterator = std::next(LastSI->getIterator());
7753 // Examine debug-info attached to the consecutive select instructions. They
7754 // won't be individually optimised by optimizeInst, so we need to perform
7755 // DbgVariableRecord maintenance here instead.
7756 for (SelectInst *SI : ArrayRef(ASI).drop_front())
7757 fixupDbgVariableRecordsOnInst(*SI);
7758
 // Any condition type other than i1 is treated as a vector condition.
7759 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
7760
7761 // Can we convert the 'select' to CF ?
7762 if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable))
7763 return false;
7764
 // Classify the select by the type of the value it produces; the condition
 // is known to be a scalar i1 at this point.
7765 TargetLowering::SelectSupportKind SelectKind;
7766 if (SI->getType()->isVectorTy())
7767 SelectKind = TargetLowering::ScalarCondVectorVal;
7768 else
7769 SelectKind = TargetLowering::ScalarValSelect;
7770
 // NOTE(review): part of this condition (listing line 7772) is missing from
 // this copy of the source.
7771 if (TLI->isSelectSupported(SelectKind) &&
7773 llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI)))
7774 return false;
7775
7776 // Transform a sequence like this:
7777 // start:
7778 // %cmp = cmp uge i32 %a, %b
7779 // %sel = select i1 %cmp, i32 %c, i32 %d
7780 //
7781 // Into:
7782 // start:
7783 // %cmp = cmp uge i32 %a, %b
7784 // %cmp.frozen = freeze %cmp
7785 // br i1 %cmp.frozen, label %select.true, label %select.false
7786 // select.true:
7787 // br label %select.end
7788 // select.false:
7789 // br label %select.end
7790 // select.end:
7791 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
7792 //
7793 // %cmp should be frozen, otherwise it may introduce undefined behavior.
7794 // In addition, we may sink instructions that produce %c or %d from
7795 // the entry block into the destination(s) of the new branch.
7796 // If the true or false blocks do not contain a sunken instruction, that
7797 // block and its branch may be optimized away. In that case, one side of the
7798 // first branch will point directly to select.end, and the corresponding PHI
7799 // predecessor block will be the start block.
7800 // The CFG is altered here and we update the DominatorTree and the LoopInfo,
7801 // but we don't set a ModifiedDT flag to avoid restarting the function walk in
7802 // runOnFunction for each select optimized.
7803
7804 // Collect values that go on the true side and the values that go on the false
7805 // side.
7806 SmallVector<Instruction *> TrueInstrs, FalseInstrs;
7807 for (SelectInst *SI : ASI) {
7808 if (Value *V = SI->getTrueValue(); sinkSelectOperand(TTI, V))
7809 TrueInstrs.push_back(cast<Instruction>(V));
7810 if (Value *V = SI->getFalseValue(); sinkSelectOperand(TTI, V))
7811 FalseInstrs.push_back(cast<Instruction>(V));
7812 }
7813
7814 // Split the select block, according to how many (if any) values go on each
7815 // side.
7816 BasicBlock *StartBlock = SI->getParent();
7817 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(LastSI));
7818 // We should split before any debug-info.
7819 SplitPt.setHeadBit(true);
7820
 // The freeze is inserted in front of the first select of the group; the
 // frozen value is what the new branch tests.
7821 IRBuilder<> IB(SI);
7822 auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
7823
7824 BasicBlock *TrueBlock = nullptr;
7825 BasicBlock *FalseBlock = nullptr;
7826 BasicBlock *EndBlock = nullptr;
7827 UncondBrInst *TrueBranch = nullptr;
7828 UncondBrInst *FalseBranch = nullptr;
 // Nothing to sink on the true side (this branch is also taken when neither
 // side has anything to sink): only a false block is created.
7829 if (TrueInstrs.size() == 0) {
7830 FalseBranch = cast<UncondBrInst>(
7831 SplitBlockAndInsertIfElse(CondFr, SplitPt, false, nullptr, DTU, LI));
7832 FalseBlock = FalseBranch->getParent();
7833 EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
 // Nothing to sink on the false side: only a true block is created.
7834 } else if (FalseInstrs.size() == 0) {
7835 TrueBranch = cast<UncondBrInst>(
7836 SplitBlockAndInsertIfThen(CondFr, SplitPt, false, nullptr, DTU, LI));
7837 TrueBlock = TrueBranch->getParent();
7838 EndBlock = TrueBranch->getSuccessor();
 // Both sides receive sunk instructions: create a block for each.
7839 } else {
7840 Instruction *ThenTerm = nullptr;
7841 Instruction *ElseTerm = nullptr;
7842 SplitBlockAndInsertIfThenElse(CondFr, SplitPt, &ThenTerm, &ElseTerm,
7843 nullptr, DTU, LI);
7844 TrueBranch = cast<UncondBrInst>(ThenTerm);
7845 FalseBranch = cast<UncondBrInst>(ElseTerm);
7846 TrueBlock = TrueBranch->getParent();
7847 FalseBlock = FalseBranch->getParent();
7848 EndBlock = TrueBranch->getSuccessor();
7849 }
7850
7851 EndBlock->setName("select.end");
7852 if (TrueBlock)
7853 TrueBlock->setName("select.true.sink");
7854 if (FalseBlock)
7855 FalseBlock->setName(FalseInstrs.size() == 0 ? "select.false"
7856 : "select.false.sink");
7857
 // In huge-function mode, record every block created by the split in
 // FreshBBs.
7858 if (IsHugeFunc) {
7859 if (TrueBlock)
7860 FreshBBs.insert(TrueBlock);
7861 if (FalseBlock)
7862 FreshBBs.insert(FalseBlock);
7863 FreshBBs.insert(EndBlock);
7864 }
7865
 // The end block is given the same block frequency as the start block.
7866 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
7867
 // Copy the listed metadata kinds from the first select onto the terminator
 // of the start block.
7868 static const unsigned MD[] = {
7869 LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
7870 LLVMContext::MD_make_implicit, LLVMContext::MD_dbg};
7871 StartBlock->getTerminator()->copyMetadata(*SI, MD);
7872
7873 // Sink expensive instructions into the conditional blocks to avoid executing
7874 // them speculatively.
7875 for (Instruction *I : TrueInstrs)
7876 I->moveBefore(TrueBranch->getIterator());
7877 for (Instruction *I : FalseInstrs)
7878 I->moveBefore(FalseBranch->getIterator());
7879
7880 // If we did not create a new block for one of the 'true' or 'false' paths
7881 // of the condition, it means that side of the branch goes to the end block
7882 // directly and the path originates from the start block from the point of
7883 // view of the new PHI.
7884 if (TrueBlock == nullptr)
7885 TrueBlock = StartBlock;
7886 else if (FalseBlock == nullptr)
7887 FalseBlock = StartBlock;
7888
 // INS holds the selects of the group that have not been replaced yet; each
 // select is removed from it once its PHI has been built.
7889 SmallPtrSet<const Instruction *, 2> INS(llvm::from_range, ASI);
7890 // Use reverse iterator because later select may use the value of the
7891 // earlier select, and we need to propagate value through earlier select
7892 // to get the PHI operand.
7893 for (SelectInst *SI : llvm::reverse(ASI)) {
7894 // The select itself is replaced with a PHI Node.
7895 PHINode *PN = PHINode::Create(SI->getType(), 2, "");
7896 PN->insertBefore(EndBlock->begin());
7897 PN->takeName(SI);
7898 PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
7899 PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
7900 PN->setDebugLoc(SI->getDebugLoc());
7901
7902 replaceAllUsesWith(SI, PN, FreshBBs, IsHugeFunc);
7903 SI->eraseFromParent();
7904 INS.erase(SI);
7905 ++NumSelectsExpanded;
7906 }
7907
7908 // Instruct OptimizeBlock to skip to the next block.
7909 CurInstIterator = StartBlock->end();
7910 return true;
7911}
7912
7913/// Some targets only accept certain types for splat inputs. For example a VDUP
7914/// in MVE takes a GPR (integer) register, and the instructions that incorporate
7915/// a VDUP (such as a VADD qd, qm, rm) also require a gpr register.
///
/// When the target returns a replacement scalar type from
/// shouldConvertSplatType, the splat \p SVI is rebuilt as
/// bitcast(splat(bitcast(val))) using that type.
/// \returns true if \p SVI was rewritten, false otherwise.
7916bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
7917 // Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only
 // NOTE(review): the start of the match expression (listing line 7918) is
 // missing from this copy of the source.
7919 m_Undef(), m_ZeroMask())))
7920 return false;
 // A null result means the target does not want this splat converted.
7921 Type *NewType = TLI->shouldConvertSplatType(SVI);
7922 if (!NewType)
7923 return false;
7924
7925 auto *SVIVecType = cast<FixedVectorType>(SVI->getType());
7926 assert(!NewType->isVectorTy() && "Expected a scalar type!");
7927 assert(NewType->getScalarSizeInBits() == SVIVecType->getScalarSizeInBits() &&
7928 "Expected a type of the same size!");
7929 auto *NewVecType =
7930 FixedVectorType::get(NewType, SVIVecType->getNumElements());
7931
7932 // Create a bitcast (shuffle (insert (bitcast(..))))
7933 IRBuilder<> Builder(SVI->getContext());
7934 Builder.SetInsertPoint(SVI);
 // Operand 1 of the matched insertelement is the scalar being splatted.
7935 Value *BC1 = Builder.CreateBitCast(
7936 cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType);
7937 Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1);
7938 Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
7939
7940 replaceAllUsesWith(SVI, BC2, FreshBBs, IsHugeFunc);
 // NOTE(review): the name of the function called here (listing line 7941) is
 // missing from this copy of the source; only its arguments are visible.
7942 SVI, TLInfo, nullptr,
7943 [&](Value *V) { removeAllAssertingVHReferences(V); });
7944
7945 // Also hoist the bitcast up to its operand if they are not in the same
7946 // block.
7947 if (auto *BCI = dyn_cast<Instruction>(BC1))
7948 if (auto *Op = dyn_cast<Instruction>(BCI->getOperand(0)))
7949 if (BCI->getParent() != Op->getParent() && !isa<PHINode>(Op) &&
7950 !Op->isTerminator() && !Op->isEHPad())
7951 BCI->moveAfter(Op);
7952
7953 return true;
7954}
7955
7956bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
7957 // If the operands of I can be folded into a target instruction together with
7958 // I, duplicate and sink them.
7959 SmallVector<Use *, 4> OpsToSink;
7960 if (!TTI->isProfitableToSinkOperands(I, OpsToSink))
7961 return false;
7962
7963 // OpsToSink can contain multiple uses in a use chain (e.g.
7964 // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
7965 // uses must come first, so we process the ops in reverse order so as to not
7966 // create invalid IR.
7967 BasicBlock *TargetBB = I->getParent();
7968 bool Changed = false;
7969 SmallVector<Use *, 4> ToReplace;
7970 Instruction *InsertPoint = I;
7971 DenseMap<const Instruction *, unsigned long> InstOrdering;
7972 unsigned long InstNumber = 0;
7973 for (const auto &I : *TargetBB)
7974 InstOrdering[&I] = InstNumber++;
7975
7976 for (Use *U : reverse(OpsToSink)) {
7977 auto *UI = cast<Instruction>(U->get());
7978 if (isa<PHINode>(UI) || UI->mayHaveSideEffects() || UI->mayReadFromMemory())
7979 continue;
7980 if (UI->getParent() == TargetBB) {
7981 if (InstOrdering[UI] < InstOrdering[InsertPoint])
7982 InsertPoint = UI;
7983 continue;
7984 }
7985 ToReplace.push_back(U);
7986 }
7987
7988 SetVector<Instruction *> MaybeDead;
7989 DenseMap<Instruction *, Instruction *> NewInstructions;
7990 for (Use *U : ToReplace) {
7991 auto *UI = cast<Instruction>(U->get());
7992 Instruction *NI = UI->clone();
7993
7994 if (IsHugeFunc) {
7995 // Now we clone an instruction, its operands' defs may sink to this BB
7996 // now. So we put the operands defs' BBs into FreshBBs to do optimization.
7997 for (Value *Op : NI->operands())
7998 if (auto *OpDef = dyn_cast<Instruction>(Op))
7999 FreshBBs.insert(OpDef->getParent());
8000 }
8001
8002 NewInstructions[UI] = NI;
8003 MaybeDead.insert(UI);
8004 LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n");
8005 NI->insertBefore(InsertPoint->getIterator());
8006 InsertPoint = NI;
8007 InsertedInsts.insert(NI);
8008
8009 // Update the use for the new instruction, making sure that we update the
8010 // sunk instruction uses, if it is part of a chain that has already been
8011 // sunk.
8012 Instruction *OldI = cast<Instruction>(U->getUser());
8013 if (auto It = NewInstructions.find(OldI); It != NewInstructions.end())
8014 It->second->setOperand(U->getOperandNo(), NI);
8015 else
8016 U->set(NI);
8017 Changed = true;
8018 }
8019
8020 // Remove instructions that are dead after sinking.
8021 for (auto *I : MaybeDead) {
8022 if (!I->hasNUsesOrMore(1)) {
8023 LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n");
8024 I->eraseFromParent();
8025 }
8026 }
8027
8028 return Changed;
8029}
8030
/// Widen the condition of \p SI, and every case constant, to the width of
/// RegType when that width is larger than the bit width of the condition.
/// \returns true if the switch was widened, false if it was left unchanged.
8031bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
8032 Value *Cond = SI->getCondition();
8033 Type *OldType = Cond->getType();
8034 LLVMContext &Context = Cond->getContext();
8035 EVT OldVT = TLI->getValueType(*DL, OldType);
 // NOTE(review): the declaration of RegType (listing line 8036) is missing
 // from this copy of the source.
8037 unsigned RegWidth = RegType.getSizeInBits();
8038
 // Nothing to gain unless the register is strictly wider than the condition.
8039 if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
8040 return false;
8041
8042 // If the register width is greater than the type width, expand the condition
8043 // of the switch instruction and each case constant to the width of the
8044 // register. By widening the type of the switch condition, subsequent
8045 // comparisons (for case comparisons) will not need to be extended to the
8046 // preferred register width, so we will potentially eliminate N-1 extends,
8047 // where N is the number of cases in the switch.
8048 auto *NewType = Type::getIntNTy(Context, RegWidth);
8049
8050 // Extend the switch condition and case constants using the target preferred
8051 // extend unless the switch condition is a function argument with an extend
8052 // attribute. In that case, we can avoid an unnecessary mask/extension by
8053 // matching the argument extension instead.
8054 Instruction::CastOps ExtType = Instruction::ZExt;
8055 // Some targets prefer SExt over ZExt.
8056 if (TLI->isSExtCheaperThanZExt(OldVT, RegType))
8057 ExtType = Instruction::SExt;
8058
 // The zext attribute is checked last, so it takes precedence if an argument
 // were to report both attributes.
8059 if (auto *Arg = dyn_cast<Argument>(Cond)) {
8060 if (Arg->hasSExtAttr())
8061 ExtType = Instruction::SExt;
8062 if (Arg->hasZExtAttr())
8063 ExtType = Instruction::ZExt;
8064 }
8065
 // Insert the extension right before the switch and switch on it instead.
8066 auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
8067 ExtInst->insertBefore(SI->getIterator());
8068 ExtInst->setDebugLoc(SI->getDebugLoc());
8069 SI->setCondition(ExtInst);
 // Extend every case constant the same way as the condition so the two stay
 // comparable.
8070 for (auto Case : SI->cases()) {
8071 const APInt &NarrowConst = Case.getCaseValue()->getValue();
8072 APInt WideConst = (ExtType == Instruction::ZExt)
8073 ? NarrowConst.zext(RegWidth)
8074 : NarrowConst.sext(RegWidth);
8075 Case.setValue(ConstantInt::get(Context, WideConst));
8076 }
8077
8078 return true;
8079}
8080
8081bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
8082 // The SCCP optimization tends to produce code like this:
8083 // switch(x) { case 42: phi(42, ...) }
8084 // Materializing the constant for the phi-argument needs instructions; So we
8085 // change the code to:
8086 // switch(x) { case 42: phi(x, ...) }
8087
8088 Value *Condition = SI->getCondition();
8089 // Avoid endless loop in degenerate case.
8090 if (isa<ConstantInt>(*Condition))
8091 return false;
8092
8093 bool Changed = false;
8094 BasicBlock *SwitchBB = SI->getParent();
8095 Type *ConditionType = Condition->getType();
8096
8097 for (const SwitchInst::CaseHandle &Case : SI->cases()) {
8098 ConstantInt *CaseValue = Case.getCaseValue();
8099 BasicBlock *CaseBB = Case.getCaseSuccessor();
8100 // Set to true if we previously checked that `CaseBB` is only reached by
8101 // a single case from this switch.
8102 bool CheckedForSinglePred = false;
8103 for (PHINode &PHI : CaseBB->phis()) {
8104 Type *PHIType = PHI.getType();
8105 // If ZExt is free then we can also catch patterns like this:
8106 // switch((i32)x) { case 42: phi((i64)42, ...); }
8107 // and replace `(i64)42` with `zext i32 %x to i64`.
8108 bool TryZExt =
8109 PHIType->isIntegerTy() &&
8110 PHIType->getIntegerBitWidth() > ConditionType->getIntegerBitWidth() &&
8111 TLI->isZExtFree(ConditionType, PHIType);
8112 if (PHIType == ConditionType || TryZExt) {
8113 // Set to true to skip this case because of multiple preds.
8114 bool SkipCase = false;
8115 Value *Replacement = nullptr;
8116 for (unsigned I = 0, E = PHI.getNumIncomingValues(); I != E; I++) {
8117 Value *PHIValue = PHI.getIncomingValue(I);
8118 if (PHIValue != CaseValue) {
8119 if (!TryZExt)
8120 continue;
8121 ConstantInt *PHIValueInt = dyn_cast<ConstantInt>(PHIValue);
8122 if (!PHIValueInt ||
8123 PHIValueInt->getValue() !=
8124 CaseValue->getValue().zext(PHIType->getIntegerBitWidth()))
8125 continue;
8126 }
8127 if (PHI.getIncomingBlock(I) != SwitchBB)
8128 continue;
8129 // We cannot optimize if there are multiple case labels jumping to
8130 // this block. This check may get expensive when there are many
8131 // case labels so we test for it last.
8132 if (!CheckedForSinglePred) {
8133 CheckedForSinglePred = true;
8134 if (SI->findCaseDest(CaseBB) == nullptr) {
8135 SkipCase = true;
8136 break;
8137 }
8138 }
8139
8140 if (Replacement == nullptr) {
8141 if (PHIValue == CaseValue) {
8142 Replacement = Condition;
8143 } else {
8144 IRBuilder<> Builder(SI);
8145 Replacement = Builder.CreateZExt(Condition, PHIType);
8146 }
8147 }
8148 PHI.setIncomingValue(I, Replacement);
8149 Changed = true;
8150 }
8151 if (SkipCase)
8152 break;
8153 }
8154 }
8155 }
8156 return Changed;
8157}
8158
8159bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
8160 bool Changed = optimizeSwitchType(SI);
8161 Changed |= optimizeSwitchPhiConstants(SI);
8162 return Changed;
8163}
8164
8165namespace {
8166
8167/// Helper class to promote a scalar operation to a vector one.
8168/// This class is used to move downward extractelement transition.
8169/// E.g.,
8170/// a = vector_op <2 x i32>
8171/// b = extractelement <2 x i32> a, i32 0
8172/// c = scalar_op b
8173/// store c
8174///
8175/// =>
8176/// a = vector_op <2 x i32>
8177/// c = vector_op a (equivalent to scalar_op on the related lane)
8178/// * d = extractelement <2 x i32> c, i32 0
8179/// * store d
8180/// Assuming both extractelement and store can be combined, we get rid of the
8181/// transition.
///
/// Usage, as visible in this class: instructions are queued with
/// enqueueForPromotion, the final user is registered with
/// recordCombineInstruction, and promote() then rewrites the queued chain if
/// the cost check passes (or if StressStoreExtract is set).
8182class VectorPromoteHelper {
8183 /// DataLayout associated with the current module.
8184 const DataLayout &DL;
8185
8186 /// Used to perform some checks on the legality of vector operations.
8187 const TargetLowering &TLI;
8188
8189 /// Used to estimate the cost of the promoted chain.
8190 const TargetTransformInfo &TTI;
8191
8192 /// The transition being moved downwards.
8193 Instruction *Transition;
8194
8195 /// The sequence of instructions to be promoted.
8196 SmallVector<Instruction *, 4> InstsToBePromoted;
8197
8198 /// Cost of combining a store and an extract.
8199 unsigned StoreExtractCombineCost;
8200
8201 /// Instruction that will be combined with the transition.
8202 Instruction *CombineInst = nullptr;
8203
8204 /// The instruction that represents the current end of the transition.
8205 /// Since we are faking the promotion until we reach the end of the chain
8206 /// of computation, we need a way to get the current end of the transition.
8207 Instruction *getEndOfTransition() const {
8208 if (InstsToBePromoted.empty())
8209 return Transition;
8210 return InstsToBePromoted.back();
8211 }
8212
8213 /// Return the index of the original value in the transition.
8214 /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
8215 /// c, is at index 0.
8216 unsigned getTransitionOriginalValueIdx() const {
8217 assert(isa<ExtractElementInst>(Transition) &&
8218 "Other kind of transitions are not supported yet");
8219 return 0;
8220 }
8221
8222 /// Return the index of the index in the transition.
8223 /// E.g., for "extractelement <2 x i32> c, i32 0" the index
8224 /// is at index 1.
8225 unsigned getTransitionIdx() const {
8226 assert(isa<ExtractElementInst>(Transition) &&
8227 "Other kind of transitions are not supported yet");
8228 return 1;
8229 }
8230
8231 /// Get the type of the transition.
8232 /// This is the type of the original value.
8233 /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
8234 /// transition is <2 x i32>.
8235 Type *getTransitionType() const {
8236 return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
8237 }
8238
8239 /// Promote \p ToBePromoted by moving \p Def downward through.
8240 /// I.e., we have the following sequence:
8241 /// Def = Transition <ty1> a to <ty2>
8242 /// b = ToBePromoted <ty2> Def, ...
8243 /// =>
8244 /// b = ToBePromoted <ty1> a, ...
8245 /// Def = Transition <ty1> ToBePromoted to <ty2>
8246 void promoteImpl(Instruction *ToBePromoted);
8247
8248 /// Check whether or not it is profitable to promote all the
8249 /// instructions enqueued to be promoted.
 /// Requires CombineInst to be a StoreInst (it is cast unconditionally).
 /// \returns true if the estimated scalar cost exceeds the estimated vector
 /// cost.
8250 bool isProfitableToPromote() {
 // NOTE(review): this reads the operand at getTransitionOriginalValueIdx()
 // (the vector being extracted from), not the one at getTransitionIdx() (the
 // extract index). If the lane index was intended here, this looks like the
 // wrong operand - confirm. When the operand is not a ConstantInt, Index
 // becomes -1 converted to unsigned.
8251 Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
8252 unsigned Index = isa<ConstantInt>(ValIdx)
8253 ? cast<ConstantInt>(ValIdx)->getZExtValue()
8254 : -1;
8255 Type *PromotedType = getTransitionType();
8256
8257 StoreInst *ST = cast<StoreInst>(CombineInst);
8258 unsigned AS = ST->getPointerAddressSpace();
8259 // Check if this store is supported.
 // NOTE(review): the head of this `if` (listing line 8260) is missing from
 // this copy of the source.
8261 TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
8262 ST->getAlign())) {
8263 // If this is not supported, there is no way we can combine
8264 // the extract with the store.
8265 return false;
8266 }
8267
8268 // The scalar chain of computation has to pay for the transition
8269 // scalar to vector.
8270 // The vector chain has to account for the combining cost.
 // NOTE(review): the declaration of CostKind (listing lines 8271-8272) is
 // missing from this copy of the source.
8273 InstructionCost ScalarCost =
8274 TTI.getVectorInstrCost(*Transition, PromotedType, CostKind, Index);
8275 InstructionCost VectorCost = StoreExtractCombineCost;
8276 for (const auto &Inst : InstsToBePromoted) {
8277 // Compute the cost.
8278 // By construction, all instructions being promoted are arithmetic ones.
8279 // Moreover, one argument is a constant that can be viewed as a splat
8280 // constant.
8281 Value *Arg0 = Inst->getOperand(0);
8282 bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
8283 isa<ConstantFP>(Arg0);
8284 TargetTransformInfo::OperandValueInfo Arg0Info, Arg1Info;
 // NOTE(review): the bodies of both branches below (listing lines 8286 and
 // 8288) are missing from this copy of the source.
8285 if (IsArg0Constant)
8287 else
8289
8290 ScalarCost += TTI.getArithmeticInstrCost(
8291 Inst->getOpcode(), Inst->getType(), CostKind, Arg0Info, Arg1Info);
8292 VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
8293 CostKind, Arg0Info, Arg1Info);
8294 }
8295 LLVM_DEBUG(
8296 dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
8297 << ScalarCost << "\nVector: " << VectorCost << '\n');
8298 return ScalarCost > VectorCost;
8299 }
8300
8301 /// Generate a constant vector with \p Val with the same
8302 /// number of elements as the transition.
8303 /// \p UseSplat defines whether or not \p Val should be replicated
8304 /// across the whole vector.
8305 /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
8306 /// otherwise we generate a vector with as many poison as possible:
8307 /// <poison, ..., poison, Val, poison, ..., poison> where \p Val is only
8308 /// used at the index of the extract.
8309 Value *getConstantVector(Constant *Val, bool UseSplat) const {
8310 unsigned ExtractIdx = std::numeric_limits<unsigned>::max();
8311 if (!UseSplat) {
8312 // If we cannot determine where the constant must be, we have to
8313 // use a splat constant.
8314 Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
8315 if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
8316 ExtractIdx = CstVal->getSExtValue();
8317 else
8318 UseSplat = true;
8319 }
8320
8321 ElementCount EC = cast<VectorType>(getTransitionType())->getElementCount();
8322 if (UseSplat)
8323 return ConstantVector::getSplat(EC, Val);
8324
 // Non-splat form: Val in the extracted lane, poison in every other lane.
8325 if (!EC.isScalable()) {
8326 SmallVector<Constant *, 4> ConstVec;
8327 PoisonValue *PoisonVal = PoisonValue::get(Val->getType());
8328 for (unsigned Idx = 0; Idx != EC.getKnownMinValue(); ++Idx) {
8329 if (Idx == ExtractIdx)
8330 ConstVec.push_back(Val);
8331 else
8332 ConstVec.push_back(PoisonVal);
8333 }
8334 return ConstantVector::get(ConstVec);
 // NOTE(review): the start of the statement in this else branch (listing
 // line 8336) is missing from this copy of the source.
8335 } else
8337 "Generate scalable vector for non-splat is unimplemented");
8338 }
8339
8340 /// Check if promoting to a vector type an operand at \p OperandIdx
8341 /// in \p Use can trigger undefined behavior.
 /// Only operand 1 of the division and remainder opcodes is considered; for
 /// FDiv/FRem the answer is true only when \p Use lacks the no-NaNs flag.
8342 static bool canCauseUndefinedBehavior(const Instruction *Use,
8343 unsigned OperandIdx) {
8344 // This is not safe to introduce undef when the operand is on
8345 // the right hand side of a division-like instruction.
8346 if (OperandIdx != 1)
8347 return false;
8348 switch (Use->getOpcode()) {
8349 default:
8350 return false;
8351 case Instruction::SDiv:
8352 case Instruction::UDiv:
8353 case Instruction::SRem:
8354 case Instruction::URem:
8355 return true;
8356 case Instruction::FDiv:
8357 case Instruction::FRem:
8358 return !Use->hasNoNaNs();
8359 }
8360 llvm_unreachable(nullptr);
8361 }
8362
8363public:
 /// \p Transition must be non-null (asserted). \p CombineCost is stored as
 /// the cost of combining a store and an extract.
8364 VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
8365 const TargetTransformInfo &TTI, Instruction *Transition,
8366 unsigned CombineCost)
8367 : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
8368 StoreExtractCombineCost(CombineCost) {
8369 assert(Transition && "Do not know how to promote null");
8370 }
8371
8372 /// Check if we can promote \p ToBePromoted to \p Type.
8373 bool canPromote(const Instruction *ToBePromoted) const {
8374 // We could support CastInst too.
8375 return isa<BinaryOperator>(ToBePromoted);
8376 }
8377
8378 /// Check if it is profitable to promote \p ToBePromoted
8379 /// by moving downward the transition through.
8380 bool shouldPromote(const Instruction *ToBePromoted) const {
8381 // Promote only if all the operands can be statically expanded.
8382 // Indeed, we do not want to introduce any new kind of transitions.
8383 for (const Use &U : ToBePromoted->operands()) {
8384 const Value *Val = U.get();
8385 if (Val == getEndOfTransition()) {
8386 // If the use is a division and the transition is on the rhs,
8387 // we cannot promote the operation, otherwise we may create a
8388 // division by zero.
8389 if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
8390 return false;
8391 continue;
8392 }
8393 if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
8394 !isa<ConstantFP>(Val))
8395 return false;
8396 }
8397 // Check that the resulting operation is legal.
8398 int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
8399 if (!ISDOpcode)
8400 return false;
 // NOTE(review): the callee of the second operand of `||` (listing line
 // 8402) is missing from this copy of the source.
8401 return StressStoreExtract ||
8403 ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
8404 }
8405
8406 /// Check whether or not \p Use can be combined
8407 /// with the transition.
8408 /// I.e., is it possible to do Use(Transition) => AnotherUse?
8409 bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
8410
8411 /// Record \p ToBePromoted as part of the chain to be promoted.
8412 void enqueueForPromotion(Instruction *ToBePromoted) {
8413 InstsToBePromoted.push_back(ToBePromoted);
8414 }
8415
8416 /// Set the instruction that will be combined with the transition.
8417 void recordCombineInstruction(Instruction *ToBeCombined) {
8418 assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
8419 CombineInst = ToBeCombined;
8420 }
8421
8422 /// Promote all the instructions enqueued for promotion if it is
8423 /// profitable.
8424 /// \return True if the promotion happened, false otherwise.
8425 bool promote() {
8426 // Check if there is something to promote.
8427 // Right now, if we do not have anything to combine with,
8428 // we assume the promotion is not profitable.
8429 if (InstsToBePromoted.empty() || !CombineInst)
8430 return false;
8431
8432 // Check cost.
8433 if (!StressStoreExtract && !isProfitableToPromote())
8434 return false;
8435
8436 // Promote.
8437 for (auto &ToBePromoted : InstsToBePromoted)
8438 promoteImpl(ToBePromoted);
8439 InstsToBePromoted.clear();
8440 return true;
8441 }
8442};
8443
8444} // end anonymous namespace
8445
8446void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
8447 // At this point, we know that all the operands of ToBePromoted but Def
8448 // can be statically promoted.
8449 // For Def, we need to use its parameter in ToBePromoted:
8450 // b = ToBePromoted ty1 a
8451 // Def = Transition ty1 b to ty2
8452 // Move the transition down.
8453 // 1. Replace all uses of the promoted operation by the transition.
8454 // = ... b => = ... Def.
8455 assert(ToBePromoted->getType() == Transition->getType() &&
8456 "The type of the result of the transition does not match "
8457 "the final type");
8458 ToBePromoted->replaceAllUsesWith(Transition);
8459 // 2. Update the type of the uses.
8460 // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
8461 Type *TransitionTy = getTransitionType();
8462 ToBePromoted->mutateType(TransitionTy);
8463 // 3. Update all the operands of the promoted operation with promoted
8464 // operands.
8465 // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
8466 for (Use &U : ToBePromoted->operands()) {
8467 Value *Val = U.get();
8468 Value *NewVal = nullptr;
8469 if (Val == Transition)
8470 NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
8471 else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
8472 isa<ConstantFP>(Val)) {
8473 // Use a splat constant if it is not safe to use undef.
8474 NewVal = getConstantVector(
8475 cast<Constant>(Val),
8476 isa<UndefValue>(Val) ||
8477 canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
8478 } else
8479 llvm_unreachable("Did you modified shouldPromote and forgot to update "
8480 "this?");
8481 ToBePromoted->setOperand(U.getOperandNo(), NewVal);
8482 }
8483 Transition->moveAfter(ToBePromoted);
8484 Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
8485}
8486
8487/// Some targets can do store(extractelement) with one instruction.
8488/// Try to push the extractelement towards the stores when the target
8489/// has this feature and this is profitable.
///
/// Starting at \p Inst, the single-use chain is followed within the block of
/// \p Inst: promotable users are queued, and when a user that can be combined
/// with the transition (a store) is reached, the queued chain is promoted.
/// \returns true if the chain was promoted, false otherwise.
8490bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
8491 unsigned CombineCost = std::numeric_limits<unsigned>::max();
 // NOTE(review): the middle of this condition (listing lines 8493-8494) is
 // missing from this copy of the source; CombineCost is passed into the
 // elided call.
8492 if (DisableStoreExtract ||
8495 Inst->getOperand(1), CombineCost)))
8496 return false;
8497
8498 // At this point we know that Inst is a vector to scalar transition.
8499 // Try to move it down the def-use chain, until:
8500 // - We can combine the transition with its single use
8501 // => we got rid of the transition.
8502 // - We escape the current basic block
8503 // => we would need to check that we are moving it at a cheaper place and
8504 // we do not do that for now.
8505 BasicBlock *Parent = Inst->getParent();
8506 LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
8507 VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
8508 // If the transition has more than one use, assume this is not going to be
8509 // beneficial.
8510 while (Inst->hasOneUse()) {
8511 Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
8512 LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
8513
 // Give up as soon as the chain leaves the block of the transition.
8514 if (ToBePromoted->getParent() != Parent) {
8515 LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block ("
8516 << ToBePromoted->getParent()->getName()
8517 << ") than the transition (" << Parent->getName()
8518 << ").\n");
8519 return false;
8520 }
8521
 // Reached a user that can be combined with the transition: this ends the
 // walk, and the result of the promotion attempt is returned.
8522 if (VPH.canCombine(ToBePromoted)) {
8523 LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n'
8524 << "will be combined with: " << *ToBePromoted << '\n');
8525 VPH.recordCombineInstruction(ToBePromoted);
8526 bool Changed = VPH.promote();
8527 NumStoreExtractExposed += Changed;
8528 return Changed;
8529 }
8530
8531 LLVM_DEBUG(dbgs() << "Try promoting.\n");
8532 if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
8533 return false;
8534
8535 LLVM_DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
8536
 // Queue the user and continue the walk from it.
8537 VPH.enqueueForPromotion(ToBePromoted);
8538 Inst = ToBePromoted;
8539 }
8540 return false;
8541}
8542
8543/// For the instruction sequence of store below, F and I values
8544/// are bundled together as an i64 value before being stored into memory.
8545/// Sometimes it is more efficient to generate separate stores for F and I,
8546/// which can remove the bitwise instructions or sink them to colder places.
8547///
8548/// (store (or (zext (bitcast F to i32) to i64),
8549/// (shl (zext I to i64), 32)), addr) -->
8550/// (store F, addr) and (store I, addr+4)
8551///
8552/// Similarly, splitting for other merged store can also be beneficial, like:
8553/// For pair of {i32, i32}, i64 store --> two i32 stores.
8554/// For pair of {i32, i16}, i64 store --> two i32 stores.
8555/// For pair of {i16, i16}, i32 store --> two i16 stores.
8556/// For pair of {i16, i8}, i32 store --> two i16 stores.
8557/// For pair of {i8, i8}, i16 store --> two i8 stores.
8558///
8559/// We allow each target to determine specifically which kind of splitting is
8560/// supported.
8561///
8562/// The store patterns are commonly seen from the simple code snippet below
8563/// if only std::make_pair(...) is sroa transformed before inlined into hoo.
8564/// void goo(const std::pair<int, float> &);
8565/// hoo() {
8566/// ...
8567/// goo(std::make_pair(tmp, ftmp));
8568/// ...
8569/// }
8570///
8571/// Although we already have similar splitting in DAG Combine, we duplicate
8572/// it in CodeGenPrepare to catch the case in which pattern is across
8573/// multiple BBs. The logic in DAG Combine is kept to catch case generated
8574/// during code expansion.
8576 const TargetLowering &TLI) {
8577 // Handle simple but common cases only.
8578 Type *StoreType = SI.getValueOperand()->getType();
8579
8580 // The code below assumes shifting a value by <number of bits>,
8581 // whereas scalable vectors would have to be shifted by
8582 // <2log(vscale) + number of bits> in order to store the
8583 // low/high parts. Bailing out for now.
8584 if (StoreType->isScalableTy())
8585 return false;
8586
8587 if (!DL.typeSizeEqualsStoreSize(StoreType) ||
8588 DL.getTypeSizeInBits(StoreType) == 0)
8589 return false;
8590
8591 unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
8592 Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
8593 if (!DL.typeSizeEqualsStoreSize(SplitStoreType))
8594 return false;
8595
8596 // Don't split the store if it is volatile.
8597 if (SI.isVolatile())
8598 return false;
8599
8600 // Match the following patterns:
8601 // (store (or (zext LValue to i64),
8602 // (shl (zext HValue to i64), 32)), HalfValBitSize)
8603 // or
8604 // (store (or (shl (zext HValue to i64), 32)), HalfValBitSize)
8605 // (zext LValue to i64),
8606 // Expect both operands of OR and the first operand of SHL have only
8607 // one use.
8608 Value *LValue, *HValue;
8609 if (!match(SI.getValueOperand(),
8612 m_SpecificInt(HalfValBitSize))))))
8613 return false;
8614
8615 // Check LValue and HValue are int with size less or equal than 32.
8616 if (!LValue->getType()->isIntegerTy() ||
8617 DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
8618 !HValue->getType()->isIntegerTy() ||
8619 DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
8620 return false;
8621
8622 // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
8623 // as the input of target query.
8624 auto *LBC = dyn_cast<BitCastInst>(LValue);
8625 auto *HBC = dyn_cast<BitCastInst>(HValue);
8626 EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType())
8627 : EVT::getEVT(LValue->getType());
8628 EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType())
8629 : EVT::getEVT(HValue->getType());
8630 if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
8631 return false;
8632
8633 // Start to split store.
8634 IRBuilder<> Builder(SI.getContext());
8635 Builder.SetInsertPoint(&SI);
8636
8637 // If LValue/HValue is a bitcast in another BB, create a new one in current
8638 // BB so it may be merged with the splitted stores by dag combiner.
8639 if (LBC && LBC->getParent() != SI.getParent())
8640 LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
8641 if (HBC && HBC->getParent() != SI.getParent())
8642 HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
8643
8644 bool IsLE = SI.getDataLayout().isLittleEndian();
8645 auto CreateSplitStore = [&](Value *V, bool Upper) {
8646 V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
8647 Value *Addr = SI.getPointerOperand();
8648 Align Alignment = SI.getAlign();
8649 const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper);
8650 if (IsOffsetStore) {
8651 Addr = Builder.CreateGEP(
8652 SplitStoreType, Addr,
8653 ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
8654
8655 // When splitting the store in half, naturally one half will retain the
8656 // alignment of the original wider store, regardless of whether it was
8657 // over-aligned or not, while the other will require adjustment.
8658 Alignment = commonAlignment(Alignment, HalfValBitSize / 8);
8659 }
8660 Builder.CreateAlignedStore(V, Addr, Alignment);
8661 };
8662
8663 CreateSplitStore(LValue, false);
8664 CreateSplitStore(HValue, true);
8665
8666 // Delete the old store.
8667 SI.eraseFromParent();
8668 return true;
8669}
8670
8671// Return true if the GEP has two operands, the first operand is of a sequential
8672// type, and the second operand is a constant.
8675 return GEP->getNumOperands() == 2 && I.isSequential() &&
8676 isa<ConstantInt>(GEP->getOperand(1));
8677}
8678
8679// Try unmerging GEPs to reduce liveness interference (register pressure) across
8680// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks,
8681// reducing liveness interference across those edges benefits global register
8682// allocation. Currently handles only certain cases.
8683//
8684// For example, unmerge %GEPI and %UGEPI as below.
8685//
8686// ---------- BEFORE ----------
8687// SrcBlock:
8688// ...
8689// %GEPIOp = ...
8690// ...
8691// %GEPI = gep %GEPIOp, Idx
8692// ...
8693// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
8694// (* %GEPI is alive on the indirectbr edges due to other uses ahead)
8695 // (* %GEPIOp is alive on the indirectbr edges only because it's used by
8696// %UGEPI)
8697//
8698// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
8699// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
8700// ...
8701//
8702// DstBi:
8703// ...
8704// %UGEPI = gep %GEPIOp, UIdx
8705// ...
8706// ---------------------------
8707//
8708// ---------- AFTER ----------
8709// SrcBlock:
8710// ... (same as above)
8711// (* %GEPI is still alive on the indirectbr edges)
8712// (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
8713// unmerging)
8714// ...
8715//
8716// DstBi:
8717// ...
8718// %UGEPI = gep %GEPI, (UIdx-Idx)
8719// ...
8720// ---------------------------
8721//
8722// The register pressure on the IndirectBr edges is reduced because %GEPIOp is
8723// no longer alive on them.
8724//
8725 // We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting merging
8726// of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as
8727 // not to disable further simplifications and optimizations as a result of GEP
8728// merging.
8729//
8730// Note this unmerging may increase the length of the data flow critical path
8731// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff
8732// between the register pressure and the length of data-flow critical
8733// path. Restricting this to the uncommon IndirectBr case would minimize the
8734// impact of potentially longer critical path, if any, and the impact on compile
8735// time.
8737 const TargetTransformInfo *TTI) {
8738 BasicBlock *SrcBlock = GEPI->getParent();
8739 // Check that SrcBlock ends with an IndirectBr. If not, give up. The common
8740 // (non-IndirectBr) cases exit early here.
8741 if (!isa<IndirectBrInst>(SrcBlock->getTerminator()))
8742 return false;
8743 // Check that GEPI is a simple gep with a single constant index.
8744 if (!GEPSequentialConstIndexed(GEPI))
8745 return false;
8746 ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
8747 // Check that GEPI is a cheap one.
8748 if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(),
8751 return false;
8752 Value *GEPIOp = GEPI->getOperand(0);
8753 // Check that GEPIOp is an instruction that's also defined in SrcBlock.
8754 if (!isa<Instruction>(GEPIOp))
8755 return false;
8756 auto *GEPIOpI = cast<Instruction>(GEPIOp);
8757 if (GEPIOpI->getParent() != SrcBlock)
8758 return false;
8759 // Check that GEP is used outside the block, meaning it's alive on the
8760 // IndirectBr edge(s).
8761 if (llvm::none_of(GEPI->users(), [&](User *Usr) {
8762 if (auto *I = dyn_cast<Instruction>(Usr)) {
8763 if (I->getParent() != SrcBlock) {
8764 return true;
8765 }
8766 }
8767 return false;
8768 }))
8769 return false;
8770 // The second elements of the GEP chains to be unmerged.
8771 std::vector<GetElementPtrInst *> UGEPIs;
8772 // Check each user of GEPIOp to check if unmerging would make GEPIOp not alive
8773 // on IndirectBr edges.
8774 for (User *Usr : GEPIOp->users()) {
8775 if (Usr == GEPI)
8776 continue;
8777 // Check if Usr is an Instruction. If not, give up.
8778 if (!isa<Instruction>(Usr))
8779 return false;
8780 auto *UI = cast<Instruction>(Usr);
8781 // Check if Usr in the same block as GEPIOp, which is fine, skip.
8782 if (UI->getParent() == SrcBlock)
8783 continue;
8784 // Check if Usr is a GEP. If not, give up.
8785 if (!isa<GetElementPtrInst>(Usr))
8786 return false;
8787 auto *UGEPI = cast<GetElementPtrInst>(Usr);
8788 // Check if UGEPI is a simple gep with a single constant index and GEPIOp is
8789 // the pointer operand to it. If so, record it in the vector. If not, give
8790 // up.
8791 if (!GEPSequentialConstIndexed(UGEPI))
8792 return false;
8793 if (UGEPI->getOperand(0) != GEPIOp)
8794 return false;
8795 if (UGEPI->getSourceElementType() != GEPI->getSourceElementType())
8796 return false;
8797 if (GEPIIdx->getType() !=
8798 cast<ConstantInt>(UGEPI->getOperand(1))->getType())
8799 return false;
8800 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8801 if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(),
8804 return false;
8805 UGEPIs.push_back(UGEPI);
8806 }
8807 if (UGEPIs.size() == 0)
8808 return false;
8809 // Check the materializing cost of (Uidx-Idx).
8810 for (GetElementPtrInst *UGEPI : UGEPIs) {
8811 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8812 APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
8814 NewIdx, GEPIIdx->getType(), TargetTransformInfo::TCK_SizeAndLatency);
8815 if (ImmCost > TargetTransformInfo::TCC_Basic)
8816 return false;
8817 }
8818 // Now unmerge between GEPI and UGEPIs.
8819 for (GetElementPtrInst *UGEPI : UGEPIs) {
8820 UGEPI->setOperand(0, GEPI);
8821 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8822 Constant *NewUGEPIIdx = ConstantInt::get(
8823 GEPIIdx->getType(), UGEPIIdx->getValue() - GEPIIdx->getValue());
8824 UGEPI->setOperand(1, NewUGEPIIdx);
8825 // If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not
8826 // inbounds to avoid UB.
8827 if (!GEPI->isInBounds()) {
8828 UGEPI->setIsInBounds(false);
8829 }
8830 }
8831 // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not
8832 // alive on IndirectBr edges).
8833 assert(llvm::none_of(GEPIOp->users(),
8834 [&](User *Usr) {
8835 return cast<Instruction>(Usr)->getParent() != SrcBlock;
8836 }) &&
8837 "GEPIOp is used outside SrcBlock");
8838 return true;
8839}
8840
8841static bool optimizeBranch(CondBrInst *Branch, const TargetLowering &TLI,
8843 bool IsHugeFunc) {
8844 // Try and convert
8845 // %c = icmp ult %x, 8
8846 // br %c, bla, blb
8847 // %tc = lshr %x, 3
8848 // to
8849 // %tc = lshr %x, 3
8850 // %c = icmp eq %tc, 0
8851 // br %c, bla, blb
8852 // Creating the cmp to zero can be better for the backend, especially if the
8853 // lshr produces flags that can be used automatically.
8854 if (!TLI.preferZeroCompareBranch())
8855 return false;
8856
8857 ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());
8858 if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse())
8859 return false;
8860
8861 Value *X = Cmp->getOperand(0);
8862 if (!X->hasUseList())
8863 return false;
8864
8865 APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue();
8866
8867 for (auto *U : X->users()) {
8869 // A quick dominance check
8870 if (!UI ||
8871 (UI->getParent() != Branch->getParent() &&
8872 UI->getParent() != Branch->getSuccessor(0) &&
8873 UI->getParent() != Branch->getSuccessor(1)) ||
8874 (UI->getParent() != Branch->getParent() &&
8875 !UI->getParent()->getSinglePredecessor()))
8876 continue;
8877
8878 if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT &&
8879 match(UI, m_Shr(m_Specific(X), m_SpecificInt(CmpC.logBase2())))) {
8880 IRBuilder<> Builder(Branch);
8881 if (UI->getParent() != Branch->getParent())
8882 UI->moveBefore(Branch->getIterator());
8884 Value *NewCmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, UI,
8885 ConstantInt::get(UI->getType(), 0));
8886 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8887 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8888 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8889 return true;
8890 }
8891 if (Cmp->isEquality() &&
8892 (match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) ||
8893 match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))) ||
8894 match(UI, m_Xor(m_Specific(X), m_SpecificInt(CmpC))))) {
8895 IRBuilder<> Builder(Branch);
8896 if (UI->getParent() != Branch->getParent())
8897 UI->moveBefore(Branch->getIterator());
8899 Value *NewCmp = Builder.CreateCmp(Cmp->getPredicate(), UI,
8900 ConstantInt::get(UI->getType(), 0));
8901 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8902 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8903 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8904 return true;
8905 }
8906 }
8907 return false;
8908}
8909
8910bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
8911 bool AnyChange = false;
8912 AnyChange = fixupDbgVariableRecordsOnInst(*I);
8913
8914 // Bail out if we inserted the instruction to prevent optimizations from
8915 // stepping on each other's toes.
8916 if (InsertedInsts.count(I))
8917 return AnyChange;
8918
8919 // TODO: Move into the switch on opcode below here.
8920 if (PHINode *P = dyn_cast<PHINode>(I)) {
8921 // It is possible for very late stage optimizations (such as SimplifyCFG)
8922 // to introduce PHI nodes too late to be cleaned up. If we detect such a
8923 // trivial PHI, go ahead and zap it here.
8924 if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) {
8925 LargeOffsetGEPMap.erase(P);
8926 replaceAllUsesWith(P, V, FreshBBs, IsHugeFunc);
8927 P->eraseFromParent();
8928 ++NumPHIsElim;
8929 return true;
8930 }
8931 return AnyChange;
8932 }
8933
8934 if (CastInst *CI = dyn_cast<CastInst>(I)) {
8935 // If the source of the cast is a constant, then this should have
8936 // already been constant folded. The only reason NOT to constant fold
8937 // it is if something (e.g. LSR) was careful to place the constant
8938 // evaluation in a block other than then one that uses it (e.g. to hoist
8939 // the address of globals out of a loop). If this is the case, we don't
8940 // want to forward-subst the cast.
8941 if (isa<Constant>(CI->getOperand(0)))
8942 return AnyChange;
8943
8944 if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
8945 return true;
8946
8948 isa<TruncInst>(I)) &&
8950 I, LI->getLoopFor(I->getParent()), *TTI))
8951 return true;
8952
8953 if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
8954 /// Sink a zext or sext into its user blocks if the target type doesn't
8955 /// fit in one register
8956 if (TLI->getTypeAction(CI->getContext(),
8957 TLI->getValueType(*DL, CI->getType())) ==
8958 TargetLowering::TypeExpandInteger) {
8959 return SinkCast(CI);
8960 } else {
8962 I, LI->getLoopFor(I->getParent()), *TTI))
8963 return true;
8964
8965 bool MadeChange = optimizeExt(I);
8966 return MadeChange | optimizeExtUses(I);
8967 }
8968 }
8969 return AnyChange;
8970 }
8971
8972 if (auto *Cmp = dyn_cast<CmpInst>(I))
8973 if (optimizeCmp(Cmp, ModifiedDT))
8974 return true;
8975
8976 if (match(I, m_URem(m_Value(), m_Value())))
8977 if (optimizeURem(I))
8978 return true;
8979
8980 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
8981 LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8982 bool Modified = optimizeLoadExt(LI);
8983 unsigned AS = LI->getPointerAddressSpace();
8984 Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
8985 return Modified;
8986 }
8987
8988 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
8989 if (splitMergedValStore(*SI, *DL, *TLI))
8990 return true;
8991 SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8992 unsigned AS = SI->getPointerAddressSpace();
8993 return optimizeMemoryInst(I, SI->getOperand(1),
8994 SI->getOperand(0)->getType(), AS);
8995 }
8996
8997 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
8998 unsigned AS = RMW->getPointerAddressSpace();
8999 return optimizeMemoryInst(I, RMW->getPointerOperand(), RMW->getType(), AS);
9000 }
9001
9002 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
9003 unsigned AS = CmpX->getPointerAddressSpace();
9004 return optimizeMemoryInst(I, CmpX->getPointerOperand(),
9005 CmpX->getCompareOperand()->getType(), AS);
9006 }
9007
9008 BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
9009
9010 if (BinOp && BinOp->getOpcode() == Instruction::And && EnableAndCmpSinking &&
9011 sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts))
9012 return true;
9013
9014 // TODO: Move this into the switch on opcode - it handles shifts already.
9015 if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
9016 BinOp->getOpcode() == Instruction::LShr)) {
9017 ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
9018 if (CI && TLI->hasExtractBitsInsn())
9019 if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
9020 return true;
9021 }
9022
9023 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
9024 if (GEPI->hasAllZeroIndices()) {
9025 /// The GEP operand must be a pointer, so must its result -> BitCast
9026 Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
9027 GEPI->getName(), GEPI->getIterator());
9028 NC->setDebugLoc(GEPI->getDebugLoc());
9029 replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc);
9031 GEPI, TLInfo, nullptr,
9032 [&](Value *V) { removeAllAssertingVHReferences(V); });
9033 ++NumGEPsElim;
9034 optimizeInst(NC, ModifiedDT);
9035 return true;
9036 }
9038 return true;
9039 }
9040 }
9041
9042 if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
9043 // freeze(icmp a, const)) -> icmp (freeze a), const
9044 // This helps generate efficient conditional jumps.
9045 Instruction *CmpI = nullptr;
9046 if (ICmpInst *II = dyn_cast<ICmpInst>(FI->getOperand(0)))
9047 CmpI = II;
9048 else if (FCmpInst *F = dyn_cast<FCmpInst>(FI->getOperand(0)))
9049 CmpI = F->getFastMathFlags().none() ? F : nullptr;
9050
9051 if (CmpI && CmpI->hasOneUse()) {
9052 auto Op0 = CmpI->getOperand(0), Op1 = CmpI->getOperand(1);
9053 bool Const0 = isa<ConstantInt>(Op0) || isa<ConstantFP>(Op0) ||
9055 bool Const1 = isa<ConstantInt>(Op1) || isa<ConstantFP>(Op1) ||
9057 if (Const0 || Const1) {
9058 if (!Const0 || !Const1) {
9059 auto *F = new FreezeInst(Const0 ? Op1 : Op0, "", CmpI->getIterator());
9060 F->takeName(FI);
9061 CmpI->setOperand(Const0 ? 1 : 0, F);
9062 }
9063 replaceAllUsesWith(FI, CmpI, FreshBBs, IsHugeFunc);
9064 FI->eraseFromParent();
9065 return true;
9066 }
9067 }
9068 return AnyChange;
9069 }
9070
9071 if (tryToSinkFreeOperands(I))
9072 return true;
9073
9074 switch (I->getOpcode()) {
9075 case Instruction::Shl:
9076 case Instruction::LShr:
9077 case Instruction::AShr:
9078 return optimizeShiftInst(cast<BinaryOperator>(I));
9079 case Instruction::Call:
9080 return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
9081 case Instruction::Select:
9082 return optimizeSelectInst(cast<SelectInst>(I));
9083 case Instruction::ShuffleVector:
9084 return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
9085 case Instruction::Switch:
9086 return optimizeSwitchInst(cast<SwitchInst>(I));
9087 case Instruction::ExtractElement:
9088 return optimizeExtractElementInst(cast<ExtractElementInst>(I));
9089 case Instruction::CondBr:
9090 return optimizeBranch(cast<CondBrInst>(I), *TLI, FreshBBs, IsHugeFunc);
9091 }
9092
9093 return AnyChange;
9094}
9095
9096/// Given an OR instruction, check to see if this is a bitreverse
9097/// idiom. If so, insert the new intrinsic and return true.
9098bool CodeGenPrepare::makeBitReverse(Instruction &I) {
9099 if (!I.getType()->isIntegerTy() ||
9101 TLI->getValueType(*DL, I.getType(), true)))
9102 return false;
9103
9104 SmallVector<Instruction *, 4> Insts;
9105 if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
9106 return false;
9107 Instruction *LastInst = Insts.back();
9108 replaceAllUsesWith(&I, LastInst, FreshBBs, IsHugeFunc);
9110 &I, TLInfo, nullptr,
9111 [&](Value *V) { removeAllAssertingVHReferences(V); });
9112 return true;
9113}
9114
9115// In this pass we look for GEP and cast instructions that are used
9116// across basic blocks and rewrite them to improve basic-block-at-a-time
9117// selection.
9118bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) {
9119 SunkAddrs.clear();
9120 bool MadeChange = false;
9121
9122 do {
9123 CurInstIterator = BB.begin();
9124 ModifiedDT = ModifyDT::NotModifyDT;
9125 while (CurInstIterator != BB.end()) {
9126 MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
9127 if (ModifiedDT != ModifyDT::NotModifyDT) {
9128 // For huge function we tend to quickly go though the inner optmization
9129 // opportunities in the BB. So we go back to the BB head to re-optimize
9130 // each instruction instead of go back to the function head.
9131 if (IsHugeFunc)
9132 break;
9133 return true;
9134 }
9135 }
9136 } while (ModifiedDT == ModifyDT::ModifyInstDT);
9137
9138 bool MadeBitReverse = true;
9139 while (MadeBitReverse) {
9140 MadeBitReverse = false;
9141 for (auto &I : reverse(BB)) {
9142 if (makeBitReverse(I)) {
9143 MadeBitReverse = MadeChange = true;
9144 break;
9145 }
9146 }
9147 }
9148 MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT);
9149
9150 return MadeChange;
9151}
9152
9153bool CodeGenPrepare::fixupDbgVariableRecordsOnInst(Instruction &I) {
9154 bool AnyChange = false;
9155 for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
9156 AnyChange |= fixupDbgVariableRecord(DVR);
9157 return AnyChange;
9158}
9159
9160// FIXME: should updating debug-info really cause the "changed" flag to fire,
9161// which can cause a function to be reprocessed?
9162bool CodeGenPrepare::fixupDbgVariableRecord(DbgVariableRecord &DVR) {
9163 if (DVR.Type != DbgVariableRecord::LocationType::Value &&
9164 DVR.Type != DbgVariableRecord::LocationType::Assign)
9165 return false;
9166
9167 // Does this DbgVariableRecord refer to a sunk address calculation?
9168 bool AnyChange = false;
9169 SmallDenseSet<Value *> LocationOps(DVR.location_ops().begin(),
9170 DVR.location_ops().end());
9171 for (Value *Location : LocationOps) {
9172 WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
9173 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
9174 if (SunkAddr) {
9175 // Point dbg.value at locally computed address, which should give the best
9176 // opportunity to be accurately lowered. This update may change the type
9177 // of pointer being referred to; however this makes no difference to
9178 // debugging information, and we can't generate bitcasts that may affect
9179 // codegen.
9180 DVR.replaceVariableLocationOp(Location, SunkAddr);
9181 AnyChange = true;
9182 }
9183 }
9184 return AnyChange;
9185}
9186
9188 DVR->removeFromParent();
9189 BasicBlock *VIBB = VI->getParent();
9190 if (isa<PHINode>(VI))
9191 VIBB->insertDbgRecordBefore(DVR, VIBB->getFirstInsertionPt());
9192 else
9193 VIBB->insertDbgRecordAfter(DVR, &*VI);
9194}
9195
9196// A llvm.dbg.value may be using a value before its definition, due to
9197// optimizations in this pass and others. Scan for such dbg.values, and rescue
9198// them by moving the dbg.value to immediately after the value definition.
9199// FIXME: Ideally this should never be necessary, and this has the potential
9200// to re-order dbg.value intrinsics.
9201bool CodeGenPrepare::placeDbgValues(Function &F) {
9202 bool MadeChange = false;
9203 DominatorTree &DT = getDT();
9204
9205 auto DbgProcessor = [&](auto *DbgItem, Instruction *Position) {
9206 SmallVector<Instruction *, 4> VIs;
9207 for (Value *V : DbgItem->location_ops())
9208 if (Instruction *VI = dyn_cast_or_null<Instruction>(V))
9209 VIs.push_back(VI);
9210
9211 // This item may depend on multiple instructions, complicating any
9212 // potential sink. This block takes the defensive approach, opting to
9213 // "undef" the item if it has more than one instruction and any of them do
9214 // not dominate iem.
9215 for (Instruction *VI : VIs) {
9216 if (VI->isTerminator())
9217 continue;
9218
9219 // If VI is a phi in a block with an EHPad terminator, we can't insert
9220 // after it.
9221 if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
9222 continue;
9223
9224 // If the defining instruction dominates the dbg.value, we do not need
9225 // to move the dbg.value.
9226 if (DT.dominates(VI, Position))
9227 continue;
9228
9229 // If we depend on multiple instructions and any of them doesn't
9230 // dominate this DVI, we probably can't salvage it: moving it to
9231 // after any of the instructions could cause us to lose the others.
9232 if (VIs.size() > 1) {
9233 LLVM_DEBUG(
9234 dbgs()
9235 << "Unable to find valid location for Debug Value, undefing:\n"
9236 << *DbgItem);
9237 DbgItem->setKillLocation();
9238 break;
9239 }
9240
9241 LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
9242 << *DbgItem << ' ' << *VI);
9243 DbgInserterHelper(DbgItem, VI->getIterator());
9244 MadeChange = true;
9245 ++NumDbgValueMoved;
9246 }
9247 };
9248
9249 for (BasicBlock &BB : F) {
9250 for (Instruction &Insn : llvm::make_early_inc_range(BB)) {
9251 // Process any DbgVariableRecord records attached to this
9252 // instruction.
9253 for (DbgVariableRecord &DVR : llvm::make_early_inc_range(
9254 filterDbgVars(Insn.getDbgRecordRange()))) {
9255 if (DVR.Type != DbgVariableRecord::LocationType::Value)
9256 continue;
9257 DbgProcessor(&DVR, &Insn);
9258 }
9259 }
9260 }
9261
9262 return MadeChange;
9263}
9264
9265// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered
9266// probes can be chained dependencies of other regular DAG nodes and block DAG
9267// combine optimizations.
9268bool CodeGenPrepare::placePseudoProbes(Function &F) {
9269 bool MadeChange = false;
9270 for (auto &Block : F) {
9271 // Move the rest probes to the beginning of the block.
9272 auto FirstInst = Block.getFirstInsertionPt();
9273 while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst())
9274 ++FirstInst;
9275 BasicBlock::iterator I(FirstInst);
9276 I++;
9277 while (I != Block.end()) {
9278 if (auto *II = dyn_cast<PseudoProbeInst>(I++)) {
9279 II->moveBefore(FirstInst);
9280 MadeChange = true;
9281 }
9282 }
9283 }
9284 return MadeChange;
9285}
9286
/// Scale down both weights to fit into uint32_t.
///
/// Both weights are divided by the same factor, chosen from the larger of the
/// two, so that afterwards neither exceeds the uint32_t maximum. Weights that
/// already fit are divided by 1 and therefore left unchanged.
///
/// \param NewTrue  in/out: first weight, overwritten with its scaled value.
/// \param NewFalse in/out: second weight, overwritten with its scaled value.
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
  const uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
  // The divisor must be 64 bits wide: for large inputs the quotient plus one
  // does not fit in 32 bits, and truncating it would yield a divisor of zero
  // or one that is far too small to bring the weights into range.
  const uint64_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
  NewTrue = NewTrue / Scale;
  NewFalse = NewFalse / Scale;
}
9294
9295/// Some targets prefer to split a conditional branch like:
9296/// \code
9297/// %0 = icmp ne i32 %a, 0
9298/// %1 = icmp ne i32 %b, 0
9299/// %or.cond = or i1 %0, %1
9300/// br i1 %or.cond, label %TrueBB, label %FalseBB
9301/// \endcode
9302/// into multiple branch instructions like:
9303/// \code
9304/// bb1:
9305/// %0 = icmp ne i32 %a, 0
9306/// br i1 %0, label %TrueBB, label %bb2
9307/// bb2:
9308/// %1 = icmp ne i32 %b, 0
9309/// br i1 %1, label %TrueBB, label %FalseBB
9310/// \endcode
9311/// This usually allows instruction selection to do even further optimizations
9312/// and combine the compare with the branch instruction. Currently this is
9313/// applied for targets which have "cheap" jump instructions.
9314///
9315/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
9316///
9317bool CodeGenPrepare::splitBranchCondition(Function &F) {
9318 if (!TM->Options.EnableFastISel || TLI->isJumpExpensive())
9319 return false;
9320
9321 bool MadeChange = false;
9322 for (auto &BB : F) {
9323 // Does this BB end with the following?
9324 // %cond1 = icmp|fcmp|binary instruction ...
9325 // %cond2 = icmp|fcmp|binary instruction ...
9326 // %cond.or = or|and i1 %cond1, cond2
9327 // br i1 %cond.or label %dest1, label %dest2"
9328 Instruction *LogicOp;
9329 BasicBlock *TBB, *FBB;
9330 if (!match(BB.getTerminator(),
9331 m_Br(m_OneUse(m_Instruction(LogicOp)), TBB, FBB)))
9332 continue;
9333
9334 auto *Br1 = cast<CondBrInst>(BB.getTerminator());
9335 if (Br1->getMetadata(LLVMContext::MD_unpredictable))
9336 continue;
9337
9338 // The merging of mostly empty BB can cause a degenerate branch.
9339 if (TBB == FBB)
9340 continue;
9341
9342 unsigned Opc;
9343 Value *Cond1, *Cond2;
9344 if (match(LogicOp,
9345 m_LogicalAnd(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2)))))
9346 Opc = Instruction::And;
9347 else if (match(LogicOp, m_LogicalOr(m_OneUse(m_Value(Cond1)),
9348 m_OneUse(m_Value(Cond2)))))
9349 Opc = Instruction::Or;
9350 else
9351 continue;
9352
9353 auto IsGoodCond = [](Value *Cond) {
9354 return match(
9355 Cond,
9357 m_LogicalOr(m_Value(), m_Value()))));
9358 };
9359 if (!IsGoodCond(Cond1) || !IsGoodCond(Cond2))
9360 continue;
9361
9362 LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
9363
9364 // Create a new BB.
9365 auto *TmpBB =
9366 BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
9367 BB.getParent(), BB.getNextNode());
9368 if (IsHugeFunc)
9369 FreshBBs.insert(TmpBB);
9370
9371 // Update original basic block by using the first condition directly by the
9372 // branch instruction and removing the no longer needed and/or instruction.
9373 Br1->setCondition(Cond1);
9374 LogicOp->eraseFromParent();
9375
9376 // Depending on the condition we have to either replace the true or the
9377 // false successor of the original branch instruction.
9378 if (Opc == Instruction::And)
9379 Br1->setSuccessor(0, TmpBB);
9380 else
9381 Br1->setSuccessor(1, TmpBB);
9382
9383 // Fill in the new basic block.
9384 auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
9385 if (auto *I = dyn_cast<Instruction>(Cond2)) {
9386 I->removeFromParent();
9387 I->insertBefore(Br2->getIterator());
9388 }
9389
9390 // Update PHI nodes in both successors. The original BB needs to be
9391 // replaced in one successor's PHI nodes, because the branch comes now from
9392 // the newly generated BB (NewBB). In the other successor we need to add one
9393 // incoming edge to the PHI nodes, because both branch instructions target
9394 // now the same successor. Depending on the original branch condition
9395 // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
9396 // we perform the correct update for the PHI nodes.
9397 // This doesn't change the successor order of the just created branch
9398 // instruction (or any other instruction).
9399 if (Opc == Instruction::Or)
9400 std::swap(TBB, FBB);
9401
9402 // Replace the old BB with the new BB.
9403 TBB->replacePhiUsesWith(&BB, TmpBB);
9404
9405 // Add another incoming edge from the new BB.
9406 for (PHINode &PN : FBB->phis()) {
9407 auto *Val = PN.getIncomingValueForBlock(&BB);
9408 PN.addIncoming(Val, TmpBB);
9409 }
9410
9411 if (Loop *L = LI->getLoopFor(&BB))
9412 L->addBasicBlockToLoop(TmpBB, *LI);
9413
9414 // The edge we need to delete starts at BB and ends at whatever TBB ends
9415 // up pointing to.
9416 DTU->applyUpdates({{DominatorTree::Insert, &BB, TmpBB},
9417 {DominatorTree::Insert, TmpBB, TBB},
9418 {DominatorTree::Insert, TmpBB, FBB},
9419 {DominatorTree::Delete, &BB, TBB}});
9420
9421 // Update the branch weights (from SelectionDAGBuilder::
9422 // FindMergedConditions).
9423 if (Opc == Instruction::Or) {
9424 // Codegen X | Y as:
9425 // BB1:
9426 // jmp_if_X TBB
9427 // jmp TmpBB
9428 // TmpBB:
9429 // jmp_if_Y TBB
9430 // jmp FBB
9431 //
9432
9433 // We have flexibility in setting Prob for BB1 and Prob for NewBB.
9434 // The requirement is that
9435 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
9436 // = TrueProb for original BB.
9437 // Assuming the original weights are A and B, one choice is to set BB1's
9438 // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
9439 // assumes that
9440 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
9441 // Another choice is to assume TrueProb for BB1 equals to TrueProb for
9442 // TmpBB, but the math is more complicated.
9443 uint64_t TrueWeight, FalseWeight;
9444 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
9445 uint64_t NewTrueWeight = TrueWeight;
9446 uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
9447 scaleWeights(NewTrueWeight, NewFalseWeight);
9448 Br1->setMetadata(LLVMContext::MD_prof,
9449 MDBuilder(Br1->getContext())
9450 .createBranchWeights(TrueWeight, FalseWeight,
9451 hasBranchWeightOrigin(*Br1)));
9452
9453 NewTrueWeight = TrueWeight;
9454 NewFalseWeight = 2 * FalseWeight;
9455 scaleWeights(NewTrueWeight, NewFalseWeight);
9456 Br2->setMetadata(LLVMContext::MD_prof,
9457 MDBuilder(Br2->getContext())
9458 .createBranchWeights(TrueWeight, FalseWeight));
9459 }
9460 } else {
9461 // Codegen X & Y as:
9462 // BB1:
9463 // jmp_if_X TmpBB
9464 // jmp FBB
9465 // TmpBB:
9466 // jmp_if_Y TBB
9467 // jmp FBB
9468 //
9469 // This requires creation of TmpBB after CurBB.
9470
9471 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
9472 // The requirement is that
9473 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
9474 // = FalseProb for original BB.
9475 // Assuming the original weights are A and B, one choice is to set BB1's
9476 // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
9477 // assumes that
9478 // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
9479 uint64_t TrueWeight, FalseWeight;
9480 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
9481 uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
9482 uint64_t NewFalseWeight = FalseWeight;
9483 scaleWeights(NewTrueWeight, NewFalseWeight);
9484 Br1->setMetadata(LLVMContext::MD_prof,
9485 MDBuilder(Br1->getContext())
9486 .createBranchWeights(TrueWeight, FalseWeight));
9487
9488 NewTrueWeight = 2 * TrueWeight;
9489 NewFalseWeight = FalseWeight;
9490 scaleWeights(NewTrueWeight, NewFalseWeight);
9491 Br2->setMetadata(LLVMContext::MD_prof,
9492 MDBuilder(Br2->getContext())
9493 .createBranchWeights(TrueWeight, FalseWeight));
9494 }
9495 }
9496
9497 MadeChange = true;
9498
9499 LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
9500 TmpBB->dump());
9501 }
9502 return MadeChange;
9503}
#define Success
return SDValue()
static unsigned getIntrinsicID(const SDNode *N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
#define X(NUM, ENUM, NAME)
Definition ELF.h:849
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI, SetOfInstrs &InsertedInsts)
Duplicate and sink the given 'and' instruction into user blocks where it is used in a compare to allo...
static bool SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, DenseMap< BasicBlock *, BinaryOperator * > &InsertedShifts, const TargetLowering &TLI, const DataLayout &DL)
Sink both shift and truncate instruction to the use of truncate's BB.
static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, SmallVectorImpl< Value * > &OffsetV)
static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V)
Check if V (an operand of a select instruction) is an expensive instruction that is only used once.
static bool isExtractBitsCandidateUse(Instruction *User)
Check if the candidates could be combined with a shift instruction, which includes:
static cl::opt< unsigned > MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100), cl::Hidden, cl::desc("Max number of address users to look at"))
static cl::opt< bool > OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true), cl::desc("Enable converting phi types in CodeGenPrepare"))
static cl::opt< bool > DisableStoreExtract("disable-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Disable store(extract) optimizations in CodeGenPrepare"))
static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI, const DataLayout &DL)
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse)
Scale down both weights to fit into uint32_t.
static cl::opt< bool > ProfileUnknownInSpecialSection("profile-unknown-in-special-section", cl::Hidden, cl::desc("In profiling mode like sampleFDO, if a function doesn't have " "profile, we cannot tell the function is cold for sure because " "it may be a function newly added without ever being sampled. " "With the flag enabled, compiler can put such profile unknown " "functions into a special section, so runtime system can choose " "to handle it in a different way than .text section, to save " "RAM for example. "))
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, const TargetLowering &TLI, const DataLayout &DL)
Sink the shift right instruction into user blocks if the uses could potentially be combined with this...
static cl::opt< bool > DisableExtLdPromotion("disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in " "CodeGenPrepare"))
static cl::opt< bool > DisablePreheaderProtect("disable-preheader-prot", cl::Hidden, cl::init(false), cl::desc("Disable protection against removing loop preheaders"))
static cl::opt< bool > AddrSinkCombineBaseOffs("addr-sink-combine-base-offs", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseOffs field in Address sinking."))
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, const DataLayout &DL)
If the specified cast instruction is a noop copy (e.g.
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static bool SinkCast(CastInst *CI)
Sink the specified cast instruction into its user blocks.
static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp)
Many architectures use the same instruction for both subtract and cmp.
static cl::opt< bool > AddrSinkCombineBaseReg("addr-sink-combine-base-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseReg field in Address sinking."))
static bool FindAllMemoryUses(Instruction *I, SmallVectorImpl< std::pair< Use *, Type * > > &MemoryUses, SmallPtrSetImpl< Instruction * > &ConsideredInsts, const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, unsigned &SeenInsts)
Recursively walk all the uses of I until we find a memory use.
static cl::opt< bool > StressStoreExtract("stress-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"))
static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, const TargetLowering *TLI, SelectInst *SI)
Returns true if a SelectInst should be turned into an explicit branch.
static std::optional< std::pair< Instruction *, Constant * > > getIVIncrement(const PHINode *PN, const LoopInfo *LI)
If given PN is an induction variable with value IVInc coming from the backedge, and on each iteration...
static cl::opt< bool > AddrSinkCombineBaseGV("addr-sink-combine-base-gv", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseGV field in Address sinking."))
static cl::opt< bool > AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true), cl::desc("Address sinking in CGP using GEPs."))
static Value * getTrueOrFalseValue(SelectInst *SI, bool isTrue, const SmallPtrSet< const Instruction *, 2 > &Selects)
If isTrue is true, return the true value of SI, otherwise return false value of SI.
static cl::opt< bool > DisableBranchOpts("disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare"))
static cl::opt< bool > EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" " the other."), cl::init(true))
static cl::opt< bool > ProfileGuidedSectionPrefix("profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use profile info to add section prefix for hot/cold functions"))
static cl::opt< unsigned > HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden, cl::desc("Least BB number of huge function."))
static cl::opt< bool > AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true), cl::desc("Allow creation of selects in Address sinking."))
static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL, const LoopInfo *LI, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
static bool optimizeBranch(CondBrInst *Branch, const TargetLowering &TLI, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, const TargetTransformInfo *TTI)
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering &TLI, const TargetRegisterInfo &TRI)
Check to see if all uses of OpVal by the specified inline asm call are due to memory operands.
static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo, const CallInst *CI)
static void replaceAllUsesWith(Value *Old, Value *New, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
Replace all old uses with new ones, and push the updated BBs into FreshBBs.
static cl::opt< bool > ForceSplitStore("force-split-store", cl::Hidden, cl::init(false), cl::desc("Force store splitting no matter what the target query says."))
static bool matchOverflowPattern(Instruction *&I, ExtractValueInst *&MulExtract, ExtractValueInst *&OverflowExtract)
static void computeBaseDerivedRelocateMap(const SmallVectorImpl< GCRelocateInst * > &AllRelocateCalls, MapVector< GCRelocateInst *, SmallVector< GCRelocateInst *, 0 > > &RelocateInstMap)
static bool simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, const SmallVectorImpl< GCRelocateInst * > &Targets)
static cl::opt< bool > AddrSinkCombineScaledReg("addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of ScaledReg field in Address sinking."))
static bool foldICmpWithDominatingICmp(CmpInst *Cmp, const TargetLowering &TLI)
For pattern like:
static bool MightBeFoldableInst(Instruction *I)
This is a little filter, which returns true if an addressing computation involving I might be folded ...
static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS, Constant *&Step)
static cl::opt< bool > EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden, cl::init(true), cl::desc("Enable splitting large offset of GEP."))
static cl::opt< bool > DisableComplexAddrModes("disable-complex-addr-modes", cl::Hidden, cl::init(false), cl::desc("Disables combining addressing modes with different parts " "in optimizeMemoryInst."))
static cl::opt< bool > EnableICMP_EQToICMP_ST("cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false), cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."))
static cl::opt< bool > VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false), cl::desc("Enable BFI update verification for " "CodeGenPrepare."))
static cl::opt< bool > BBSectionsGuidedSectionPrefix("bbsections-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use the basic-block-sections profile to determine the text " "section prefix for hot functions. Functions with " "basic-block-sections profile will be placed in `.text.hot` " "regardless of their FDO profile info. Other functions won't be " "impacted, i.e., their prefixes will be decided by FDO/sampleFDO " "profiles."))
static bool isRemOfLoopIncrementWithLoopInvariant(Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, Value *&AddInstOut, Value *&AddOffsetOut, PHINode *&LoopIncrPNOut)
static bool isIVIncrement(const Value *V, const LoopInfo *LI)
static cl::opt< bool > DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), cl::desc("Disable GC optimizations in CodeGenPrepare"))
static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP)
static void DbgInserterHelper(DbgVariableRecord *DVR, BasicBlock::iterator VI)
static bool isPromotedInstructionLegal(const TargetLowering &TLI, const DataLayout &DL, Value *Val)
Check whether or not Val is a legal instruction for TLI.
static cl::opt< uint64_t > FreqRatioToSkipMerge("cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), cl::desc("Skip merging empty blocks if (frequency of empty block) / " "(frequency of destination block) is greater than this ratio"))
static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst, Value *SunkAddr)
static bool IsNonLocalValue(Value *V, BasicBlock *BB)
Return true if the specified values are defined in a different basic block than BB.
static cl::opt< bool > EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true), cl::desc("Enable sinking and/cmp into branches."))
static bool despeculateCountZeros(IntrinsicInst *CountZeros, DomTreeUpdater *DTU, LoopInfo *LI, const TargetLowering *TLI, const DataLayout *DL, ModifyDT &ModifiedDT, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
If counting leading or trailing zeros is an expensive operation and a zero input is defined,...
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI, const DataLayout &DL)
Sink the given CmpInst into user blocks to reduce the number of virtual registers that must be create...
static bool hasSameExtUse(Value *Val, const TargetLowering &TLI)
Check if all the uses of Val are equivalent (or free) zero or sign extensions.
static cl::opt< bool > StressExtLdPromotion("stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " "optimization in CodeGenPrepare"))
static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp, BinaryOperator *&Add)
Match special-case patterns that check for unsigned add overflow.
static cl::opt< bool > DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion."))
static cl::opt< bool > DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false), cl::desc("Disable elimination of dead PHI nodes."))
static cl::opt< bool > AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), cl::desc("Allow creation of Phis in Address sinking."))
Defines an IR pass for CodeGen Prepare.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
#define DEBUG_TYPE
static Value * getCondition(Instruction *I)
Hexagon Common GEP
IRTranslator LLVM IR MI
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv users
Definition IVUsers.cpp:48
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
Definition LICM.cpp:1448
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register const TargetRegisterInfo * TRI
This file implements a map that provides insertion order iteration.
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
OptimizedStructLayoutField Field
#define P(N)
ppc ctr loops verify
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file defines the PointerIntPair class.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
static DominatorTree getDomTree(Function &F)
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
Remove Loads Into Fake Uses
This file contains some templates that are useful if you are working with the STL at all.
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc=0)
This file describes how to lower LLVM code to machine code.
static cl::opt< bool > DisableSelectOptimize("disable-select-optimize", cl::init(true), cl::Hidden, cl::desc("Disable the select-optimization pass from running"))
Disable the select optimization pass.
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
static Constant * getConstantVector(MVT VT, ArrayRef< APInt > Bits, const APInt &Undefs, LLVMContext &C)
Value * RHS
Value * LHS
BinaryOperator * Mul
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1043
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1546
unsigned logBase2() const
Definition APInt.h:1776
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1016
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1577
LLVM_ABI bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
An instruction that atomically checks whether a specified value is in a memory location,...
static unsigned getPointerOperandIndex()
an instruction that atomically reads a memory location, combines it with another value,...
static unsigned getPointerOperandIndex()
Analysis pass providing the BasicBlockSectionsProfileReader.
bool isFunctionHot(StringRef FuncName) const
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:530
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing function, or null if none.
Definition BasicBlock.h:213
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:687
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
InstListType::const_iterator const_iterator
Definition BasicBlock.h:171
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI void moveAfter(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it right after MovePos in the function M...
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI void insertDbgRecordAfter(DbgRecord *DR, Instruction *I)
Insert a DbgRecord into a block at the position given by I.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
BinaryOps getOpcode() const
Definition InstrTypes.h:374
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI void setBlockFreq(const BasicBlock *BB, BlockFrequency Freq)
LLVM_ABI BlockFrequency getBlockFreq(const BasicBlock *BB) const
getBlockFreq - Return block frequency.
LLVM_ABI std::optional< BlockFrequency > mul(uint64_t Factor) const
Multiplies frequency with Factor. Returns nullopt in case of overflow.
Analysis pass which computes BranchProbabilityInfo.
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
bool isInlineAsm() const
Check if this call is an inline asm statement.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
This class is the base class for the comparison instructions.
Definition InstrTypes.h:664
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
static LLVM_ABI CmpInst * Create(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:765
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Conditional Branch instruction.
static LLVM_ABI Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:219
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:174
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
LLVM_ABI void removeFromParent()
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LocationType Type
Classification of the debug-info record that this DbgVariableRecord represents.
LLVM_ABI void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
LLVM_ABI iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
bool erase(const KeyT &Val)
Definition DenseMap.h:330
unsigned size() const
Definition DenseMap.h:110
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
LLVM_ABI void deleteBB(BasicBlock *DelBB)
Delete DelBB.
Analysis pass which computes a DominatorTree.
Definition Dominators.h:278
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:316
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This instruction extracts a struct member or array element value from an aggregate value.
iterator_range< idx_iterator > indices() const
This instruction compares its operands according to the predicate given to the constructor.
bool none() const
Definition FMF.h:60
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:873
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const BasicBlock & getEntryBlock() const
Definition Function.h:809
LLVM_ABI const Value * getStatepoint() const
The statepoint with which this gc.relocate is associated.
Represents calls to the gc.relocate intrinsic.
unsigned getBasePtrIndex() const
The index into the associate statepoint's argument list which contains the base pointer of the pointe...
DomTreeT & getDomTree()
Flush DomTree updates and return DomTree.
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
void flush()
Apply all pending updates to available trees and flush all BasicBlocks awaiting deletion.
bool isBBPendingDeletion(BasicBlockT *DelBB) const
Returns true if DelBB is awaiting deletion.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static LLVM_ABI Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
LLVM_ABI bool canIncreaseAlignment() const
Returns true if the alignment of the value can be unilaterally increased.
Definition Globals.cpp:345
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
LLVM_ABI uint64_t getGlobalSize(const DataLayout &DL) const
Get the size of this global variable in bytes.
Definition Globals.cpp:563
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalVariable.
This instruction compares its operands according to the predicate given to the constructor.
bool isEquality() const
Return true if this predicate is either EQ or NE.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2811
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI bool isDebugOrPseudoInst() const LLVM_READONLY
Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
LLVM_ABI bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool isShift() const
LLVM_ABI void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
LLVM_ABI std::optional< simple_ilist< DbgRecord >::iterator > getDbgReinsertionPosition()
Return an iterator to the position of the "Next" DbgRecord after this instruction,...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:569
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
void analyze(const DominatorTreeBase< BlockT, false > &DomTree)
Create the loop forest using a stable algorithm.
iterator end() const
iterator begin() const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition LoopInfo.h:596
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
static MVT getIntegerVT(unsigned BitWidth)
LLVM_ABI void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
iterator end()
Definition MapVector.h:67
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition MapVector.h:194
iterator find(const KeyT &Key)
Definition MapVector.h:154
bool empty() const
Definition MapVector.h:77
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:124
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
PointerIntPair - This class implements a pair of a pointer and small integer.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool isFunctionColdInCallGraph(const FuncT *F, BFIT &BFI) const
Returns true if F contains only cold code.
LLVM_ABI bool isFunctionHotnessUnknown(const Function &F) const
Returns true if the hotness of F is unknown.
bool isFunctionHotInCallGraph(const FuncT *F, BFIT &BFI) const
Returns true if F contains hot code.
LLVM_ABI bool hasPartialSampleProfile() const
Returns true if module M has partial-profile sample profile.
LLVM_ABI bool hasHugeWorkingSetSize() const
Returns true if the working set size of the code is considered huge.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
size_type count(const_arg_type key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:262
void clear()
Completely clear the SetVector.
Definition SetVector.h:267
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:100
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
value_type pop_back_val()
Definition SetVector.h:279
VectorType * getType() const
Overload to return most specific vector type.
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
bool erase(const T &V)
Definition SmallSet.h:200
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::iterator iterator
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static unsigned getPointerOperandIndex()
TypeSize getElementOffset(unsigned Idx) const
Definition DataLayout.h:767
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool isSelectSupported(SelectSupportKind) const
virtual bool isEqualityCmpFoldedWithSignedCmp() const
Return true if instruction generated for equality comparison is folded with instruction generated for...
virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT, bool MathUsed) const
Try to convert math with an overflow comparison into the corresponding DAG node operation.
virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const
Return if the target supports combining a chain like:
virtual bool shouldOptimizeMulOverflowWithZeroHighBits(LLVMContext &Context, EVT VT) const
bool isExtLoad(const LoadInst *Load, const Instruction *Ext, const DataLayout &DL) const
Return true if Load and Ext can form an ExtLoad.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool enableExtLdPromotion() const
Return true if the target wants to use the optimization that turns ext(promotableInst1(....
virtual bool isCheapToSpeculateCttz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isJumpExpensive() const
Return true if Flow Control is an expensive operation that should be avoided.
bool hasExtractBitsInsn() const
Return true if the target has BitExtract instructions.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isSlowDivBypassed() const
Returns true if target has indicated at least one type should be bypassed.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool hasMultipleConditionRegisters(EVT VT) const
Does the target have multiple (allocatable) condition registers that can be used to store the results...
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual MVT getPreferredSwitchConditionType(LLVMContext &Context, EVT ConditionVT) const
Returns preferred type for switch condition.
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const
Return true if the target can combine store(extractelement VectorTy,Idx).
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
virtual bool shouldConsiderGEPOffsetSplit() const
bool isExtFree(const Instruction *I) const
Return true if the extension represented by I is free.
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
virtual bool getAddrModeArguments(const IntrinsicInst *, SmallVectorImpl< Value * > &, Type *&) const
CodeGenPrepare sinks address calculations into the same BB as Load/Store instructions reading the add...
const DenseMap< unsigned int, unsigned int > & getBypassSlowDivWidths() const
Returns map of slow types for division or remainder with corresponding fast types.
virtual bool isCheapToSpeculateCtlz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic ctlz.
virtual bool useSoftFloat() const
virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) const
Return the preferred common base offset.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldAlignPointerArgs(CallInst *, unsigned &, Align &) const
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
virtual Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
bool isLoadLegal(EVT ValVT, EVT MemVT, Align Alignment, unsigned AddrSpace, unsigned ExtType, bool Atomic) const
Return true if the specified load with extension is legal on this target.
virtual bool addressingModeSupportsTLS(const GlobalValue &) const
Returns true if the target's addressing mode can target thread local storage (TLS).
virtual bool shouldConvertPhiType(Type *From, Type *To) const
Given a set of interconnected phis of type 'From' that are loaded/stored or bitcast to type 'To',...
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
virtual bool preferZeroCompareBranch() const
Return true if the heuristic to prefer icmp eq zero should be used in code gen prepare.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool optimizeExtendOrTruncateConversion(Instruction *I, Loop *L, const TargetTransformInfo &TTI) const
Try to optimize extending or truncating conversion instructions (like zext, trunc,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
std::vector< AsmOperandInfo > AsmOperandInfoVector
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual bool mayBeEmittedAsTailCall(const CallInst *) const
Return true if the target may be able to emit the call instruction as a tail call.
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
TargetOptions Options
unsigned EnableFastISel
EnableFastISel - This flag enables fast-path instruction selection which trades away generated code q...
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
virtual bool addrSinkUsingGEPs() const
Sink addresses into blocks using GEP instructions rather than pointer casts and arithmetic.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, const Value *Op0=nullptr, const Value *Op1=nullptr, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
LLVM_ABI InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
LLVM_ABI InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing for the given integer immediate of the specified type.
LLVM_ABI bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
@ TCC_Basic
The cost of a typical 'add' instruction.
LLVM_ABI bool isVectorShiftByScalarCheap(Type *Ty) const
Return true if it's significantly cheaper to shift a vector by a uniform scalar than by an amount whi...
LLVM_ABI bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Return true if sinking I's operands to the same basic block as I is profitable, e....
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:65
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:313
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:370
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition Type.h:272
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:317
BasicBlock * getSuccessor(unsigned i=0) const
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
op_range operands()
Definition User.h:267
const Use & getOperandUse(unsigned i) const
Definition User.h:220
void setOperand(unsigned i, Value *Val)
Definition User.h:212
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:25
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
user_iterator user_begin()
Definition Value.h:403
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:397
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:440
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:259
iterator_range< user_iterator > users()
Definition Value.h:427
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:967
LLVM_ABI bool isUsedInBasicBlock(const BasicBlock *BB) const
Check if this value is used in the specified basic block.
Definition Value.cpp:242
LLVM_ABI void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:713
bool use_empty() const
Definition Value.h:347
user_iterator user_end()
Definition Value.h:411
iterator_range< use_iterator > uses()
Definition Value.h:381
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Definition Value.h:845
user_iterator_impl< User > user_iterator
Definition Value.h:392
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:403
LLVM_ABI void dump() const
Support for debugging, callable in GDB: V->dump()
bool pointsToAliveValue() const
int getNumOccurrences() const
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isNonZero() const
Definition TypeSize.h:155
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
TypeSize getSequentialElementStride(const DataLayout &DL) const
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
@ Entry
Definition COFF.h:862
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
unsigned getAddrMode(MCInstrInfo const &MCII, MCInst const &MCI)
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches right shift operations.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap, true > m_c_NUWAdd(const LHS &L, const RHS &R)
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignores it.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
UAddWithOverflow_match< LHS_t, RHS_t, Sum_t > m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S)
Match an icmp instruction checking for unsigned overflow on addition.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
int compare(DigitsT LDigits, int16_t LScale, DigitsT RDigits, int16_t RScale)
Compare two scaled numbers.
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ Assume
Do not drop type tests (default).
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
SmallVector< Node, 4 > NodeList
Definition RDFGraph.h:550
iterator end() const
Definition BasicBlock.h:89
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
LLVM_ABI iterator begin() const
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
std::enable_if_t< std::is_signed_v< T >, T > MulOverflow(T X, T Y, T &Result)
Multiply two signed integers, computing the two's complement truncated result, returning true if an o...
Definition MathExtras.h:753
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1765
LLVM_ABI bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:535
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition Local.cpp:134
LLVM_ABI void findDbgValues(Value *V, SmallVectorImpl< DbgVariableRecord * > &DbgVariableRecords)
Finds the dbg.values describing a value.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
APInt operator*(APInt a, uint64_t RHS)
Definition APInt.h:2253
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition Utils.cpp:1725
auto successors(const MachineBasicBlock *BB)
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI ReturnInst * FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, BasicBlock *Pred, DomTreeUpdater *DTU=nullptr)
This method duplicates the specified return instruction into a predecessor which ends in an unconditi...
bool operator!=(uint64_t V1, const APInt &V2)
Definition APInt.h:2131
constexpr from_range_t from_range
LLVM_ABI BasicBlock * splitBlockBefore(BasicBlock *Old, BasicBlock::iterator SplitPt, DomTreeUpdater *DTU, LoopInfo *LI, MemorySSAUpdater *MSSAU, const Twine &BBName="")
Split the specified block at the specified instruction SplitPt.
LLVM_ABI Instruction * SplitBlockAndInsertIfElse(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ElseBlock=nullptr)
Similar to SplitBlockAndInsertIfThen, but the inserted block is on the false path of the branch.
LLVM_ABI bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr, DomTreeUpdater *DTU=nullptr)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2134
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool hasBranchWeightOrigin(const Instruction &I)
Check if Branch Weight Metadata has an "expected" field from an llvm.expect* intrinsic.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
Definition STLExtras.h:2173
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI Value * simplifyAddInst(Value *LHS, Value *RHS, bool IsNSW, bool IsNUW, const SimplifyQuery &Q)
Given operands for an Add, fold the result or return null.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition Local.h:252
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2200
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned element.
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
LLVM_ABI bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr, SmallSetVector< Instruction *, 8 > *UnsimplifiedUsers=nullptr)
Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
LLVM_ABI bool recognizeBSwapOrBitReverseIdiom(Instruction *I, bool MatchBSwaps, bool MatchBitReversals, SmallVectorImpl< Instruction * > &InsertedInsts)
Try to match a bswap or bitreverse idiom.
Definition Local.cpp:3785
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1753
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1399
generic_gep_type_iterator<> gep_type_iterator
LLVM_ABI FunctionPass * createCodeGenPrepareLegacyPass()
createCodeGenPrepareLegacyPass - Transform the code to expose more pattern matching during instruction selection.
ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred)
getFCmpCondCode - Return the ISD condition code corresponding to the given LLVM IR floating-point condition code.
Definition Analysis.cpp:203
LLVM_ABI bool VerifyLoopInfo
Enable verification of loop info.
Definition LoopInfo.cpp:53
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
bool attributesPermitTailCall(const Function *F, const Instruction *I, const ReturnInst *Ret, const TargetLoweringBase &TLI, bool *AllowDifferingSizes=nullptr)
Test if, given that the input instruction is in the tail call position, there is an attribute mismatch between the caller and the callee that will inhibit tail call optimizations.
Definition Analysis.cpp:588
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
@ Or
Bitwise or logical OR of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
Definition STLExtras.h:2012
DWARFExpression::Operation Op
bool bypassSlowDivision(BasicBlock *BB, const DenseMap< unsigned int, unsigned int > &BypassWidth, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
This optimization identifies DIV instructions in a BB that can be profitably bypassed and carried out with a shorter, faster divide.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI bool VerifyDomInfo
Enables verification of dominator trees.
constexpr unsigned BitWidth
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
gep_type_iterator gep_type_begin(const User *GEP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
std::enable_if_t< std::is_signed_v< T >, T > AddOverflow(T X, T Y, T &Result)
Add two signed integers, computing the two's complement truncated result, returning true if overflow occurred.
Definition MathExtras.h:701
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the old basic block, and the rest of the instructions in the BB are moved to a new block.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From and To.
std::pair< Value *, FPClassTest > fcmpToClassTest(FCmpInst::Predicate Pred, const Function &F, Value *LHS, Value *RHS, bool LookThroughSrc=true)
Returns a pair of values, which if passed to llvm.is.fpclass, returns the same result as an fcmp with the given operands.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI Value * simplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a URem, fold the result or return null.
DenseMap< const Value *, Value * > ValueToValueMap
LLVM_ABI CGPassBuilderOption getCGPassBuilderOption()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define NC
Definition regutils.h:42
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:292
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition ValueTypes.h:308
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition ValueTypes.h:256
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
This contains information for each constraint that we are lowering.