31#include "llvm/IR/IntrinsicsAMDGPU.h"
36#define DEBUG_TYPE "amdgpu-atomic-optimizer"
43struct ReplacementInfo {
66class AMDGPUAtomicOptimizerImpl
79 Value *
const Identity)
const;
81 Value *
const Identity)
const;
84 std::pair<Value *, Value *>
90 bool ValDivergent)
const;
93 AMDGPUAtomicOptimizerImpl() =
delete;
98 :
F(
F), UA(UA),
DL(
F.getDataLayout()), DTU(DTU), ST(ST),
100 ScanImpl(ScanImpl) {}
110char AMDGPUAtomicOptimizer::ID = 0;
114bool AMDGPUAtomicOptimizer::runOnFunction(
Function &
F) {
115 if (skipFunction(
F)) {
120 getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
123 getAnalysisIfAvailable<DominatorTreeWrapperPass>();
125 DomTreeUpdater::UpdateStrategy::Lazy);
131 return AMDGPUAtomicOptimizerImpl(
F, UA, DTU, ST, ScanImpl).run();
139 DomTreeUpdater::UpdateStrategy::Lazy);
142 bool IsChanged = AMDGPUAtomicOptimizerImpl(
F, UA, DTU, ST, ScanImpl).run();
153bool AMDGPUAtomicOptimizerImpl::run() {
161 if (ToReplace.empty())
164 for (
auto &[
I,
Op, ValIdx, ValDivergent] : ToReplace)
165 optimizeAtomic(*
I,
Op, ValIdx, ValDivergent);
171 switch (Ty->getTypeID()) {
176 unsigned Size = Ty->getIntegerBitWidth();
184void AMDGPUAtomicOptimizerImpl::visitAtomicRMWInst(AtomicRMWInst &
I) {
186 switch (
I.getPointerAddressSpace()) {
217 !(
I.getType()->isFloatTy() ||
I.getType()->isDoubleTy())) {
221 const unsigned PtrIdx = 0;
222 const unsigned ValIdx = 1;
237 if (ScanImpl == ScanOptions::DPP && !ST.hasDPP())
247 ToReplace.push_back({&
I,
Op, ValIdx, ValDivergent});
250void AMDGPUAtomicOptimizerImpl::visitIntrinsicInst(IntrinsicInst &
I) {
253 switch (
I.getIntrinsicID()) {
256 case Intrinsic::amdgcn_struct_buffer_atomic_add:
257 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_add:
258 case Intrinsic::amdgcn_raw_buffer_atomic_add:
259 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_add:
262 case Intrinsic::amdgcn_struct_buffer_atomic_sub:
263 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_sub:
264 case Intrinsic::amdgcn_raw_buffer_atomic_sub:
265 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_sub:
268 case Intrinsic::amdgcn_struct_buffer_atomic_and:
269 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_and:
270 case Intrinsic::amdgcn_raw_buffer_atomic_and:
271 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_and:
274 case Intrinsic::amdgcn_struct_buffer_atomic_or:
275 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_or:
276 case Intrinsic::amdgcn_raw_buffer_atomic_or:
277 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_or:
280 case Intrinsic::amdgcn_struct_buffer_atomic_xor:
281 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_xor:
282 case Intrinsic::amdgcn_raw_buffer_atomic_xor:
283 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_xor:
286 case Intrinsic::amdgcn_struct_buffer_atomic_smin:
287 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_smin:
288 case Intrinsic::amdgcn_raw_buffer_atomic_smin:
289 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_smin:
292 case Intrinsic::amdgcn_struct_buffer_atomic_umin:
293 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_umin:
294 case Intrinsic::amdgcn_raw_buffer_atomic_umin:
295 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_umin:
298 case Intrinsic::amdgcn_struct_buffer_atomic_smax:
299 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_smax:
300 case Intrinsic::amdgcn_raw_buffer_atomic_smax:
301 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_smax:
304 case Intrinsic::amdgcn_struct_buffer_atomic_umax:
305 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_umax:
306 case Intrinsic::amdgcn_raw_buffer_atomic_umax:
307 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_umax:
312 const unsigned ValIdx = 0;
321 if (ScanImpl == ScanOptions::DPP && !ST.hasDPP())
330 for (
unsigned Idx = 1; Idx <
I.getNumOperands(); Idx++) {
338 ToReplace.push_back({&
I,
Op, ValIdx, ValDivergent});
351 return B.CreateBinOp(Instruction::Add,
LHS,
RHS);
355 return B.CreateBinOp(Instruction::Sub,
LHS,
RHS);
359 return B.CreateBinOp(Instruction::And,
LHS,
RHS);
361 return B.CreateBinOp(Instruction::Or,
LHS,
RHS);
363 return B.CreateBinOp(Instruction::Xor,
LHS,
RHS);
378 return B.CreateMaxNum(
LHS,
RHS);
380 return B.CreateMinNum(
LHS,
RHS);
391 Value *
const Identity)
const {
392 Type *AtomicTy =
V->getType();
393 Module *
M =
B.GetInsertBlock()->getModule();
396 for (
unsigned Idx = 0; Idx < 4; Idx++) {
399 B.CreateIntrinsic(Intrinsic::amdgcn_update_dpp, AtomicTy,
400 {Identity, V, B.getInt32(DPP::ROW_XMASK0 | 1 << Idx),
401 B.getInt32(0xf), B.getInt32(0xf), B.getFalse()}));
406 Value *Permlanex16Call =
407 B.CreateIntrinsic(AtomicTy, Intrinsic::amdgcn_permlanex16,
409 B.getInt32(0),
B.getFalse(),
B.getFalse()});
417 Value *Permlane64Call =
418 B.CreateIntrinsic(AtomicTy, Intrinsic::amdgcn_permlane64, V);
425 M, Intrinsic::amdgcn_readlane, AtomicTy);
426 Value *Lane0 =
B.CreateCall(ReadLane, {
V,
B.getInt32(0)});
427 Value *Lane32 =
B.CreateCall(ReadLane, {
V,
B.getInt32(32)});
435 Value *Identity)
const {
436 Type *AtomicTy =
V->getType();
437 Module *
M =
B.GetInsertBlock()->getModule();
439 M, Intrinsic::amdgcn_update_dpp, AtomicTy);
441 for (
unsigned Idx = 0; Idx < 4; Idx++) {
444 B.CreateCall(UpdateDPP,
445 {Identity, V, B.getInt32(DPP::ROW_SHR0 | 1 << Idx),
446 B.getInt32(0xf), B.getInt32(0xf), B.getFalse()}));
448 if (ST.hasDPPBroadcasts()) {
452 B.CreateCall(UpdateDPP,
453 {Identity, V, B.getInt32(DPP::BCAST15), B.getInt32(0xa),
454 B.getInt32(0xf), B.getFalse()}));
457 B.CreateCall(UpdateDPP,
458 {Identity, V, B.getInt32(DPP::BCAST31), B.getInt32(0xc),
459 B.getInt32(0xf), B.getFalse()}));
468 B.CreateIntrinsic(AtomicTy, Intrinsic::amdgcn_permlanex16,
470 B.getInt32(-1),
B.getFalse(),
B.getFalse()});
472 Value *UpdateDPPCall =
B.CreateCall(
474 B.getInt32(0xa),
B.getInt32(0xf),
B.getFalse()});
479 Value *
const Lane31 =
B.CreateIntrinsic(
480 AtomicTy, Intrinsic::amdgcn_readlane, {
V,
B.getInt32(31)});
482 Value *UpdateDPPCall =
B.CreateCall(
484 B.getInt32(0xc),
B.getInt32(0xf),
B.getFalse()});
495 Value *Identity)
const {
496 Type *AtomicTy =
V->getType();
497 Module *
M =
B.GetInsertBlock()->getModule();
499 M, Intrinsic::amdgcn_update_dpp, AtomicTy);
500 if (ST.hasDPPWavefrontShifts()) {
502 V =
B.CreateCall(UpdateDPP,
504 B.getInt32(0xf),
B.getFalse()});
507 M, Intrinsic::amdgcn_readlane, AtomicTy);
509 M, Intrinsic::amdgcn_writelane, AtomicTy);
514 V =
B.CreateCall(UpdateDPP,
516 B.getInt32(0xf),
B.getInt32(0xf),
B.getFalse()});
519 V =
B.CreateCall(WriteLane, {
B.CreateCall(ReadLane, {Old,
B.getInt32(15)}),
526 {
B.CreateCall(ReadLane, {Old,
B.getInt32(31)}),
B.getInt32(32),
V});
531 {
B.CreateCall(ReadLane, {Old,
B.getInt32(47)}),
B.getInt32(48),
V});
543std::pair<Value *, Value *> AMDGPUAtomicOptimizerImpl::buildScanIteratively(
545 Instruction &
I, BasicBlock *ComputeLoop, BasicBlock *ComputeEnd)
const {
546 auto *Ty =
I.getType();
548 auto *EntryBB =
I.getParent();
549 auto NeedResult = !
I.use_empty();
552 B.CreateIntrinsic(Intrinsic::amdgcn_ballot, WaveTy,
B.getTrue());
555 B.SetInsertPoint(ComputeLoop);
559 PHINode *OldValuePhi =
nullptr;
561 OldValuePhi =
B.CreatePHI(Ty, 2,
"OldValuePhi");
564 auto *ActiveBits =
B.CreatePHI(WaveTy, 2,
"ActiveBits");
565 ActiveBits->addIncoming(Ballot, EntryBB);
569 B.CreateIntrinsic(Intrinsic::cttz, WaveTy, {ActiveBits,
B.getTrue()});
571 auto *LaneIdxInt =
B.CreateTrunc(FF1,
B.getInt32Ty());
574 Value *LaneValue =
B.CreateIntrinsic(
V->getType(), Intrinsic::amdgcn_readlane,
579 Value *OldValue =
nullptr;
581 OldValue =
B.CreateIntrinsic(
V->getType(), Intrinsic::amdgcn_writelane,
582 {Accumulator, LaneIdxInt, OldValuePhi});
588 Accumulator->addIncoming(NewAccumulator, ComputeLoop);
592 auto *
Mask =
B.CreateShl(ConstantInt::get(WaveTy, 1), FF1);
594 auto *InverseMask =
B.CreateXor(Mask, ConstantInt::getAllOnesValue(WaveTy));
595 auto *NewActiveBits =
B.CreateAnd(ActiveBits, InverseMask);
596 ActiveBits->addIncoming(NewActiveBits, ComputeLoop);
599 auto *IsEnd =
B.CreateICmpEQ(NewActiveBits, ConstantInt::get(WaveTy, 0));
600 B.CreateCondBr(IsEnd, ComputeEnd, ComputeLoop);
602 B.SetInsertPoint(ComputeEnd);
604 return {OldValue, NewAccumulator};
610 const unsigned BitWidth = Ty->getPrimitiveSizeInBits();
646void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &
I,
649 bool ValDivergent)
const {
654 B.setIsFPConstrained(
I.getFunction()->hasFnAttribute(Attribute::StrictFP));
671 Value *
const Cond =
B.CreateIntrinsic(Intrinsic::amdgcn_ps_live, {});
679 B.SetInsertPoint(&
I);
682 Type *
const Ty =
I.getType();
685 [[maybe_unused]]
const unsigned TyBitWidth =
DL.getTypeSizeInBits(Ty);
689 Value *
V =
I.getOperand(ValIdx);
694 CallInst *
const Ballot =
695 B.CreateIntrinsic(Intrinsic::amdgcn_ballot, WaveTy,
B.getTrue());
704 B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_lo, {Ballot,
B.getInt32(0)});
707 Value *
const ExtractHi =
B.CreateTrunc(
B.CreateLShr(Ballot, 32),
Int32Ty);
708 Mbcnt =
B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_lo,
709 {ExtractLo,
B.getInt32(0)});
710 Mbcnt =
B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_hi, {ExtractHi, Mbcnt});
714 LLVMContext &
C =
F->getContext();
726 Value *ExclScan =
nullptr;
727 Value *NewV =
nullptr;
729 const bool NeedResult = !
I.use_empty();
736 if (ScanImpl == ScanOptions::DPP) {
740 B.CreateIntrinsic(Intrinsic::amdgcn_set_inactive, Ty, {
V, Identity});
745 NewV = buildReduction(
B, ScanOp, NewV, Identity);
747 NewV = buildScan(
B, ScanOp, NewV, Identity);
749 ExclScan = buildShiftRight(
B, NewV, Identity);
754 NewV =
B.CreateIntrinsic(Ty, Intrinsic::amdgcn_readlane,
755 {NewV, LastLaneIdx});
758 NewV =
B.CreateIntrinsic(Intrinsic::amdgcn_strict_wwm, Ty, NewV);
759 }
else if (ScanImpl == ScanOptions::Iterative) {
763 std::tie(ExclScan, NewV) = buildScanIteratively(
B, ScanOp, Identity, V,
I,
764 ComputeLoop, ComputeEnd);
777 Value *
const Ctpop =
B.CreateIntCast(
778 B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot), Ty,
false);
784 Value *
const Ctpop =
B.CreateIntCast(
785 B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot),
Int32Ty,
false);
786 Value *
const CtpopFP =
B.CreateUIToFP(Ctpop, Ty);
787 NewV =
B.CreateFMul(V, CtpopFP);
806 Value *
const Ctpop =
B.CreateIntCast(
807 B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot), Ty,
false);
816 Value *
const Cond =
B.CreateICmpEQ(Mbcnt,
B.getInt32(0));
838 if (ValDivergent && ScanImpl == ScanOptions::Iterative) {
844 B.SetInsertPoint(ComputeEnd);
846 B.Insert(Terminator);
850 B.SetInsertPoint(OriginalBB);
851 B.CreateBr(ComputeLoop);
855 {{DominatorTree::Insert, OriginalBB, ComputeLoop},
856 {DominatorTree::Insert, ComputeLoop, ComputeEnd}});
861 DomTreeUpdates.push_back({DominatorTree::Insert, ComputeEnd, Succ});
862 DomTreeUpdates.push_back({DominatorTree::Delete, OriginalBB, Succ});
867 Predecessor = ComputeEnd;
869 Predecessor = OriginalBB;
872 B.SetInsertPoint(SingleLaneTerminator);
882 B.SetInsertPoint(&
I);
886 PHINode *
const PHI =
B.CreatePHI(Ty, 2);
888 PHI->addIncoming(NewI, SingleLaneTerminator->
getParent());
895 ReadlaneVal =
B.CreateZExt(
PHI,
B.getInt32Ty());
897 Value *BroadcastI =
B.CreateIntrinsic(
898 ReadlaneVal->
getType(), Intrinsic::amdgcn_readfirstlane, ReadlaneVal);
900 BroadcastI =
B.CreateTrunc(BroadcastI, Ty);
906 Value *LaneOffset =
nullptr;
908 if (ScanImpl == ScanOptions::DPP) {
910 B.CreateIntrinsic(Intrinsic::amdgcn_strict_wwm, Ty, ExclScan);
911 }
else if (ScanImpl == ScanOptions::Iterative) {
912 LaneOffset = ExclScan;
917 Mbcnt = isAtomicFloatingPointTy ?
B.CreateUIToFP(Mbcnt, Ty)
918 :
B.CreateIntCast(Mbcnt, Ty,
false);
934 LaneOffset =
B.CreateSelect(
Cond, Identity, V);
937 LaneOffset =
buildMul(
B, V,
B.CreateAnd(Mbcnt, 1));
941 LaneOffset =
B.CreateFMul(V, Mbcnt);
947 if (isAtomicFloatingPointTy) {
965 PHINode *
const PHI =
B.CreatePHI(Ty, 2);
967 PHI->addIncoming(Result,
I.getParent());
968 I.replaceAllUsesWith(
PHI);
971 I.replaceAllUsesWith(Result);
980 "AMDGPU atomic optimizations",
false,
false)
987 return new AMDGPUAtomicOptimizer(ScanStrategy);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static Constant * getIdentityValueForAtomicOp(Type *const Ty, AtomicRMWInst::BinOp Op)
static bool isLegalCrossLaneType(Type *Ty)
static Value * buildMul(IRBuilder<> &B, Value *LHS, Value *RHS)
static Value * buildNonAtomicBinOp(IRBuilder<> &B, AtomicRMWInst::BinOp Op, Value *LHS, Value *RHS)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool runOnFunction(Function &F, bool PostInlining)
AMD GCN specific subclass of TargetSubtarget.
Machine Check Debug Module
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > & Cond
static void visit(BasicBlock &Start, std::function< bool(BasicBlock *)> op)
Target-Independent Code Generator Pass Configuration Options pass.
bool isSingleLaneExecution(const Function &Kernel) const
Return true if only a single workitem can be active in a wave.
unsigned getWavefrontSize() const
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
an instruction that atomically reads a memory location, combines it with another value,...
static bool isFPOperation(BinOp Op)
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Min
*p = old <signed v ? old : v
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
This is the shared class of boolean and integer constants.
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
This is an important base class in LLVM.
A parsed version of the target data layout string, and methods for querying it.
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
DominatorTree & getDomTree()
FunctionPass class - This class is used to implement most global optimizations.
bool hasPermLane64() const
bool hasPermLaneX16() const
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Base class for instruction visitors.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserve()
Mark an analysis as preserved.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Primary interface to the complete machine description for the target machine.
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Target-Independent Code Generator Pass Configuration Options.
TMC & getTM() const
Get the right type of TargetMachine for this target.
The instances of the Type class are immutable: once they are created, they are never changed.
@ FloatTyID
32-bit floating point type
@ IntegerTyID
Arbitrary bit width integers.
@ DoubleTyID
64-bit floating point type
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
void setOperand(unsigned i, Value *Val)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
GenericUniformityInfo< SSAContext > UniformityInfo
FunctionAddr VTableAddr Value
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
FunctionPass * createAMDGPUAtomicOptimizerPass(ScanOptions ScanStrategy)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
DWARFExpression::Operation Op
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
char & AMDGPUAtomicOptimizerID
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)