LLVM 23.0.0git
NVPTXLowerArgs.cpp
Go to the documentation of this file.
1//===-- NVPTXLowerArgs.cpp - Lower arguments ------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//
10// Arguments to kernel and device functions are passed via param space,
11// which imposes certain restrictions:
12// http://docs.nvidia.com/cuda/parallel-thread-execution/#state-spaces
13//
14// Kernel parameters are read-only and accessible only via ld.param
15// instruction, directly or via a pointer.
16//
17// Device function parameters are directly accessible via
18// ld.param/st.param, but taking the address of one returns a pointer
19// to a copy created in local space which *can't* be used with
20// ld.param/st.param.
21//
22// Copying a byval struct into local memory in IR allows us to enforce
23// the param space restrictions, gives the rest of IR a pointer w/o
24// param space restrictions, and gives us an opportunity to eliminate
25// the copy.
26//
27// Pointer arguments to kernel functions need more work to be lowered:
28//
29// 1. Convert non-byval pointer arguments of CUDA kernels to pointers in the
30// global address space. This allows later optimizations to emit
31// ld.global.*/st.global.* for accessing these pointer arguments. For
32// example,
33//
34// define void @foo(float* %input) {
35// %v = load float, float* %input, align 4
36// ...
37// }
38//
39// becomes
40//
41// define void @foo(float* %input) {
42// %input2 = addrspacecast float* %input to float addrspace(1)*
43// %input3 = addrspacecast float addrspace(1)* %input2 to float*
44// %v = load float, float* %input3, align 4
45// ...
46// }
47//
48// Later, NVPTXInferAddressSpaces will optimize it to
49//
50// define void @foo(float* %input) {
51// %input2 = addrspacecast float* %input to float addrspace(1)*
52// %v = load float, float addrspace(1)* %input2, align 4
53// ...
54// }
55//
56// 2. Convert byval kernel parameters to pointers in the param address space
57// (so that NVPTX emits ld/st.param). Convert pointers *within* a byval
58// kernel parameter to pointers in the global address space. This allows
59// NVPTX to emit ld/st.global.
60//
61// struct S {
62// int *x;
63// int *y;
64// };
65// __global__ void foo(S s) {
66// int *b = s.y;
67// // use b
68// }
69//
70// "b" points to the global address space. In the IR level,
71//
72// define void @foo(ptr byval %input) {
73// %b_ptr = getelementptr {ptr, ptr}, ptr %input, i64 0, i32 1
74// %b = load ptr, ptr %b_ptr
75// ; use %b
76// }
77//
78// becomes
79//
80// define void @foo({i32*, i32*}* byval %input) {
81//    %b_param = addrspacecast ptr %input to ptr addrspace(101)
82// %b_ptr = getelementptr {ptr, ptr}, ptr addrspace(101) %b_param, i64 0, i32 1
83// %b = load ptr, ptr addrspace(101) %b_ptr
84// %b_global = addrspacecast ptr %b to ptr addrspace(1)
85//    ; use %b_global
86// }
87//
88// Create a local copy of kernel byval parameters used in a way that *might* mutate
89// the parameter, by storing it in an alloca. Mutations to "grid_constant" parameters
90// are undefined behaviour, and don't require local copies.
91//
92// define void @foo(ptr byval(%struct.s) align 4 %input) {
93// store i32 42, ptr %input
94// ret void
95// }
96//
97// becomes
98//
99// define void @foo(ptr byval(%struct.s) align 4 %input) #1 {
100// %input1 = alloca %struct.s, align 4
101// %input2 = addrspacecast ptr %input to ptr addrspace(101)
102// %input3 = load %struct.s, ptr addrspace(101) %input2, align 4
103// store %struct.s %input3, ptr %input1, align 4
104// store i32 42, ptr %input1, align 4
105// ret void
106// }
107//
108// If %input were passed to a device function, or written to memory,
109// conservatively assume that %input gets mutated, and create a local copy.
110//
111// Convert param pointers to grid_constant byval kernel parameters that are
112// passed into calls (device functions, intrinsics, inline asm), or otherwise
113// "escape" (into stores/ptrtoints) to the generic address space, using the
114// `nvvm.ptr.param.to.gen` intrinsic, so that NVPTX emits cvta.param
115// (available for sm70+)
116//
117// define void @foo(ptr byval(%struct.s) %input) {
118// ; %input is a grid_constant
119// %call = call i32 @escape(ptr %input)
120// ret void
121// }
122//
123// becomes
124//
125// define void @foo(ptr byval(%struct.s) %input) {
126// %input1 = addrspacecast ptr %input to ptr addrspace(101)
127// ; the following intrinsic converts pointer to generic. We don't use an addrspacecast
128// ; to prevent generic -> param -> generic from getting cancelled out
129// %input1.gen = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) %input1)
130// %call = call i32 @escape(ptr %input1.gen)
131// ret void
132// }
133//
134// TODO: merge this pass with NVPTXInferAddressSpaces so that other passes don't
135// cancel the addrspacecast pair this pass emits.
136//===----------------------------------------------------------------------===//
137
#include "NVPTX.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXUtilities.h"
#include "NVVMProperties.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
159
160#define DEBUG_TYPE "nvptx-lower-args"
161
162using namespace llvm;
163using namespace NVPTXAS;
164
165namespace {
166class NVPTXLowerArgsLegacyPass : public FunctionPass {
167 bool runOnFunction(Function &F) override;
168
169public:
170 static char ID; // Pass identification, replacement for typeid
171 NVPTXLowerArgsLegacyPass() : FunctionPass(ID) {}
172 StringRef getPassName() const override {
173 return "Lower pointer arguments of CUDA kernels";
174 }
175 void getAnalysisUsage(AnalysisUsage &AU) const override {
177 }
178};
179} // namespace
180
181char NVPTXLowerArgsLegacyPass::ID = 1;
182
183INITIALIZE_PASS_BEGIN(NVPTXLowerArgsLegacyPass, "nvptx-lower-args",
184 "Lower arguments (NVPTX)", false, false)
186INITIALIZE_PASS_END(NVPTXLowerArgsLegacyPass, "nvptx-lower-args",
187 "Lower arguments (NVPTX)", false, false)
188
189// =============================================================================
190// If the function had a byval struct ptr arg, say foo(ptr byval(%struct.x) %d),
191// and we can't guarantee that the only accesses are loads,
192// then add the following instructions to the first basic block:
193//
194// %temp = alloca %struct.x, align 8
195// %tempd = addrspacecast ptr %d to ptr addrspace(101)
196// %tv = load %struct.x, ptr addrspace(101) %tempd
197// store %struct.x %tv, ptr %temp, align 8
198//
199// The above code allocates some space in the stack and copies the incoming
200// struct from param space to local space.
201// Then replace all occurrences of %d by %temp.
202//
203// In case we know that all users are GEPs or Loads, replace them with the same
204// ones in parameter AS, so we can access them using ld.param.
205// =============================================================================
206
207/// Recursively convert the users of a param to the param address space.
208static void convertToParamAS(ArrayRef<Use *> OldUses, Value *Param) {
209 struct IP {
210 Use *OldUse;
211 Value *NewParam;
212 };
213
214 const auto CloneInstInParamAS = [](const IP &I) -> Value * {
215 auto *OldInst = cast<Instruction>(I.OldUse->getUser());
216 if (auto *LI = dyn_cast<LoadInst>(OldInst)) {
217 LI->setOperand(0, I.NewParam);
218 return LI;
219 }
220 if (auto *GEP = dyn_cast<GetElementPtrInst>(OldInst)) {
221 SmallVector<Value *, 4> Indices(GEP->indices());
222 auto *NewGEP = GetElementPtrInst::Create(
223 GEP->getSourceElementType(), I.NewParam, Indices, GEP->getName(),
224 GEP->getIterator());
225 NewGEP->setNoWrapFlags(GEP->getNoWrapFlags());
226 return NewGEP;
227 }
228 if (auto *BC = dyn_cast<BitCastInst>(OldInst)) {
229 auto *NewBCType =
231 return BitCastInst::Create(BC->getOpcode(), I.NewParam, NewBCType,
232 BC->getName(), BC->getIterator());
233 }
234 if (auto *ASC = dyn_cast<AddrSpaceCastInst>(OldInst)) {
235 assert(ASC->getDestAddressSpace() == ADDRESS_SPACE_ENTRY_PARAM);
236 (void)ASC;
237 // Just pass through the argument, the old ASC is no longer needed.
238 return I.NewParam;
239 }
240 if (auto *MI = dyn_cast<MemTransferInst>(OldInst)) {
241 if (MI->getRawSource() == I.OldUse->get()) {
242 // convert to memcpy/memmove from param space.
243 IRBuilder<> Builder(OldInst);
244 Intrinsic::ID ID = MI->getIntrinsicID();
245
246 CallInst *B = Builder.CreateMemTransferInst(
247 ID, MI->getRawDest(), MI->getDestAlign(), I.NewParam,
248 MI->getSourceAlign(), MI->getLength(), MI->isVolatile());
249 for (unsigned I : {0, 1})
250 if (uint64_t Bytes = MI->getParamDereferenceableBytes(I))
251 B->addDereferenceableParamAttr(I, Bytes);
252 return B;
253 }
254 }
255
256 llvm_unreachable("Unsupported instruction");
257 };
258
259 auto ItemsToConvert =
260 map_to_vector(OldUses, [=](Use *U) -> IP { return {U, Param}; });
261 SmallVector<Instruction *> InstructionsToDelete;
262
263 while (!ItemsToConvert.empty()) {
264 IP I = ItemsToConvert.pop_back_val();
265 Value *NewInst = CloneInstInParamAS(I);
266 Instruction *OldInst = cast<Instruction>(I.OldUse->getUser());
267
268 if (NewInst && NewInst != OldInst) {
269 // We've created a new instruction. Queue users of the old instruction to
270 // be converted and the instruction itself to be deleted. We can't delete
271 // the old instruction yet, because it's still in use by a load somewhere.
272 for (Use &U : OldInst->uses())
273 ItemsToConvert.push_back({&U, NewInst});
274
275 InstructionsToDelete.push_back(OldInst);
276 }
277 }
278
279 // Now we know that all argument loads are using addresses in parameter space
280 // and we can finally remove the old instructions in generic AS. Instructions
281 // scheduled for removal should be processed in reverse order so the ones
282 // closest to the load are deleted first. Otherwise they may still be in use.
283 // E.g if we have Value = Load(BitCast(GEP(arg))), InstructionsToDelete will
284 // have {GEP,BitCast}. GEP can't be deleted first, because it's still used by
285 // the BitCast.
286 for (Instruction *I : llvm::reverse(InstructionsToDelete))
287 I->eraseFromParent();
288}
289
290// Create a call to the nvvm_internal_addrspace_wrap intrinsic and set the
291// alignment of the return value based on the alignment of the argument.
293 Argument &Arg) {
294 CallInst *ArgInParam = IRB.CreateIntrinsic(
295 Intrinsic::nvvm_internal_addrspace_wrap,
296 {IRB.getPtrTy(ADDRESS_SPACE_ENTRY_PARAM), Arg.getType()}, &Arg, {},
297 Arg.getName() + ".param");
298
299 if (MaybeAlign ParamAlign = Arg.getParamAlign())
300 ArgInParam->addRetAttr(
301 Attribute::getWithAlignment(ArgInParam->getContext(), *ParamAlign));
302
304 Arg.addAttr(Attribute::ReadOnly);
305
306 return ArgInParam;
307}
308
309namespace {
310struct ArgUseChecker : PtrUseVisitor<ArgUseChecker> {
311 using Base = PtrUseVisitor<ArgUseChecker>;
312 // Set of phi/select instructions using the Arg
313 SmallPtrSet<Instruction *, 4> Conditionals;
314
315 ArgUseChecker(const DataLayout &DL) : PtrUseVisitor(DL) {}
316
317 PtrInfo visitArgPtr(Argument &A) {
318 assert(A.getType()->isPointerTy());
319 IntegerType *IntIdxTy = cast<IntegerType>(DL.getIndexType(A.getType()));
320 IsOffsetKnown = false;
321 Offset = APInt(IntIdxTy->getBitWidth(), 0);
322 PI.reset();
323
324 LLVM_DEBUG(dbgs() << "Checking Argument " << A << "\n");
325 // Enqueue the uses of this pointer.
326 enqueueUsers(A);
327
328 // Visit all the uses off the worklist until it is empty.
329 // Note that unlike PtrUseVisitor we intentionally do not track offsets.
330 // We're only interested in how we use the pointer.
331 while (!(Worklist.empty() || PI.isAborted())) {
332 UseToVisit ToVisit = Worklist.pop_back_val();
333 U = ToVisit.UseAndIsOffsetKnown.getPointer();
334 Instruction *I = cast<Instruction>(U->getUser());
335 LLVM_DEBUG(dbgs() << "Processing " << *I << "\n");
336 Base::visit(I);
337 }
338 if (PI.isEscaped())
339 LLVM_DEBUG(dbgs() << "Argument pointer escaped: " << *PI.getEscapingInst()
340 << "\n");
341 else if (PI.isAborted())
342 LLVM_DEBUG(dbgs() << "Pointer use needs a copy: " << *PI.getAbortingInst()
343 << "\n");
344 LLVM_DEBUG(dbgs() << "Traversed " << Conditionals.size()
345 << " conditionals\n");
346 return PI;
347 }
348
349 void visitStoreInst(StoreInst &SI) {
350 // Storing the pointer escapes it.
351 if (U->get() == SI.getValueOperand())
352 return PI.setEscapedAndAborted(&SI);
353
354 PI.setAborted(&SI);
355 }
356
357 void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
358 // ASC to param space are no-ops and do not need a copy
360 return PI.setEscapedAndAborted(&ASC);
362 }
363
364 void visitPtrToIntInst(PtrToIntInst &I) { Base::visitPtrToIntInst(I); }
365
366 void visitPHINodeOrSelectInst(Instruction &I) {
368 enqueueUsers(I);
369 Conditionals.insert(&I);
370 }
371 // PHI and select just pass through the pointers.
372 void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }
373 void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
374
375 // memcpy/memmove are OK when the pointer is source. We can convert them to
376 // AS-specific memcpy.
377 void visitMemTransferInst(MemTransferInst &II) {
378 if (*U == II.getRawDest())
379 PI.setAborted(&II);
380 }
381
382 void visitMemSetInst(MemSetInst &II) { PI.setAborted(&II); }
383}; // struct ArgUseChecker
384
385void copyByValParam(Function &F, Argument &Arg) {
386 LLVM_DEBUG(dbgs() << "Creating a local copy of " << Arg << "\n");
387 Type *ByValType = Arg.getParamByValType();
388 const DataLayout &DL = F.getDataLayout();
389 IRBuilder<> IRB(&F.getEntryBlock().front());
390 AllocaInst *AllocA = IRB.CreateAlloca(ByValType, nullptr, Arg.getName());
391 // Set the alignment to alignment of the byval parameter. This is because,
392 // later load/stores assume that alignment, and we are going to replace
393 // the use of the byval parameter with this alloca instruction.
394 AllocA->setAlignment(
395 Arg.getParamAlign().value_or(DL.getPrefTypeAlign(ByValType)));
396 Arg.replaceAllUsesWith(AllocA);
397
398 Value *ArgInParamAS = createNVVMInternalAddrspaceWrap(IRB, Arg);
399
400 // Be sure to propagate alignment to this load; LLVM doesn't know that NVPTX
401 // addrspacecast preserves alignment. Since params are constant, this load
402 // is definitely not volatile.
403 const auto ArgSize = *AllocA->getAllocationSize(DL);
404 IRB.CreateMemCpy(AllocA, AllocA->getAlign(), ArgInParamAS, AllocA->getAlign(),
405 ArgSize);
406}
407} // namespace
408
409static bool argIsProcessed(Argument *Arg) {
410 if (Arg->use_empty())
411 return true;
412
413 // If the argument is already wrapped, it was processed by this pass before.
414 if (Arg->hasOneUse())
415 if (const auto *II = dyn_cast<IntrinsicInst>(*Arg->user_begin()))
416 if (II->getIntrinsicID() == Intrinsic::nvvm_internal_addrspace_wrap)
417 return true;
418
419 return false;
420}
421
423 const bool HasCvtaParam) {
425
426 const DataLayout &DL = F.getDataLayout();
427 IRBuilder<> IRB(&F.getEntryBlock().front());
428
429 if (argIsProcessed(Arg))
430 return;
431
432 // (1) First check the easy case, if were able to trace through all the uses
433 // and we can convert them all to param AS, then we'll do this.
434 ArgUseChecker AUC(DL);
435 ArgUseChecker::PtrInfo PI = AUC.visitArgPtr(*Arg);
436 const bool ArgUseIsReadOnly = !(PI.isEscaped() || PI.isAborted());
437 if (ArgUseIsReadOnly && AUC.Conditionals.empty()) {
438 // Convert all loads and intermediate operations to use parameter AS and
439 // skip creation of a local copy of the argument.
441 Value *ArgInParamAS = createNVVMInternalAddrspaceWrap(IRB, *Arg);
442 for (Use *U : UsesToUpdate)
443 convertToParamAS(U, ArgInParamAS);
444 return;
445 }
446
447 // (2) If the argument is grid constant, we get to use the pointer directly.
448 if (HasCvtaParam && (ArgUseIsReadOnly || isParamGridConstant(*Arg))) {
449 LLVM_DEBUG(dbgs() << "Using non-copy pointer to " << *Arg << "\n");
450
451 // Cast argument to param address space. Because the backend will emit the
452 // argument already in the param address space, we need to use the noop
453 // intrinsic, this had the added benefit of preventing other optimizations
454 // from folding away this pair of addrspacecasts.
455 Instruction *ArgInParamAS = createNVVMInternalAddrspaceWrap(IRB, *Arg);
456
457 // Cast param address to generic address space.
458 Value *GenericArg = IRB.CreateAddrSpaceCast(
459 ArgInParamAS, IRB.getPtrTy(ADDRESS_SPACE_GENERIC),
460 Arg->getName() + ".gen");
461
462 Arg->replaceAllUsesWith(GenericArg);
463
464 // Do not replace Arg in the cast to param space
465 ArgInParamAS->setOperand(0, Arg);
466 return;
467 }
468
469 // (3) Otherwise we have to create a copy of the argument in local memory.
470 copyByValParam(F, *Arg);
471}
472
473// =============================================================================
474// Main function for this pass.
475// =============================================================================
477 if (!isKernelFunction(F))
478 return false;
479
480 const NVPTXSubtarget *ST = TM.getSubtargetImpl(F);
481 const bool HasCvtaParam = ST->hasCvtaParam();
482
483 LLVM_DEBUG(dbgs() << "Lowering kernel args of " << F.getName() << "\n");
484 bool Changed = false;
485 for (Argument &Arg : F.args())
486 if (Arg.hasByValAttr()) {
487 lowerKernelByValParam(&Arg, F, HasCvtaParam);
488 Changed = true;
489 }
490
491 return Changed;
492}
493
494bool NVPTXLowerArgsLegacyPass::runOnFunction(Function &F) {
495 auto &TM = getAnalysis<TargetPassConfig>().getTM<NVPTXTargetMachine>();
496 return processFunction(F, TM);
497}
498
500 return new NVPTXLowerArgsLegacyPass();
501}
502
504 LLVM_DEBUG(dbgs() << "Creating a copy of byval args of " << F.getName()
505 << "\n");
506 bool Changed = false;
507 if (isKernelFunction(F)) {
508 for (Argument &Arg : F.args())
509 if (Arg.hasByValAttr() && !isParamGridConstant(Arg)) {
510 copyByValParam(F, Arg);
511 Changed = true;
512 }
513 }
514 return Changed;
515}
516
522
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool runOnFunction(Function &F, bool PostInlining)
Hexagon Common GEP
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
NVPTX address space definition.
nvptx lower Lower static false void convertToParamAS(ArrayRef< Use * > OldUses, Value *Param)
Recursively convert the users of a param to the param address space.
static CallInst * createNVVMInternalAddrspaceWrap(IRBuilder<> &IRB, Argument &Arg)
static void lowerKernelByValParam(Argument *Arg, Function &F, const bool HasCvtaParam)
static bool copyFunctionByValArgs(Function &F)
static bool argIsProcessed(Argument *Arg)
static bool processFunction(Function &F, NVPTXTargetMachine &TM)
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file provides a collection of visitors which walk the (instruction) uses of a pointer.
This file contains some templates that are useful if you are working with the STL at all.
This file defines less commonly used SmallVector utilities.
#define LLVM_DEBUG(...)
Definition Debug.h:114
Target-Independent Code Generator Pass Configuration Options pass.
unsigned getDestAddressSpace() const
Returns the address space of the result.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
LLVM_ABI void addAttr(Attribute::AttrKind Kind)
Definition Function.cpp:320
LLVM_ABI bool hasByValAttr() const
Return true if this argument has the byval attribute.
Definition Function.cpp:128
LLVM_ABI Type * getParamByValType() const
If this is a byval argument, return its type.
Definition Function.cpp:224
LLVM_ABI MaybeAlign getParamAlign() const
If this is a byval or inalloca argument, return its alignment.
Definition Function.cpp:215
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
This class represents a function call, abstracting a target machine's calling convention.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:622
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2204
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2811
void visit(Iterator Start, Iterator End)
Definition InstVisitor.h:87
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
A base class for visitors over the uses of a pointer value.
void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC)
void visitPtrToIntInst(PtrToIntInst &I)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Target-Independent Code Generator Pass Configuration Options.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
void setOperand(unsigned i, Value *Val)
Definition User.h:212
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
user_iterator user_begin()
Definition Value.h:403
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:440
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:259
bool use_empty() const
Definition Value.h:347
iterator_range< use_iterator > uses()
Definition Value.h:381
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
constexpr StringLiteral GridConstant("nvvm.grid_constant")
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto map_to_vector(ContainerTy &&C, FuncTy &&F)
Map a range to a SmallVector with element types deduced from the mapping.
FunctionPass * createNVPTXLowerArgsPass()
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
bool isParamGridConstant(const Argument &Arg)
bool isKernelFunction(const Function &F)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:368
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)