LLVM 23.0.0git
AMDGPUAttributor.cpp
Go to the documentation of this file.
1//===- AMDGPUAttributor.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This pass uses Attributor framework to deduce AMDGPU attributes.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AMDGPU.h"
14#include "AMDGPUTargetMachine.h"
15#include "GCNSubtarget.h"
17#include "llvm/IR/IntrinsicsAMDGPU.h"
18#include "llvm/IR/IntrinsicsR600.h"
21#include <cstdint>
22
23#define DEBUG_TYPE "amdgpu-attributor"
24
25using namespace llvm;
26
28 "amdgpu-indirect-call-specialization-threshold",
30 "A threshold controls whether an indirect call will be specialized"),
31 cl::init(3));
32
33#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
34
36#include "AMDGPUAttributes.def"
38};
39
40#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
41
44#include "AMDGPUAttributes.def"
47};
48
49#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
50static constexpr std::pair<ImplicitArgumentMask, StringLiteral>
52#include "AMDGPUAttributes.def"
53};
54
55// We do not need to note the x workitem or workgroup id because they are always
56// initialized.
57//
58// TODO: We should not add the attributes if the known compile time workgroup
59// size is 1 for y/z.
/// Map the intrinsic \p ID to the implicit-argument bit it depends on.
///
/// Returns NOT_IMPLICIT_INPUT when the intrinsic needs no implicit input for
/// the given subtarget features / code object version, and UNKNOWN_INTRINSIC
/// for every intrinsic that is not listed in the switch.
///
/// \param NonKernelOnly set to true for the x-dimension workitem, workgroup
///        and cluster ID intrinsics; never reset to false here.
/// \param NeedsImplicit assigned in the queue_ptr and trap cases: true when
///        \p CodeObjectVersion is at least AMDHSA_COV5.
/// \param HasApertureRegs if true, is_shared / is_private need no implicit
///        input.
/// \param SupportsGetDoorBellID selects how the trap intrinsics are handled.
// NOTE(review): the return-type line of this definition is not visible in
// this extract; the call site in this file stores the result in an
// ImplicitArgumentMask.
61intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
62 bool HasApertureRegs, bool SupportsGetDoorBellID,
63 unsigned CodeObjectVersion) {
64 switch (ID) {
65 case Intrinsic::amdgcn_workitem_id_x:
66 NonKernelOnly = true;
67 return WORKITEM_ID_X;
68 case Intrinsic::amdgcn_workgroup_id_x:
69 NonKernelOnly = true;
70 return WORKGROUP_ID_X;
71 case Intrinsic::amdgcn_workitem_id_y:
72 case Intrinsic::r600_read_tidig_y:
73 return WORKITEM_ID_Y;
74 case Intrinsic::amdgcn_workitem_id_z:
75 case Intrinsic::r600_read_tidig_z:
76 return WORKITEM_ID_Z;
77 case Intrinsic::amdgcn_workgroup_id_y:
78 case Intrinsic::r600_read_tgid_y:
79 return WORKGROUP_ID_Y;
80 case Intrinsic::amdgcn_workgroup_id_z:
81 case Intrinsic::r600_read_tgid_z:
82 return WORKGROUP_ID_Z;
83 case Intrinsic::amdgcn_cluster_id_x:
84 NonKernelOnly = true;
85 return CLUSTER_ID_X;
86 case Intrinsic::amdgcn_cluster_id_y:
87 return CLUSTER_ID_Y;
88 case Intrinsic::amdgcn_cluster_id_z:
89 return CLUSTER_ID_Z;
90 case Intrinsic::amdgcn_lds_kernel_id:
91 return LDS_KERNEL_ID;
92 case Intrinsic::amdgcn_dispatch_ptr:
93 return DISPATCH_PTR;
94 case Intrinsic::amdgcn_dispatch_id:
95 return DISPATCH_ID;
96 case Intrinsic::amdgcn_implicitarg_ptr:
97 return IMPLICIT_ARG_PTR;
98 // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access
99 // queue_ptr.
100 case Intrinsic::amdgcn_queue_ptr:
101 NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
102 return QUEUE_PTR;
103 case Intrinsic::amdgcn_is_shared:
104 case Intrinsic::amdgcn_is_private:
105 if (HasApertureRegs)
106 return NOT_IMPLICIT_INPUT;
107 // Under V5, we need implicitarg_ptr + offsets to access private_base or
108 // shared_base. For pre-V5, however, need to access them through queue_ptr +
109 // offsets.
110 return CodeObjectVersion >= AMDGPU::AMDHSA_COV5 ? IMPLICIT_ARG_PTR
111 : QUEUE_PTR;
112 case Intrinsic::amdgcn_wwm:
113 case Intrinsic::amdgcn_strict_wwm:
114 return WHOLE_WAVE_MODE;
115 case Intrinsic::trap:
116 case Intrinsic::debugtrap:
117 case Intrinsic::ubsantrap:
// With GetDoorbellID support the trap needs no implicit input from V4 on;
// before V4 it still goes through the queue pointer.
118 if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
119 return CodeObjectVersion >= AMDGPU::AMDHSA_COV4 ? NOT_IMPLICIT_INPUT
120 : QUEUE_PTR;
// Without GetDoorbellID the queue pointer is always required; from V5 on it
// is reached through implicitarg_ptr.
121 NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
122 return QUEUE_PTR;
123 default:
124 return UNKNOWN_INTRINSIC;
125 }
126}
127
128static bool castRequiresQueuePtr(unsigned SrcAS) {
129 return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
130}
131
/// Classifies the constant \p C by the address space of GV; returns false
/// immediately when GV is null.
// NOTE(review): the statement that initializes GV from C and the final
// return that tests AS are not visible in this extract — confirm against the
// full file before relying on this description.
132static bool isDSAddress(const Constant *C) {
134 if (!GV)
135 return false;
136 unsigned AS = GV->getAddressSpace();
138}
139
140/// Returns true if sanitizer attributes are present on a function.
141static bool hasSanitizerAttributes(const Function &F) {
142 return F.hasFnAttribute(Attribute::SanitizeAddress) ||
143 F.hasFnAttribute(Attribute::SanitizeThread) ||
144 F.hasFnAttribute(Attribute::SanitizeMemory) ||
145 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
146 F.hasFnAttribute(Attribute::SanitizeMemTag);
147}
148
149namespace {
/// InformationCache specialization used by the abstract attributes in this
/// file. It exposes the TargetMachine (for per-function subtarget queries),
/// the module's AMDHSA code object version, and a per-constant cache of
/// address-space properties.
150class AMDGPUInformationCache : public InformationCache {
151public:
// NOTE(review): the initializer list forwards `Allocator`, whose parameter
// declaration is not visible in this extract.
152 AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
154 SetVector<Function *> *CGSCC, TargetMachine &TM)
155 : InformationCache(M, AG, Allocator, CGSCC), TM(TM),
156 CodeObjectVersion(AMDGPU::getAMDHSACodeObjectVersion(M)) {}
157
158 TargetMachine &TM;
159
// Bit flags recorded per constant by getConstantAccess(). CS_WORST is the
// union of all flags and lets the traversal stop early.
160 enum ConstantStatus : uint8_t {
161 NONE = 0,
162 DS_GLOBAL = 1 << 0,
163 ADDR_SPACE_CAST_PRIVATE_TO_FLAT = 1 << 1,
164 ADDR_SPACE_CAST_LOCAL_TO_FLAT = 1 << 2,
165 ADDR_SPACE_CAST_BOTH_TO_FLAT =
166 ADDR_SPACE_CAST_PRIVATE_TO_FLAT | ADDR_SPACE_CAST_LOCAL_TO_FLAT,
167 CS_WORST = DS_GLOBAL | ADDR_SPACE_CAST_BOTH_TO_FLAT,
168 };
169
170 /// Check if the subtarget has aperture regs.
171 bool hasApertureRegs(Function &F) {
172 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
173 return ST.hasApertureRegs();
174 }
175
176 /// Check if the subtarget supports GetDoorbellID.
177 bool supportsGetDoorbellID(Function &F) {
178 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
179 return ST.supportsGetDoorbellID();
180 }
181
// Returns the (first, second) pair parsed from the
// "amdgpu-flat-work-group-size" attribute of F, or std::nullopt when
// getIntegerPairAttribute yields nothing.
182 std::optional<std::pair<unsigned, unsigned>>
183 getFlatWorkGroupSizeAttr(const Function &F) const {
184 auto R = AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size");
185 if (!R)
186 return std::nullopt;
187 return std::make_pair(R->first, *(R->second));
188 }
189
// Returns the subtarget's default flat work group size for F's calling
// convention.
190 std::pair<unsigned, unsigned>
191 getDefaultFlatWorkGroupSize(const Function &F) const {
192 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
193 return ST.getDefaultFlatWorkGroupSize(F.getCallingConv());
194 }
195
// Returns {min, max} flat work group size supported by F's subtarget.
196 std::pair<unsigned, unsigned>
197 getMaximumFlatWorkGroupRange(const Function &F) {
198 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
199 return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
200 }
201
202 /// Get code object version.
203 unsigned getCodeObjectVersion() const { return CodeObjectVersion; }
204
// Returns the pair parsed from "amdgpu-waves-per-eu", or std::nullopt when
// the attribute yields nothing. Only the first value is required; a missing
// second value is filled in with the subtarget's maximum waves per EU.
205 std::optional<std::pair<unsigned, unsigned>>
206 getWavesPerEUAttr(const Function &F) {
207 auto Val = AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu",
208 /*OnlyFirstRequired=*/true);
209 if (!Val)
210 return std::nullopt;
211 if (!Val->second) {
212 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
213 Val->second = ST.getMaxWavesPerEU();
214 }
215 return std::make_pair(Val->first, *(Val->second));
216 }
217
// Returns the maximum waves per EU of F's subtarget.
218 unsigned getMaxWavesPerEU(const Function &F) {
219 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
220 return ST.getMaxWavesPerEU();
221 }
222
// NOTE(review): the body (return statement) of this override is not visible
// in this extract.
223 unsigned getMaxAddrSpace() const override {
225 }
226
227private:
228 /// Check if the ConstantExpr \p CE uses an addrspacecast from private or
229 /// local to flat. These casts may require the queue pointer.
230 static uint8_t visitConstExpr(const ConstantExpr *CE) {
231 uint8_t Status = NONE;
232
233 if (CE->getOpcode() == Instruction::AddrSpaceCast) {
234 unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
235 if (SrcAS == AMDGPUAS::PRIVATE_ADDRESS)
236 Status |= ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
237 else if (SrcAS == AMDGPUAS::LOCAL_ADDRESS)
238 Status |= ADDR_SPACE_CAST_LOCAL_TO_FLAT;
239 }
240
241 return Status;
242 }
243
244 /// Get the constant access bitmap for \p C.
// Walks C and every constant reachable through its operands with a worklist,
// OR-ing together the flags of each visited constant. The walk stops early
// once every flag is set (CS_WORST). The final result is cached for C.
//
// NOTE(review): constants visited as operands are cached with only their own
// flags (CurCResult), not the flags of their operands, and a cache hit skips
// the operand walk. A later query that starts at, or passes through, such a
// constant therefore appears to miss flags reachable only through its
// operands — confirm whether this is intended.
// NOTE(review): the declaration of Worklist is not visible in this extract.
245 uint8_t getConstantAccess(const Constant *C) {
246 const auto &It = ConstantStatus.find(C);
247 if (It != ConstantStatus.end())
248 return It->second.value();
249
250 SmallPtrSet<const Constant *, 8> Visited;
252 Worklist.push_back(C);
253 Visited.insert(C);
254
255 uint8_t Result = 0;
256 while (Result != CS_WORST && !Worklist.empty()) {
257 const Constant *CurC = Worklist.pop_back_val();
258
// operator[] inserts an empty optional on first sight; nothing else is
// inserted into the map before it is assigned below.
259 std::optional<uint8_t> &CurCResultOrNone = ConstantStatus[CurC];
260 if (CurCResultOrNone) {
261 Result |= CurCResultOrNone.value();
262 continue;
263 }
264 uint8_t CurCResult = 0;
265
266 if (isDSAddress(CurC))
267 CurCResult |= DS_GLOBAL;
268
269 if (const auto *CE = dyn_cast<ConstantExpr>(CurC))
270 CurCResult |= visitConstExpr(CE);
271
272 for (const Use &U : CurC->operands()) {
273 if (const auto *OpC = dyn_cast<Constant>(U)) {
274 if (Visited.insert(OpC).second)
275 Worklist.push_back(OpC);
276 }
277 }
278
279 CurCResultOrNone = CurCResult;
280 Result |= CurCResult;
281 }
282
283 ConstantStatus[C] = Result;
284 return Result;
285 }
286
287public:
288 /// Returns true if \p Fn needs the queue pointer because of \p C.
289 bool needsQueuePtr(const Constant *C, Function &Fn) {
290 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
291 bool HasAperture = hasApertureRegs(Fn);
292
293 // No need to explore the constants.
294 if (!IsNonEntryFunc && HasAperture)
295 return false;
296
297 uint8_t Access = getConstantAccess(C);
298
299 // We need to trap on DS globals in non-entry functions.
300 if (IsNonEntryFunc && (Access & DS_GLOBAL))
301 return true;
302
// Without aperture registers, any private/local-to-flat cast needs it.
303 return !HasAperture && (Access & ADDR_SPACE_CAST_BOTH_TO_FLAT);
304 }
305
// Returns true if C, or a constant reachable from it, contains an
// addrspacecast whose source is the private address space.
306 bool checkConstForAddrSpaceCastFromPrivate(const Constant *C) {
307 uint8_t Access = getConstantAccess(C);
308 return Access & ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
309 }
310
311private:
312 /// Used to determine if the Constant needs the queue pointer.
313 DenseMap<const Constant *, std::optional<uint8_t>> ConstantStatus;
314 const unsigned CodeObjectVersion;
315};
316
317struct AAAMDAttributes
318 : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
319 AbstractAttribute> {
320 using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
321 AbstractAttribute>;
322
323 AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
324
325 /// Create an abstract attribute view for the position \p IRP.
326 static AAAMDAttributes &createForPosition(const IRPosition &IRP,
327 Attributor &A);
328
329 /// See AbstractAttribute::getName().
330 StringRef getName() const override { return "AAAMDAttributes"; }
331
332 /// See AbstractAttribute::getIdAddr().
333 const char *getIdAddr() const override { return &ID; }
334
335 /// This function should return true if the type of the \p AA is
336 /// AAAMDAttributes.
337 static bool classof(const AbstractAttribute *AA) {
338 return (AA->getIdAddr() == &ID);
339 }
340
341 /// Unique ID (due to the unique address)
342 static const char ID;
343};
344const char AAAMDAttributes::ID = 0;
345
346struct AAUniformWorkGroupSize
347 : public StateWrapper<BooleanState, AbstractAttribute> {
348 using Base = StateWrapper<BooleanState, AbstractAttribute>;
349 AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
350
351 /// Create an abstract attribute view for the position \p IRP.
352 static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
353 Attributor &A);
354
355 /// See AbstractAttribute::getName().
356 StringRef getName() const override { return "AAUniformWorkGroupSize"; }
357
358 /// See AbstractAttribute::getIdAddr().
359 const char *getIdAddr() const override { return &ID; }
360
361 /// This function should return true if the type of the \p AA is
362 /// AAAMDAttributes.
363 static bool classof(const AbstractAttribute *AA) {
364 return (AA->getIdAddr() == &ID);
365 }
366
367 /// Unique ID (due to the unique address)
368 static const char ID;
369};
370const char AAUniformWorkGroupSize::ID = 0;
371
372struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
373 AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
374 : AAUniformWorkGroupSize(IRP, A) {}
375
376 void initialize(Attributor &A) override {
377 Function *F = getAssociatedFunction();
378 CallingConv::ID CC = F->getCallingConv();
379
380 if (CC != CallingConv::AMDGPU_KERNEL)
381 return;
382
383 bool InitialValue = F->hasFnAttribute("uniform-work-group-size");
384
385 if (InitialValue)
386 indicateOptimisticFixpoint();
387 else
388 indicatePessimisticFixpoint();
389 }
390
391 ChangeStatus updateImpl(Attributor &A) override {
392 ChangeStatus Change = ChangeStatus::UNCHANGED;
393
394 auto CheckCallSite = [&](AbstractCallSite CS) {
395 Function *Caller = CS.getInstruction()->getFunction();
396 LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
397 << "->" << getAssociatedFunction()->getName() << "\n");
398
399 const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
400 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
401 if (!CallerInfo || !CallerInfo->isValidState())
402 return false;
403
404 Change = Change | clampStateAndIndicateChange(this->getState(),
405 CallerInfo->getState());
406
407 return true;
408 };
409
410 bool AllCallSitesKnown = true;
411 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
412 return indicatePessimisticFixpoint();
413
414 return Change;
415 }
416
417 ChangeStatus manifest(Attributor &A) override {
418 if (!getAssumed())
419 return ChangeStatus::UNCHANGED;
420
421 LLVMContext &Ctx = getAssociatedFunction()->getContext();
422 return A.manifestAttrs(getIRPosition(),
423 {Attribute::get(Ctx, "uniform-work-group-size")},
424 /*ForceReplace=*/true);
425 }
426
427 bool isValidState() const override {
428 // This state is always valid, even when the state is false.
429 return true;
430 }
431
432 const std::string getAsStr(Attributor *) const override {
433 return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
434 }
435
436 /// See AbstractAttribute::trackStatistics()
437 void trackStatistics() const override {}
438};
439
/// Allocate an AAUniformWorkGroupSizeFunction for \p IRP from the
/// Attributor's allocator. Per the trailing message, only function positions
/// are valid.
// NOTE(review): the guard preceding the allocation and the opening of the
// statement that carries the message are not visible in this extract.
440AAUniformWorkGroupSize &
441AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
442 Attributor &A) {
444 return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
446 "AAUniformWorkGroupSize is only valid for function position");
447}
448
449struct AAAMDAttributesFunction : public AAAMDAttributes {
// Forwards the position and Attributor to the AAAMDAttributes base; no state
// of its own is initialized here.
450 AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
451 : AAAMDAttributes(IRP, A) {}
452
453 void initialize(Attributor &A) override {
454 Function *F = getAssociatedFunction();
455
456 // If the function requires the implicit arg pointer due to sanitizers,
457 // assume it's needed even if explicitly marked as not requiring it.
458 // Flat scratch initialization is needed because `asan_malloc_impl`
459 // calls introduced later in pipeline will have flat scratch accesses.
460 // FIXME: FLAT_SCRATCH_INIT will not be required here if device-libs
461 // implementation for `asan_malloc_impl` is updated.
462 const bool HasSanitizerAttrs = hasSanitizerAttributes(*F);
463 if (HasSanitizerAttrs) {
464 removeAssumedBits(IMPLICIT_ARG_PTR);
465 removeAssumedBits(HOSTCALL_PTR);
466 removeAssumedBits(FLAT_SCRATCH_INIT);
467 }
468
469 for (auto Attr : ImplicitAttrs) {
470 if (HasSanitizerAttrs &&
471 (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR ||
472 Attr.first == FLAT_SCRATCH_INIT))
473 continue;
474
475 if (F->hasFnAttribute(Attr.second))
476 addKnownBits(Attr.first);
477 }
478
479 if (F->isDeclaration())
480 return;
481
482 // Ignore functions with graphics calling conventions, these are currently
483 // not allowed to have kernel arguments.
484 if (AMDGPU::isGraphics(F->getCallingConv())) {
485 indicatePessimisticFixpoint();
486 return;
487 }
488 }
489
490 ChangeStatus updateImpl(Attributor &A) override {
491 Function *F = getAssociatedFunction();
492 // The current assumed state used to determine a change.
493 auto OrigAssumed = getAssumed();
494
495 // Check for Intrinsics and propagate attributes.
496 const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
497 *this, this->getIRPosition(), DepClassTy::REQUIRED);
498 if (!AAEdges || !AAEdges->isValidState() ||
499 AAEdges->hasNonAsmUnknownCallee())
500 return indicatePessimisticFixpoint();
501
502 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
503
504 bool NeedsImplicit = false;
505 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
506 bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
507 bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
508 unsigned COV = InfoCache.getCodeObjectVersion();
509
510 for (Function *Callee : AAEdges->getOptimisticEdges()) {
511 Intrinsic::ID IID = Callee->getIntrinsicID();
512 if (IID == Intrinsic::not_intrinsic) {
513 const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>(
514 *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
515 if (!AAAMD || !AAAMD->isValidState())
516 return indicatePessimisticFixpoint();
517 *this &= *AAAMD;
518 continue;
519 }
520
521 bool NonKernelOnly = false;
522 ImplicitArgumentMask AttrMask =
523 intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
524 HasApertureRegs, SupportsGetDoorbellID, COV);
525
526 if (AttrMask == UNKNOWN_INTRINSIC) {
527 // Assume not-nocallback intrinsics may invoke a function which accesses
528 // implicit arguments.
529 //
530 // FIXME: This isn't really the correct check. We want to ensure it
531 // isn't calling any function that may use implicit arguments regardless
532 // of whether it's internal to the module or not.
533 //
534 // TODO: Ignoring callsite attributes.
535 if (!Callee->hasFnAttribute(Attribute::NoCallback))
536 return indicatePessimisticFixpoint();
537 continue;
538 }
539
540 if (AttrMask != NOT_IMPLICIT_INPUT) {
541 if ((IsNonEntryFunc || !NonKernelOnly))
542 removeAssumedBits(AttrMask);
543 }
544 }
545
546 // Need implicitarg_ptr to acess queue_ptr, private_base, and shared_base.
547 if (NeedsImplicit)
548 removeAssumedBits(IMPLICIT_ARG_PTR);
549
550 if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
551 // Under V5, we need implicitarg_ptr + offsets to access private_base or
552 // shared_base. We do not actually need queue_ptr.
553 if (COV >= 5)
554 removeAssumedBits(IMPLICIT_ARG_PTR);
555 else
556 removeAssumedBits(QUEUE_PTR);
557 }
558
559 if (funcRetrievesMultigridSyncArg(A, COV)) {
560 assert(!isAssumed(IMPLICIT_ARG_PTR) &&
561 "multigrid_sync_arg needs implicitarg_ptr");
562 removeAssumedBits(MULTIGRID_SYNC_ARG);
563 }
564
565 if (funcRetrievesHostcallPtr(A, COV)) {
566 assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
567 removeAssumedBits(HOSTCALL_PTR);
568 }
569
570 if (funcRetrievesHeapPtr(A, COV)) {
571 assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
572 removeAssumedBits(HEAP_PTR);
573 }
574
575 if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {
576 assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
577 removeAssumedBits(QUEUE_PTR);
578 }
579
580 if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
581 removeAssumedBits(LDS_KERNEL_ID);
582 }
583
584 if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))
585 removeAssumedBits(DEFAULT_QUEUE);
586
587 if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
588 removeAssumedBits(COMPLETION_ACTION);
589
590 if (isAssumed(FLAT_SCRATCH_INIT) && needFlatScratchInit(A))
591 removeAssumedBits(FLAT_SCRATCH_INIT);
592
593 return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
594 : ChangeStatus::UNCHANGED;
595 }
596
// Emits one string attribute per known bit: for every entry of ImplicitAttrs
// whose mask is known, the entry's attribute name is appended to AttrList and
// the list is applied to this position, replacing existing attributes.
// NOTE(review): the declaration of AttrList is not visible in this extract.
597 ChangeStatus manifest(Attributor &A) override {
599 LLVMContext &Ctx = getAssociatedFunction()->getContext();
600
601 for (auto Attr : ImplicitAttrs) {
602 if (isKnown(Attr.first))
603 AttrList.push_back(Attribute::get(Ctx, Attr.second));
604 }
605
606 return A.manifestAttrs(getIRPosition(), AttrList,
607 /* ForceReplace */ true);
608 }
609
610 const std::string getAsStr(Attributor *) const override {
611 std::string Str;
612 raw_string_ostream OS(Str);
613 OS << "AMDInfo[";
614 for (auto Attr : ImplicitAttrs)
615 if (isAssumed(Attr.first))
616 OS << ' ' << Attr.second;
617 OS << " ]";
618 return OS.str();
619 }
620
621 /// See AbstractAttribute::trackStatistics()
// Intentionally empty: this attribute records no statistics.
622 void trackStatistics() const override {}
623
624private:
625 bool checkForQueuePtr(Attributor &A) {
626 Function *F = getAssociatedFunction();
627 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
628
629 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
630
631 bool NeedsQueuePtr = false;
632
633 auto CheckAddrSpaceCasts = [&](Instruction &I) {
634 unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
635 if (castRequiresQueuePtr(SrcAS)) {
636 NeedsQueuePtr = true;
637 return false;
638 }
639 return true;
640 };
641
642 bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
643
644 // `checkForAllInstructions` is much more cheaper than going through all
645 // instructions, try it first.
646
647 // The queue pointer is not needed if aperture regs is present.
648 if (!HasApertureRegs) {
649 bool UsedAssumedInformation = false;
650 A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
651 {Instruction::AddrSpaceCast},
652 UsedAssumedInformation);
653 }
654
655 // If we found that we need the queue pointer, nothing else to do.
656 if (NeedsQueuePtr)
657 return true;
658
659 if (!IsNonEntryFunc && HasApertureRegs)
660 return false;
661
662 for (BasicBlock &BB : *F) {
663 for (Instruction &I : BB) {
664 for (const Use &U : I.operands()) {
665 if (const auto *C = dyn_cast<Constant>(U)) {
666 if (InfoCache.needsQueuePtr(C, *F))
667 return true;
668 }
669 }
670 }
671 }
672
673 return false;
674 }
675
// Returns true if the function may access the implicit kernel argument range
// of size 8 that starts at Pos (delegates to funcRetrievesImplicitKernelArg).
// NOTE(review): the definition of Pos is not visible in this extract;
// presumably it is the multigrid-sync-arg position for COV — confirm.
676 bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {
678 AA::RangeTy Range(Pos, 8);
679 return funcRetrievesImplicitKernelArg(A, Range);
680 }
681
// Returns true if the function may access the implicit kernel argument range
// of size 8 that starts at Pos (delegates to funcRetrievesImplicitKernelArg).
// NOTE(review): the definition of Pos is not visible in this extract;
// presumably it is the hostcall pointer position for COV — confirm.
682 bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {
684 AA::RangeTy Range(Pos, 8);
685 return funcRetrievesImplicitKernelArg(A, Range);
686 }
687
// Returns true if the function may access the implicit kernel argument range
// of size 8 that starts at Pos (delegates to funcRetrievesImplicitKernelArg).
// NOTE(review): the definition of Pos is not visible in this extract;
// presumably it is the default-queue position for COV — confirm.
688 bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {
690 AA::RangeTy Range(Pos, 8);
691 return funcRetrievesImplicitKernelArg(A, Range);
692 }
693
// Returns true if the function may access the implicit kernel argument range
// of size 8 that starts at Pos (delegates to funcRetrievesImplicitKernelArg).
// NOTE(review): the definition of Pos is not visible in this extract;
// presumably it is the completion-action position for COV — confirm.
694 bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {
696 AA::RangeTy Range(Pos, 8);
697 return funcRetrievesImplicitKernelArg(A, Range);
698 }
699
// Returns false for code object versions below 5; otherwise returns whether
// the function may access the implicit kernel argument range Range
// (delegates to funcRetrievesImplicitKernelArg).
// NOTE(review): the definition of Range is not visible in this extract;
// presumably it covers the heap pointer — confirm.
700 bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {
701 if (COV < 5)
702 return false;
704 return funcRetrievesImplicitKernelArg(A, Range);
705 }
706
// Returns false for code object versions below 5; otherwise returns whether
// the function may access the implicit kernel argument range Range
// (delegates to funcRetrievesImplicitKernelArg).
// NOTE(review): the definition of Range is not visible in this extract;
// presumably it covers the queue pointer — confirm.
707 bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {
708 if (COV < 5)
709 return false;
711 return funcRetrievesImplicitKernelArg(A, Range);
712 }
713
714 bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
715 // Check if this is a call to the implicitarg_ptr builtin and it
716 // is used to retrieve the hostcall pointer. The implicit arg for
717 // hostcall is not used only if every use of the implicitarg_ptr
718 // is a load that clearly does not retrieve any byte of the
719 // hostcall pointer. We check this by tracing all the uses of the
720 // initial call to the implicitarg_ptr intrinsic.
721 auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
722 auto &Call = cast<CallBase>(I);
723 if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
724 return true;
725
726 const auto *PointerInfoAA = A.getAAFor<AAPointerInfo>(
727 *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
728 if (!PointerInfoAA || !PointerInfoAA->getState().isValidState())
729 return false;
730
731 return PointerInfoAA->forallInterferingAccesses(
732 Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {
733 return Acc.getRemoteInst()->isDroppable();
734 });
735 };
736
737 bool UsedAssumedInformation = false;
738 return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
739 UsedAssumedInformation);
740 }
741
742 bool funcRetrievesLDSKernelId(Attributor &A) {
743 auto DoesNotRetrieve = [&](Instruction &I) {
744 auto &Call = cast<CallBase>(I);
745 return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
746 };
747 bool UsedAssumedInformation = false;
748 return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
749 UsedAssumedInformation);
750 }
751
752 // Returns true if FlatScratchInit is needed, i.e., no-flat-scratch-init is
753 // not to be set.
// Three checks run in order, each returning true on the first hit:
// addrspacecast instructions, constant operands containing a cast from the
// private address space, and calls to amdgcn_addrspacecast_nonnull.
754 bool needFlatScratchInit(Attributor &A) {
755 assert(isAssumed(FLAT_SCRATCH_INIT)); // only called if the bit is still set
756
757 // Check all AddrSpaceCast instructions. FlatScratchInit is needed if
758 // there is a cast from PRIVATE_ADDRESS.
// NOTE(review): the right-hand side of the comparison in this lambda is not
// visible in this extract; per the comment above it is presumably
// PRIVATE_ADDRESS — confirm.
759 auto AddrSpaceCastNotFromPrivate = [](Instruction &I) {
760 return cast<AddrSpaceCastInst>(I).getSrcAddressSpace() !=
762 };
763
764 bool UsedAssumedInformation = false;
765 if (!A.checkForAllInstructions(AddrSpaceCastNotFromPrivate, *this,
766 {Instruction::AddrSpaceCast},
767 UsedAssumedInformation))
768 return true;
769
770 // Check for addrSpaceCast from PRIVATE_ADDRESS in constant expressions
771 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
772
773 Function *F = getAssociatedFunction();
774 for (Instruction &I : instructions(F)) {
775 for (const Use &U : I.operands()) {
776 if (const auto *C = dyn_cast<Constant>(U)) {
777 if (InfoCache.checkConstForAddrSpaceCastFromPrivate(C))
778 return true;
779 }
780 }
781 }
782
783 // Finally check callees.
784
785 // This is called on each callee; false means callee shouldn't have
786 // no-flat-scratch-init.
787 auto CheckForNoFlatScratchInit = [&](Instruction &I) {
788 const auto &CB = cast<CallBase>(I);
789 const Function *Callee = CB.getCalledFunction();
790
791 // Callee == 0 for inline asm or indirect call with known callees.
792 // In the latter case, updateImpl() already checked the callees and we
793 // know their FLAT_SCRATCH_INIT bit is set.
794 // If function has indirect call with unknown callees, the bit is
795 // already removed in updateImpl() and execution won't reach here.
796 if (!Callee)
797 return true;
798
// Among direct callees, only this intrinsic forces flat scratch init.
799 return Callee->getIntrinsicID() !=
800 Intrinsic::amdgcn_addrspacecast_nonnull;
801 };
802
803 UsedAssumedInformation = false;
804 // If any callee is false (i.e. need FlatScratchInit),
805 // checkForAllCallLikeInstructions returns false, in which case this
806 // function returns true.
807 return !A.checkForAllCallLikeInstructions(CheckForNoFlatScratchInit, *this,
808 UsedAssumedInformation);
809 }
810};
811
/// Allocate an AAAMDAttributesFunction for \p IRP from the Attributor's
/// allocator; any other position kind falls through to llvm_unreachable.
// NOTE(review): the guard preceding the allocation is not visible in this
// extract.
812AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
813 Attributor &A) {
815 return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
816 llvm_unreachable("AAAMDAttributes is only valid for function position");
817}
818
/// Base class for attributes whose state is an integer range and whose
/// manifested form is the string attribute \c AttrName with the value
/// "lower,upper". The state is constructed with a bit width of 32.
820struct AAAMDSizeRangeAttribute
821 : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
822 using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
823
// Name of the string attribute emitted by
// emitAttributeIfNotDefaultAfterClamp().
824 StringRef AttrName;
825
826 AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A,
827 StringRef AttrName)
828 : Base(IRP, 32), AttrName(AttrName) {}
829
830 /// See AbstractAttribute::trackStatistics()
831 void trackStatistics() const override {}
832
// Shared update step: clamp this state by the AttributeImpl state of every
// caller. Returns the pessimistic fixpoint when the call-site walk does not
// succeed (a caller's state is missing or invalid, or not all call sites
// could be checked).
833 template <class AttributeImpl> ChangeStatus updateImplImpl(Attributor &A) {
834 ChangeStatus Change = ChangeStatus::UNCHANGED;
835
836 auto CheckCallSite = [&](AbstractCallSite CS) {
837 Function *Caller = CS.getInstruction()->getFunction();
838 LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
839 << "->" << getAssociatedFunction()->getName() << '\n');
840
841 const auto *CallerInfo = A.getAAFor<AttributeImpl>(
842 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
843 if (!CallerInfo || !CallerInfo->isValidState())
844 return false;
845
846 Change |=
847 clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
848
849 return true;
850 };
851
852 bool AllCallSitesKnown = true;
853 if (!A.checkForAllCallSites(CheckCallSite, *this,
854 /*RequireAllCallSites=*/true,
855 AllCallSitesKnown))
856 return indicatePessimisticFixpoint();
857
858 return Change;
859 }
860
861 /// Clamp the assumed range to the default value ([Min, Max]) and emit the
862 /// attribute if it is not same as default.
// The assumed range's upper bound is exclusive, which is why it is compared
// against Max + 1 and printed as Upper - 1.
// NOTE(review): the return-type line of this method is not visible in this
// extract; every return statement yields a ChangeStatus.
864 emitAttributeIfNotDefaultAfterClamp(Attributor &A,
865 std::pair<unsigned, unsigned> Default) {
866 auto [Min, Max] = Default;
867 unsigned Lower = getAssumed().getLower().getZExtValue();
868 unsigned Upper = getAssumed().getUpper().getZExtValue();
869
870 // Clamp the range to the default value.
871 if (Lower < Min)
872 Lower = Min;
873 if (Upper > Max + 1)
874 Upper = Max + 1;
875
876 // No manifest if the value is invalid or same as default after clamp.
877 if ((Lower == Min && Upper == Max + 1) || (Upper < Lower))
878 return ChangeStatus::UNCHANGED;
879
880 Function *F = getAssociatedFunction();
881 LLVMContext &Ctx = F->getContext();
882 SmallString<10> Buffer;
883 raw_svector_ostream OS(Buffer);
884 OS << Lower << ',' << Upper - 1;
885 return A.manifestAttrs(getIRPosition(),
886 {Attribute::get(Ctx, AttrName, OS.str())},
887 /*ForceReplace=*/true);
888 }
889
// Debug rendering: "<name>[lower,upper-1]" of the assumed range.
890 const std::string getAsStr(Attributor *) const override {
891 std::string Str;
892 raw_string_ostream OS(Str);
893 OS << getName() << '[';
894 OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
895 OS << ']';
896 return OS.str();
897 }
898};
899
900/// Propagate amdgpu-flat-work-group-size attribute.
901struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
902 AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
903 : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {}
904
905 void initialize(Attributor &A) override {
906 Function *F = getAssociatedFunction();
907 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
908
909 bool HasAttr = false;
910 auto Range = InfoCache.getDefaultFlatWorkGroupSize(*F);
911 auto MaxRange = InfoCache.getMaximumFlatWorkGroupRange(*F);
912
913 if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F)) {
914 // We only consider an attribute that is not max range because the front
915 // end always emits the attribute, unfortunately, and sometimes it emits
916 // the max range.
917 if (*Attr != MaxRange) {
918 Range = *Attr;
919 HasAttr = true;
920 }
921 }
922
923 // We don't want to directly clamp the state if it's the max range because
924 // that is basically the worst state.
925 if (Range == MaxRange)
926 return;
927
928 auto [Min, Max] = Range;
929 ConstantRange CR(APInt(32, Min), APInt(32, Max + 1));
930 IntegerRangeState IRS(CR);
931 clampStateAndIndicateChange(this->getState(), IRS);
932
933 if (HasAttr || AMDGPU::isEntryFunctionCC(F->getCallingConv()))
934 indicateOptimisticFixpoint();
935 }
936
937 ChangeStatus updateImpl(Attributor &A) override {
938 return updateImplImpl<AAAMDFlatWorkGroupSize>(A);
939 }
940
941 /// Create an abstract attribute view for the position \p IRP.
942 static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
943 Attributor &A);
944
945 ChangeStatus manifest(Attributor &A) override {
946 Function *F = getAssociatedFunction();
947 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
948 return emitAttributeIfNotDefaultAfterClamp(
949 A, InfoCache.getMaximumFlatWorkGroupRange(*F));
950 }
951
952 /// See AbstractAttribute::getName()
953 StringRef getName() const override { return "AAAMDFlatWorkGroupSize"; }
954
955 /// See AbstractAttribute::getIdAddr()
956 const char *getIdAddr() const override { return &ID; }
957
958 /// This function should return true if the type of the \p AA is
959 /// AAAMDFlatWorkGroupSize
960 static bool classof(const AbstractAttribute *AA) {
961 return (AA->getIdAddr() == &ID);
962 }
963
964 /// Unique ID (due to the unique address)
965 static const char ID;
966};
967
968const char AAAMDFlatWorkGroupSize::ID = 0;
969
970AAAMDFlatWorkGroupSize &
971AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
972 Attributor &A) {
974 return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
976 "AAAMDFlatWorkGroupSize is only valid for function position");
977}
978
979struct TupleDecIntegerRangeState : public AbstractState {
980 DecIntegerState<uint32_t> X, Y, Z;
981
982 bool isValidState() const override {
983 return X.isValidState() && Y.isValidState() && Z.isValidState();
984 }
985
986 bool isAtFixpoint() const override {
987 return X.isAtFixpoint() && Y.isAtFixpoint() && Z.isAtFixpoint();
988 }
989
990 ChangeStatus indicateOptimisticFixpoint() override {
991 return X.indicateOptimisticFixpoint() | Y.indicateOptimisticFixpoint() |
992 Z.indicateOptimisticFixpoint();
993 }
994
995 ChangeStatus indicatePessimisticFixpoint() override {
996 return X.indicatePessimisticFixpoint() | Y.indicatePessimisticFixpoint() |
997 Z.indicatePessimisticFixpoint();
998 }
999
1000 TupleDecIntegerRangeState operator^=(const TupleDecIntegerRangeState &Other) {
1001 X ^= Other.X;
1002 Y ^= Other.Y;
1003 Z ^= Other.Z;
1004 return *this;
1005 }
1006
1007 bool operator==(const TupleDecIntegerRangeState &Other) const {
1008 return X == Other.X && Y == Other.Y && Z == Other.Z;
1009 }
1010
1011 TupleDecIntegerRangeState &getAssumed() { return *this; }
1012 const TupleDecIntegerRangeState &getAssumed() const { return *this; }
1013};
1014
// NOTE(review): nothing in this part of the file refers to this alias;
// AAAMDMaxNumWorkgroups derives from
// StateWrapper<TupleDecIntegerRangeState, AbstractAttribute> directly, without
// the trailing uint32_t argument used here. Confirm whether the alias is still
// needed.
1015using AAAMDMaxNumWorkgroupsState =
1016 StateWrapper<TupleDecIntegerRangeState, AbstractAttribute, uint32_t>;
1017
1018/// Propagate amdgpu-max-num-workgroups attribute.
1019struct AAAMDMaxNumWorkgroups
1020 : public StateWrapper<TupleDecIntegerRangeState, AbstractAttribute> {
1021 using Base = StateWrapper<TupleDecIntegerRangeState, AbstractAttribute>;
1022
1023 AAAMDMaxNumWorkgroups(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1024
1025 void initialize(Attributor &A) override {
1026 Function *F = getAssociatedFunction();
1027
1028 SmallVector<unsigned> MaxNumWorkgroups = AMDGPU::getMaxNumWorkGroups(*F);
1029
1030 X.takeKnownMinimum(MaxNumWorkgroups[0]);
1031 Y.takeKnownMinimum(MaxNumWorkgroups[1]);
1032 Z.takeKnownMinimum(MaxNumWorkgroups[2]);
1033
1034 if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
1035 indicatePessimisticFixpoint();
1036 }
1037
1038 ChangeStatus updateImpl(Attributor &A) override {
1039 ChangeStatus Change = ChangeStatus::UNCHANGED;
1040
1041 auto CheckCallSite = [&](AbstractCallSite CS) {
1042 Function *Caller = CS.getInstruction()->getFunction();
1043 LLVM_DEBUG(dbgs() << "[AAAMDMaxNumWorkgroups] Call " << Caller->getName()
1044 << "->" << getAssociatedFunction()->getName() << '\n');
1045
1046 const auto *CallerInfo = A.getAAFor<AAAMDMaxNumWorkgroups>(
1047 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
1048 if (!CallerInfo || !CallerInfo->isValidState())
1049 return false;
1050
1051 Change |=
1052 clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
1053 return true;
1054 };
1055
1056 bool AllCallSitesKnown = true;
1057 if (!A.checkForAllCallSites(CheckCallSite, *this,
1058 /*RequireAllCallSites=*/true,
1059 AllCallSitesKnown))
1060 return indicatePessimisticFixpoint();
1061
1062 return Change;
1063 }
1064
1065 /// Create an abstract attribute view for the position \p IRP.
1066 static AAAMDMaxNumWorkgroups &createForPosition(const IRPosition &IRP,
1067 Attributor &A);
1068
1069 ChangeStatus manifest(Attributor &A) override {
1070 Function *F = getAssociatedFunction();
1071 LLVMContext &Ctx = F->getContext();
1072 SmallString<32> Buffer;
1073 raw_svector_ostream OS(Buffer);
1074 OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed();
1075
1076 // TODO: Should annotate loads of the group size for this to do anything
1077 // useful.
1078 return A.manifestAttrs(
1079 getIRPosition(),
1080 {Attribute::get(Ctx, "amdgpu-max-num-workgroups", OS.str())},
1081 /* ForceReplace= */ true);
1082 }
1083
1084 StringRef getName() const override { return "AAAMDMaxNumWorkgroups"; }
1085
1086 const std::string getAsStr(Attributor *) const override {
1087 std::string Buffer = "AAAMDMaxNumWorkgroupsState[";
1088 raw_string_ostream OS(Buffer);
1089 OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed()
1090 << ']';
1091 return OS.str();
1092 }
1093
1094 const char *getIdAddr() const override { return &ID; }
1095
1096 /// This function should return true if the type of the \p AA is
1097 /// AAAMDMaxNumWorkgroups
1098 static bool classof(const AbstractAttribute *AA) {
1099 return (AA->getIdAddr() == &ID);
1100 }
1101
1102 void trackStatistics() const override {}
1103
1104 /// Unique ID (due to the unique address)
1105 static const char ID;
1106};
1107
1108const char AAAMDMaxNumWorkgroups::ID = 0;
1109
1110AAAMDMaxNumWorkgroups &
1111AAAMDMaxNumWorkgroups::createForPosition(const IRPosition &IRP, Attributor &A) {
1113 return *new (A.Allocator) AAAMDMaxNumWorkgroups(IRP, A);
1114 llvm_unreachable("AAAMDMaxNumWorkgroups is only valid for function position");
1115}
1116
1117/// Propagate amdgpu-waves-per-eu attribute.
1118struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
1119 AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
1120 : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}
1121
1122 void initialize(Attributor &A) override {
1123 Function *F = getAssociatedFunction();
1124 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1125
1126 // If the attribute exists, we will honor it if it is not the default.
1127 if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) {
1128 std::pair<unsigned, unsigned> MaxWavesPerEURange{
1129 1U, InfoCache.getMaxWavesPerEU(*F)};
1130 if (*Attr != MaxWavesPerEURange) {
1131 auto [Min, Max] = *Attr;
1132 ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
1133 IntegerRangeState RangeState(Range);
1134 this->getState() = RangeState;
1135 indicateOptimisticFixpoint();
1136 return;
1137 }
1138 }
1139
1140 if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
1141 indicatePessimisticFixpoint();
1142 }
1143
1144 ChangeStatus updateImpl(Attributor &A) override {
1145 ChangeStatus Change = ChangeStatus::UNCHANGED;
1146
1147 auto CheckCallSite = [&](AbstractCallSite CS) {
1148 Function *Caller = CS.getInstruction()->getFunction();
1149 Function *Func = getAssociatedFunction();
1150 LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
1151 << "->" << Func->getName() << '\n');
1152 (void)Func;
1153
1154 const auto *CallerAA = A.getAAFor<AAAMDWavesPerEU>(
1155 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
1156 if (!CallerAA || !CallerAA->isValidState())
1157 return false;
1158
1159 ConstantRange Assumed = getAssumed();
1160 unsigned Min = std::max(Assumed.getLower().getZExtValue(),
1161 CallerAA->getAssumed().getLower().getZExtValue());
1162 unsigned Max = std::max(Assumed.getUpper().getZExtValue(),
1163 CallerAA->getAssumed().getUpper().getZExtValue());
1164 ConstantRange Range(APInt(32, Min), APInt(32, Max));
1165 IntegerRangeState RangeState(Range);
1166 getState() = RangeState;
1167 Change |= getState() == Assumed ? ChangeStatus::UNCHANGED
1168 : ChangeStatus::CHANGED;
1169
1170 return true;
1171 };
1172
1173 bool AllCallSitesKnown = true;
1174 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
1175 return indicatePessimisticFixpoint();
1176
1177 return Change;
1178 }
1179
1180 /// Create an abstract attribute view for the position \p IRP.
1181 static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP,
1182 Attributor &A);
1183
1184 ChangeStatus manifest(Attributor &A) override {
1185 Function *F = getAssociatedFunction();
1186 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1187 return emitAttributeIfNotDefaultAfterClamp(
1188 A, {1U, InfoCache.getMaxWavesPerEU(*F)});
1189 }
1190
1191 /// See AbstractAttribute::getName()
1192 StringRef getName() const override { return "AAAMDWavesPerEU"; }
1193
1194 /// See AbstractAttribute::getIdAddr()
1195 const char *getIdAddr() const override { return &ID; }
1196
1197 /// This function should return true if the type of the \p AA is
1198 /// AAAMDWavesPerEU
1199 static bool classof(const AbstractAttribute *AA) {
1200 return (AA->getIdAddr() == &ID);
1201 }
1202
1203 /// Unique ID (due to the unique address)
1204 static const char ID;
1205};
1206
1207const char AAAMDWavesPerEU::ID = 0;
1208
1209AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
1210 Attributor &A) {
1212 return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);
1213 llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
1214}
1215
1216/// Compute the minimum number of AGPRs required to allocate the inline asm.
1217static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
1218 const CallBase &Call) {
1219 unsigned ArgNo = 0;
1220 unsigned ResNo = 0;
1221 unsigned AGPRDefCount = 0;
1222 unsigned AGPRUseCount = 0;
1223 unsigned MaxPhysReg = 0;
1224 const DataLayout &DL = Call.getFunction()->getParent()->getDataLayout();
1225
1226 // TODO: Overestimates due to not accounting for tied operands
1227 for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
1228 Type *Ty = nullptr;
1229 switch (CI.Type) {
1230 case InlineAsm::isOutput: {
1231 Ty = Call.getType();
1232 if (auto *STy = dyn_cast<StructType>(Ty))
1233 Ty = STy->getElementType(ResNo);
1234 ++ResNo;
1235 break;
1236 }
1237 case InlineAsm::isInput: {
1238 Ty = Call.getArgOperand(ArgNo++)->getType();
1239 break;
1240 }
1241 case InlineAsm::isLabel:
1242 continue;
1244 // Parse the physical register reference.
1245 break;
1246 }
1247
1248 for (StringRef Code : CI.Codes) {
1249 unsigned RegCount = 0;
1250 if (Code.starts_with("a")) {
1251 // Virtual register, compute number of registers based on the type.
1252 //
1253 // We ought to be going through TargetLowering to get the number of
1254 // registers, but we should avoid the dependence on CodeGen here.
1255 RegCount = divideCeil(DL.getTypeSizeInBits(Ty), 32);
1256 } else {
1257 // Physical register reference
1258 auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Code);
1259 if (Kind == 'a') {
1260 RegCount = NumRegs;
1261 MaxPhysReg = std::max(MaxPhysReg, std::min(RegIdx + NumRegs, 256u));
1262 }
1263
1264 continue;
1265 }
1266
1267 if (CI.Type == InlineAsm::isOutput) {
1268 // Apply tuple alignment requirement
1269 //
1270 // TODO: This is more conservative than necessary.
1271 AGPRDefCount = alignTo(AGPRDefCount, RegCount);
1272
1273 AGPRDefCount += RegCount;
1274 if (CI.isEarlyClobber) {
1275 AGPRUseCount = alignTo(AGPRUseCount, RegCount);
1276 AGPRUseCount += RegCount;
1277 }
1278 } else {
1279 AGPRUseCount = alignTo(AGPRUseCount, RegCount);
1280 AGPRUseCount += RegCount;
1281 }
1282 }
1283 }
1284
1285 unsigned MaxVirtReg = std::max(AGPRUseCount, AGPRDefCount);
1286
1287 // TODO: This is overly conservative. If there are any physical registers,
1288 // allocate any virtual registers after them so we don't have to solve optimal
1289 // packing.
1290 return std::min(MaxVirtReg + MaxPhysReg, 256u);
1291}
1292
/// Abstract attribute over a DecIntegerState<> whose manifest() writes the
/// assumed value as the "amdgpu-agpr-alloc" function attribute.
///
/// NOTE(review): this listing has gaps in its embedded line numbering (1300,
/// 1302, 1316, 1347, 1353, 1357, 1387 and 1395 are absent), so several
/// statements below are incomplete as shown. Restore them from the upstream
/// file before compiling or changing this struct.
1293struct AAAMDGPUMinAGPRAlloc
1294 : public StateWrapper<DecIntegerState<>, AbstractAttribute> {
1295 using Base = StateWrapper<DecIntegerState<>, AbstractAttribute>;
1296 AAAMDGPUMinAGPRAlloc(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1297
// Factory: placement-allocates the attribute in the Attributor's allocator.
// NOTE(review): the lines before and after the return (1300, 1302) are
// missing; the trailing string suggests a position-kind check followed by an
// llvm_unreachable, as in the sibling createForPosition functions — confirm.
1298 static AAAMDGPUMinAGPRAlloc &createForPosition(const IRPosition &IRP,
1299 Attributor &A) {
1301 return *new (A.Allocator) AAAMDGPUMinAGPRAlloc(IRP, A);
1303 "AAAMDGPUMinAGPRAlloc is only valid for function position");
1304 }
1305
// Reads the existing "amdgpu-agpr-alloc" pair (default {~0u, ~0u}, only the
// first element required). A minimum of 0 is fixed optimistically.
1306 void initialize(Attributor &A) override {
1307 Function *F = getAssociatedFunction();
1308 auto [MinNumAGPR, MaxNumAGPR] =
1309 AMDGPU::getIntegerPairAttribute(*F, "amdgpu-agpr-alloc", {~0u, ~0u},
1310 /*OnlyFirstRequired=*/true);
1311 if (MinNumAGPR == 0) {
1312 indicateOptimisticFixpoint();
1313 return;
1314 }
1315
// NOTE(review): line 1316 is missing; as shown the pessimistic fixpoint below
// is unconditional, which would make updateImpl pointless — presumably a
// guarding condition belongs here. Confirm against upstream.
1317 indicatePessimisticFixpoint();
1318 }
1319
// Debug rendering: "amdgpu-agpr-alloc=" followed by the assumed value.
1320 const std::string getAsStr(Attributor *A) const override {
1321 std::string Str = "amdgpu-agpr-alloc=";
1322 raw_string_ostream OS(Str);
1323 OS << getAssumed();
1324 return OS.str();
1325 }
1326
1327 void trackStatistics() const override {}
1328
// Accumulates into a fresh DecIntegerState (Maximum) over all call-like
// instructions and then clamps this state with it. Any instruction for which
// the callback returns false leads to a pessimistic fixpoint.
1329 ChangeStatus updateImpl(Attributor &A) override {
1330 DecIntegerState<> Maximum;
1331
1332 // Check for cases which require allocation of AGPRs. The only cases where
1333 // AGPRs are required are if there are direct references to AGPRs, so inline
1334 // assembly and special intrinsics.
1335 auto CheckForMinAGPRAllocs = [&](Instruction &I) {
1336 const auto &CB = cast<CallBase>(I);
1337 const Value *CalleeOp = CB.getCalledOperand();
1338
1339 if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp)) {
1340 // Technically, the inline asm could be invoking a call to an unknown
1341 // external function that requires AGPRs, but ignore that.
1342 unsigned NumRegs = inlineAsmGetNumRequiredAGPRs(IA, CB);
1343 Maximum.takeAssumedMaximum(NumRegs);
1344 return true;
1345 }
// NOTE(review): the first case label (line 1347) is missing; its `break`
// falls through to the call-edge handling after the switch, so it is
// presumably the "not an intrinsic" case — confirm.
1346 switch (CB.getIntrinsicID()) {
1348 break;
1349 case Intrinsic::write_register:
1350 case Intrinsic::read_register:
1351 case Intrinsic::read_volatile_register: {
// NOTE(review): lines 1353 and 1357 are missing, so both the cast expression
// and the initializer of the structured binding are incomplete as shown.
1352 const MDString *RegName = cast<MDString>(
1354 cast<MetadataAsValue>(CB.getArgOperand(0))->getMetadata())
1355 ->getOperand(0));
1356 auto [Kind, RegIdx, NumRegs] =
1358 if (Kind == 'a')
1359 Maximum.takeAssumedMaximum(std::min(RegIdx + NumRegs, 256u));
1360
1361 return true;
1362 }
1363 // Trap-like intrinsics such as llvm.trap and llvm.debugtrap do not have
1364 // the nocallback attribute, so the AMDGPU attributor can conservatively
1365 // drop all implicitly-known inputs and AGPR allocation information. Make
1366 // sure we still infer that no implicit inputs are required and that the
1367 // AGPR allocation stays at zero. Trap-like intrinsics may invoke a
1368 // function which requires AGPRs, so we need to check if the called
1369 // function has the "trap-func-name" attribute.
1370 case Intrinsic::trap:
1371 case Intrinsic::debugtrap:
1372 case Intrinsic::ubsantrap:
1373 return CB.hasFnAttr(Attribute::NoCallback) ||
1374 !CB.hasFnAttr("trap-func-name");
1375 default:
1376 // Some intrinsics may use AGPRs, but if we have a choice, we are not
1377 // required to use AGPRs.
1378 // Assume !nocallback intrinsics may call a function which requires
1379 // AGPRs.
1380 return CB.hasFnAttr(Attribute::NoCallback);
1381 }
1382
1383 // TODO: Handle callsite attributes
1384 auto *CBEdges = A.getAAFor<AACallEdges>(
1385 *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
// NOTE(review): line 1387 (first statement of this failure branch) is missing.
1386 if (!CBEdges || CBEdges->hasUnknownCallee()) {
1388 return false;
1389 }
1390
// Fold in the assumed value of every optimistic callee of this call site.
1391 for (const Function *PossibleCallee : CBEdges->getOptimisticEdges()) {
1392 const auto *CalleeInfo = A.getAAFor<AAAMDGPUMinAGPRAlloc>(
1393 *this, IRPosition::function(*PossibleCallee), DepClassTy::REQUIRED);
// NOTE(review): line 1395 (first statement of this failure branch) is missing.
1394 if (!CalleeInfo || !CalleeInfo->isValidState()) {
1396 return false;
1397 }
1398
1399 Maximum.takeAssumedMaximum(CalleeInfo->getAssumed());
1400 }
1401
1402 return true;
1403 };
1404
1405 bool UsedAssumedInformation = false;
1406 if (!A.checkForAllCallLikeInstructions(CheckForMinAGPRAllocs, *this,
1407 UsedAssumedInformation))
1408 return indicatePessimisticFixpoint();
1409
1410 return clampStateAndIndicateChange(getState(), Maximum);
1411 }
1412
// Writes the assumed value as the "amdgpu-agpr-alloc" string attribute.
1413 ChangeStatus manifest(Attributor &A) override {
1414 LLVMContext &Ctx = getAssociatedFunction()->getContext();
1415 SmallString<4> Buffer;
1416 raw_svector_ostream OS(Buffer);
1417 OS << getAssumed();
1418
1419 return A.manifestAttrs(
1420 getIRPosition(), {Attribute::get(Ctx, "amdgpu-agpr-alloc", OS.str())});
1421 }
1422
1423 StringRef getName() const override { return "AAAMDGPUMinAGPRAlloc"; }
1424 const char *getIdAddr() const override { return &ID; }
1425
1426 /// This function should return true if the type of the \p AA is
1427 /// AAAMDGPUMinAGPRAllocs
1428 static bool classof(const AbstractAttribute *AA) {
1429 return (AA->getIdAddr() == &ID);
1430 }
1431
1432 static const char ID;
1433};
1434
1435const char AAAMDGPUMinAGPRAlloc::ID = 0;
1436
1437/// An abstract attribute to propagate the function attribute
1438/// "amdgpu-cluster-dims" from kernel entry functions to device functions.
1439struct AAAMDGPUClusterDims
1440 : public StateWrapper<BooleanState, AbstractAttribute> {
1441 using Base = StateWrapper<BooleanState, AbstractAttribute>;
1442 AAAMDGPUClusterDims(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1443
1444 /// Create an abstract attribute view for the position \p IRP.
1445 static AAAMDGPUClusterDims &createForPosition(const IRPosition &IRP,
1446 Attributor &A);
1447
1448 /// See AbstractAttribute::getName().
1449 StringRef getName() const override { return "AAAMDGPUClusterDims"; }
1450
1451 /// See AbstractAttribute::getIdAddr().
1452 const char *getIdAddr() const override { return &ID; }
1453
1454 /// This function should return true if the type of the \p AA is
1455 /// AAAMDGPUClusterDims.
1456 static bool classof(const AbstractAttribute *AA) {
1457 return AA->getIdAddr() == &ID;
1458 }
1459
1460 virtual const AMDGPU::ClusterDimsAttr &getClusterDims() const = 0;
1461
1462 /// Unique ID (due to the unique address)
1463 static const char ID;
1464};
1465
1466const char AAAMDGPUClusterDims::ID = 0;
1467
1468struct AAAMDGPUClusterDimsFunction : public AAAMDGPUClusterDims {
1469 AAAMDGPUClusterDimsFunction(const IRPosition &IRP, Attributor &A)
1470 : AAAMDGPUClusterDims(IRP, A) {}
1471
1472 void initialize(Attributor &A) override {
1473 Function *F = getAssociatedFunction();
1474 assert(F && "empty associated function");
1475
1477
1478 // No matter what a kernel function has, it is final.
1479 if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
1480 if (Attr.isUnknown())
1481 indicatePessimisticFixpoint();
1482 else
1483 indicateOptimisticFixpoint();
1484 }
1485 }
1486
1487 const std::string getAsStr(Attributor *A) const override {
1488 if (!getAssumed() || Attr.isUnknown())
1489 return "unknown";
1490 if (Attr.isNoCluster())
1491 return "no";
1492 if (Attr.isVariableDims())
1493 return "variable";
1494 return Attr.to_string();
1495 }
1496
1497 void trackStatistics() const override {}
1498
1499 ChangeStatus updateImpl(Attributor &A) override {
1500 auto OldState = Attr;
1501
1502 auto CheckCallSite = [&](AbstractCallSite CS) {
1503 const auto *CallerAA = A.getAAFor<AAAMDGPUClusterDims>(
1504 *this, IRPosition::function(*CS.getInstruction()->getFunction()),
1505 DepClassTy::REQUIRED);
1506 if (!CallerAA || !CallerAA->isValidState())
1507 return false;
1508
1509 return merge(CallerAA->getClusterDims());
1510 };
1511
1512 bool UsedAssumedInformation = false;
1513 if (!A.checkForAllCallSites(CheckCallSite, *this,
1514 /*RequireAllCallSites=*/true,
1515 UsedAssumedInformation))
1516 return indicatePessimisticFixpoint();
1517
1518 return OldState == Attr ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
1519 }
1520
1521 ChangeStatus manifest(Attributor &A) override {
1522 if (Attr.isUnknown())
1523 return ChangeStatus::UNCHANGED;
1524 return A.manifestAttrs(
1525 getIRPosition(),
1526 {Attribute::get(getAssociatedFunction()->getContext(), AttrName,
1527 Attr.to_string())},
1528 /*ForceReplace=*/true);
1529 }
1530
1531 const AMDGPU::ClusterDimsAttr &getClusterDims() const override {
1532 return Attr;
1533 }
1534
1535private:
1536 bool merge(const AMDGPU::ClusterDimsAttr &Other) {
1537 // Case 1: Both of them are unknown yet, we do nothing and continue wait for
1538 // propagation.
1539 if (Attr.isUnknown() && Other.isUnknown())
1540 return true;
1541
1542 // Case 2: The other is determined, but we are unknown yet, we simply take
1543 // the other's value.
1544 if (Attr.isUnknown()) {
1545 Attr = Other;
1546 return true;
1547 }
1548
1549 // Case 3: We are determined but the other is unknown yet, we simply keep
1550 // everything unchanged.
1551 if (Other.isUnknown())
1552 return true;
1553
1554 // After this point, both are determined.
1555
1556 // Case 4: If they are same, we do nothing.
1557 if (Attr == Other)
1558 return true;
1559
1560 // Now they are not same.
1561
1562 // Case 5: If either of us uses cluster (but not both; otherwise case 4
1563 // would hold), then it is unknown whether cluster will be used, and the
1564 // state is final, unlike case 1.
1565 if (Attr.isNoCluster() || Other.isNoCluster()) {
1566 Attr.setUnknown();
1567 return false;
1568 }
1569
1570 // Case 6: Both of us use cluster, but the dims are different, so the result
1571 // is, cluster is used, but we just don't have a fixed dims.
1572 Attr.setVariableDims();
1573 return true;
1574 }
1575
1576 AMDGPU::ClusterDimsAttr Attr;
1577
1578 static constexpr char AttrName[] = "amdgpu-cluster-dims";
1579};
1580
1581AAAMDGPUClusterDims &
1582AAAMDGPUClusterDims::createForPosition(const IRPosition &IRP, Attributor &A) {
1584 return *new (A.Allocator) AAAMDGPUClusterDimsFunction(IRP, A);
1585 llvm_unreachable("AAAMDGPUClusterDims is only valid for function position");
1586}
1587
/// Shared driver for the pass entry points: configures an Attributor over
/// \p Functions, requests the AMDGPU abstract attributes for each function
/// (plus address-space/alignment attributes for memory-access pointers), and
/// runs it. Returns true when Attributor::run() reports CHANGED.
///
/// NOTE(review): the embedded numbering skips 1594 and 1601-1602, so the
/// declaration of `Allocator` and part of the `Allowed` list are missing from
/// this listing; restore them from the upstream file before editing.
1588static bool runImpl(SetVector<Function *> &Functions, bool IsModulePass,
1589 bool DeleteFns, Module &M, AnalysisGetter &AG,
1590 TargetMachine &TM, AMDGPUAttributorOptions Options,
1591 ThinOrFullLTOPhase LTOPhase) {
1592
1593 CallGraphUpdater CGUpdater;
// NOTE(review): `Allocator` is used below but its declaration (line 1594) is
// not shown.
1595 AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM);
// IDs of the abstract attributes passed to the Attributor as AC.Allowed.
// NOTE(review): AAAddressSpace and AANoAliasAddrSpace are requested further
// down but do not appear in the visible part of this list — presumably they
// are on the missing lines 1601-1602; confirm.
1596 DenseSet<const char *> Allowed(
1597 {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
1598 &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
1599 &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID,
1600 &AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID,
1603 &AAAMDGPUClusterDims::ID, &AAAlign::ID});
1604
1605 AttributorConfig AC(CGUpdater);
1606 AC.IsClosedWorldModule = Options.IsClosedWorld;
1607 AC.Allowed = &Allowed;
1608 AC.IsModulePass = IsModulePass;
1609 AC.DeleteFns = DeleteFns;
1610 AC.DefaultInitializeLiveInternals = false;
// Callback result: true only for non-entry callees, and only while the number
// of assumed callees does not exceed the command-line threshold.
1611 AC.IndirectCalleeSpecializationCallback =
1612 [](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
1613 Function &Callee, unsigned NumAssumedCallees) {
1614 return !AMDGPU::isEntryFunctionCC(Callee.getCallingConv()) &&
1615 (NumAssumedCallees <= IndirectCallSpecializationThreshold);
1616 };
// Callback result: true exactly for functions with the AMDGPU_KERNEL calling
// convention.
1617 AC.IPOAmendableCB = [](const Function &F) {
1618 return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
1619 };
1620
1621 Attributor A(Functions, InfoCache, AC);
1622
1623 LLVM_DEBUG({
1624 StringRef LTOPhaseStr = to_string(LTOPhase);
1625 dbgs() << "[AMDGPUAttributor] Running at phase " << LTOPhaseStr << '\n'
1626 << "[AMDGPUAttributor] Module " << M.getName() << " is "
1627 << (AC.IsClosedWorldModule ? "" : "not ")
1628 << "assumed to be a closed world.\n";
1629 });
1630
// Seed the Attributor. Flat work-group size and waves-per-EU are requested
// only for non-entry functions; cluster dims only for definitions on
// subtargets with clusters; AGPR allocation only with GFX90A instructions.
1631 for (auto *F : Functions) {
1632 A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(*F));
1633 A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(*F));
1634 A.getOrCreateAAFor<AAAMDMaxNumWorkgroups>(IRPosition::function(*F));
1635 CallingConv::ID CC = F->getCallingConv();
1636 if (!AMDGPU::isEntryFunctionCC(CC)) {
1637 A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(*F));
1638 A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(*F));
1639 }
1640
1641 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
1642 if (!F->isDeclaration() && ST.hasClusters())
1643 A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRPosition::function(*F));
1644
1645 if (ST.hasGFX90AInsts())
1646 A.getOrCreateAAFor<AAAMDGPUMinAGPRAlloc>(IRPosition::function(*F));
1647
// For every load, store, atomicrmw and cmpxchg, request address-space
// attributes on the pointer operand; AAAlign is requested only when that
// pointer is an amdgcn_make_buffer_rsrc intrinsic call.
1648 for (auto &I : instructions(F)) {
1649 Value *Ptr = nullptr;
1650 if (auto *LI = dyn_cast<LoadInst>(&I))
1651 Ptr = LI->getPointerOperand();
1652 else if (auto *SI = dyn_cast<StoreInst>(&I))
1653 Ptr = SI->getPointerOperand();
1654 else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
1655 Ptr = RMW->getPointerOperand();
1656 else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
1657 Ptr = CmpX->getPointerOperand();
1658
1659 if (Ptr) {
1660 A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
1661 A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(*Ptr));
1662 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Ptr)) {
1663 if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
1664 A.getOrCreateAAFor<AAAlign>(IRPosition::value(*Ptr));
1665 }
1666 }
1667 }
1668 }
1669
1670 return A.run() == ChangeStatus::CHANGED;
1671}
1672} // namespace
1673
1676
// NOTE(review): this is the body of a module-level pass entry point, but the
// listing is incomplete: the signature and the definition of `FAM` (embedded
// lines 1674-1678) and the tail of the return statement (1690-1691) are
// missing. What remains collects every function definition of the module and
// hands the set to runImpl with IsModulePass and DeleteFns both true.
1679 AnalysisGetter AG(FAM);
1680
1681 SetVector<Function *> Functions;
1682 for (Function &F : M) {
1683 if (!F.isDeclaration())
1684 Functions.insert(&F);
1685 }
1686
1687 // TODO: Probably preserves CFG
1688 return runImpl(Functions, /*IsModulePass=*/true, /*DeleteFns=*/true, M, AG,
1689 TM, Options, LTOPhase)
1692}
1693
// NOTE(review): this is the body of a CGSCC pass entry point, but the listing
// is incomplete: the start of the signature (1694-1695), the declaration that
// the `AM.getResult<...>` expression initialises (1699), line 1710 and the
// remainder of the return statement (1715-1717) are missing. What remains
// collects the non-intrinsic functions of the SCC and calls runImpl with
// IsModulePass and DeleteFns both false.
1696 LazyCallGraph &CG,
1697 CGSCCUpdateResult &UR) {
1698
1700 AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
1701 AnalysisGetter AG(FAM);
1702
1703 SetVector<Function *> Functions;
1704 for (LazyCallGraph::Node &N : C) {
1705 Function *F = &N.getFunction();
1706 if (!F->isIntrinsic())
1707 Functions.insert(F);
1708 }
1709
1711 Module *M = C.begin()->getFunction().getParent();
1712 // In the CGSCC pipeline, avoid untracked call graph modifications by
1713 // disabling function deletion, mirroring the generic AttributorCGSCCPass.
1714 return runImpl(Functions, /*IsModulePass=*/false, /*DeleteFns=*/false, *M, AG,
1718}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isDSAddress(const Constant *C)
static constexpr std::pair< ImplicitArgumentMask, StringLiteral > ImplicitAttrs[]
static cl::opt< unsigned > IndirectCallSpecializationThreshold("amdgpu-indirect-call-specialization-threshold", cl::desc("A threshold controls whether an indirect call will be specialized"), cl::init(3))
static ImplicitArgumentMask intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit, bool HasApertureRegs, bool SupportsGetDoorBellID, unsigned CodeObjectVersion)
static bool hasSanitizerAttributes(const Function &F)
Returns true if sanitizer attributes are present on a function.
ImplicitArgumentMask
@ UNKNOWN_INTRINSIC
@ NOT_IMPLICIT_INPUT
@ ALL_ARGUMENT_MASK
ImplicitArgumentPositions
@ LAST_ARG_POS
static bool castRequiresQueuePtr(unsigned SrcAS)
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
#define X(NUM, ENUM, NAME)
Definition ELF.h:856
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static bool runImpl(MachineFunction &MF)
Definition CFIFixup.cpp:304
DXIL Resource Access
@ Default
AMD GCN specific subclass of TargetSubtarget.
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
static StringRef getName(Value *V)
Basic Register Allocator
#define LLVM_DEBUG(...)
Definition Debug.h:119
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ClusterDimsAttr get(const Function &F)
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Value * getArgOperand(unsigned i) const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
const APInt & getLower() const
Return the lower value for this range.
const APInt & getUpper() const
Return the upper value for this range.
This is an important base class in LLVM.
Definition Constant.h:43
A proxy from a FunctionAnalysisManager to an SCC.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
unsigned getAddressSpace() const
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
A node in the call graph.
An SCC of the call graph.
A lazily constructed view of the call graph of a module.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition Module.h:280
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
A vector that has set insertion semantics.
Definition SetVector.h:57
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void push_back(const T &Elt)
std::string str() const
Get the contents as an std::string.
Definition StringRef.h:222
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
op_range operands()
Definition User.h:267
LLVM_ABI bool isDroppable() const
A droppable user is a user for which uses can be dropped without affecting correctness and should be ...
Definition User.cpp:119
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned getMaxWavesPerEU(const MCSubtargetInfo &STI)
unsigned getAMDHSACodeObjectVersion(const Module &M)
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
std::tuple< char, unsigned, unsigned > parseAsmPhysRegName(StringRef RegName)
Returns a valid charcode or 0 in the first entry if this is a valid physical register name.
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
std::tuple< char, unsigned, unsigned > parseAsmConstraintPhysReg(StringRef Constraint)
Returns a valid charcode or 0 in the first entry if this is a valid physical register constraint.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
SmallVector< unsigned > getMaxNumWorkGroups(const Function &F)
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
E & operator^=(E &LHS, E RHS)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:50
initializer< Ty > init(const Ty &Val)
NodeAddr< CodeNode * > Code
Definition RDFGraph.h:388
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
Definition InstrProf.h:143
AnalysisManager< LazyCallGraph::SCC, LazyCallGraph & > CGSCCAnalysisManager
The CGSCC analysis manager.
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition Pass.h:77
@ None
No LTO/ThinLTO behavior needed.
Definition Pass.h:79
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
const char * to_string(ThinOrFullLTOPhase Phase)
Definition Pass.cpp:306
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ Other
Any other memory.
Definition ModRef.h:68
ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R)
Helper function to clamp a state S of type StateType with the information in R and indicate/return if...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
ChangeStatus
{
Definition Attributor.h:485
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
BumpPtrAllocatorImpl<> BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
Definition Allocator.h:390
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
#define N
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual const SetVector< Function * > & getOptimisticEdges() const =0
Get the optimistic edges.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual bool hasNonAsmUnknownCallee() const =0
Is there any call with an unknown callee, excluding any inline asm.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
Instruction * getRemoteInst() const
Return the actual instruction that causes the access.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual const char * getIdAddr() const =0
This function should return the address of the ID of the AbstractAttribute.
Wrapper for FunctionAnalysisManager.
The fixpoint analysis framework that orchestrates the attribute deduction.
Support structure for SCC passes to communicate updates to the call graph back to the CGSCC pass manager...
DecIntegerState & takeAssumedMaximum(base_t Value)
Take maximum of assumed and Value.
Helper to describe and deal with positions in the LLVM-IR.
Definition Attributor.h:582
static const IRPosition callsite_returned(const CallBase &CB)
Create a position describing the returned value of CB.
Definition Attributor.h:650
static const IRPosition value(const Value &V, const CallBaseContext *CBContext=nullptr)
Create a position describing the value of V.
Definition Attributor.h:606
@ IRP_FUNCTION
An attribute for a function (scope).
Definition Attributor.h:594
static const IRPosition function(const Function &F, const CallBaseContext *CBContext=nullptr)
Create a position describing the function scope of F.
Definition Attributor.h:625
Kind getPositionKind() const
Return the associated position kind.
Definition Attributor.h:878
static const IRPosition callsite_function(const CallBase &CB)
Create a position describing the function scope of CB.
Definition Attributor.h:645
Data structure to hold cached (LLVM-IR) information.
bool isValidState() const override
See AbstractState::isValidState() NOTE: For now we simply pretend that the worst possible state is in...
ChangeStatus indicatePessimisticFixpoint() override
See AbstractState::indicatePessimisticFixpoint(...)
Helper to tie an abstract state implementation to an abstract attribute.