LLVM 23.0.0git
AMDGPUAsmPrinter.cpp
Go to the documentation of this file.
1//===-- AMDGPUAsmPrinter.cpp - AMDGPU assembly printer --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10///
11/// The AMDGPUAsmPrinter is used to print both assembly string and also binary
12/// code. When passed an MCAsmStreamer it prints assembly and when passed
13/// an MCObjectStreamer it outputs binary code.
14//
15//===----------------------------------------------------------------------===//
16//
17
18#include "AMDGPUAsmPrinter.h"
19#include "AMDGPU.h"
23#include "AMDGPUTargetMachine.h"
24#include "GCNSubtarget.h"
29#include "R600AsmPrinter.h"
35#include "llvm/ADT/StringSet.h"
43#include "llvm/MC/MCAssembler.h"
44#include "llvm/MC/MCContext.h"
46#include "llvm/MC/MCStreamer.h"
47#include "llvm/MC/MCValue.h"
54
55using namespace llvm;
56using namespace llvm::AMDGPU;
57
58// This should get the default rounding mode from the kernel. We just set the
59// default here, but this could change if the OpenCL rounding mode pragmas are
60// used.
61//
62// The denormal mode here should match what is reported by the OpenCL runtime
63// for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
64// can also be overridden to flush with the -cl-denorms-are-zero compiler flag.
65//
66// AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
67// precision, and leaves single precision to flush all and does not report
68// CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
69// CL_FP_DENORM for both.
70//
71// FIXME: It seems some instructions do not support single precision denormals
72// regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, rsq_*f32, sqrt_f32,
73// and sin_f32, cos_f32 on most parts).
74
75// We want to use these instructions, and using fp32 denormals also causes
76// instructions to run at the double precision rate for the device so it's
77// probably best to just report no single precision denormals.
84
// Target-registry factory callback: constructs an AMDGPUAsmPrinter for a
// target machine / streamer pair.
// NOTE(review): the line carrying the function name and the TargetMachine
// parameter (`tm`) was lost in extraction — confirm against upstream source.
85static AsmPrinter *
87                                          std::unique_ptr<MCStreamer> &&Streamer) {
88  return new AMDGPUAsmPrinter(tm, std::move(Streamer));
89}
90
98
99namespace {
100class AMDGPUAsmPrinterHandler : public AsmPrinterHandler {
101protected:
102 AMDGPUAsmPrinter *Asm;
103
104public:
105 AMDGPUAsmPrinterHandler(AMDGPUAsmPrinter *A) : Asm(A) {}
106
107 void beginFunction(const MachineFunction *MF) override {}
108
109 void endFunction(const MachineFunction *MF) override { Asm->endFunction(MF); }
110
111 void endModule() override {}
112};
113} // End anonymous namespace
114
// Constructor body (first signature line elided in this extraction): hands
// the streamer to the AsmPrinter base class and verifies one was provided.
116                                    std::unique_ptr<MCStreamer> Streamer)
117    : AsmPrinter(TM, std::move(Streamer)) {
118  assert(OutStreamer && "AsmPrinter constructed without streamer");
119}
120
// Pass-name accessor body (signature elided): human-readable pass name.
122  return "AMDGPU Assembly Printer";
123}
124
// Accessor body (signature elided): returns the target machine's global
// MCSubtargetInfo.
126  return &TM.getMCSubtargetInfo();
127}
128
// Target-streamer accessor body (signature elided). May be called before the
// streamer exists (e.g. during -print-machineinstrs style runs), hence the
// null check; otherwise downcasts to the AMDGPU-specific target streamer.
130  if (!OutStreamer)
131    return nullptr;
132  return static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
133}
134
138
// Lazily performs one-time target-streamer setup for module \p M: resolves
// the target ID and, on AMDHSA, creates/starts the HSA metadata stream.
// NOTE(review): several interior lines (the initialized-flag, OS check, and
// ISA-version emission) were dropped by extraction.
139void AMDGPUAsmPrinter::initTargetStreamer(Module &M) {
141
142  // TODO: Which one is called first, emitStartOfAsmFile or
143  // emitFunctionBodyStart?
144  if (getTargetStreamer() && !getTargetStreamer()->getTargetID())
145    initializeTargetID(M);
146
149    return;
150
152
155                            CodeObjectVersion);
156    HSAMetadataStream->begin(M, *getTargetStreamer()->getTargetID());
157  }
158
161}
162
// --- Following fragment (its signature was elided; presumably the
// end-of-asm-file hook): finalizes and emits HSA metadata on AMDHSA. ---
164  // Init target streamer if it has not yet happened
166    initTargetStreamer(M);
167
168  if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
170
171  // Emit HSA metadata (NT_AMD_HSA_METADATA note).
173  if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
174    HSAMetadataStream->end();
175    bool Success = HSAMetadataStream->emitTo(*getTargetStreamer());
176    (void)Success;
177    assert(Success && "Malformed HSA Metadata");
178  }
179}
180
// Per-function emission hook body (signature elided by extraction). Validates
// subtarget/code-object compatibility, cross-checks the function's xnack and
// sramecc target-ID settings against the module's, and for entry functions
// emits the amd_kernel_code_t (Mesa) and/or HSA kernel metadata.
182  const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
183  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
184  const Function &F = MF->getFunction();
185
186  // TODO: We're checking this late, would be nice to check it earlier.
187  if (STM.requiresCodeObjectV6() && CodeObjectVersion < AMDGPU::AMDHSA_COV6) {
// NOTE(review): the error-reporting call's first line was elided here.
189        STM.getCPU() + " is only available on code object version 6 or better");
190  }
191
192  // TODO: Which one is called first, emitStartOfAsmFile or
193  // emitFunctionBodyStart?
194  if (!getTargetStreamer()->getTargetID())
195    initializeTargetID(*F.getParent());
196
197  const auto &FunctionTargetID = STM.getTargetID();
198  // Make sure function's xnack settings are compatible with module's
199  // xnack settings.
200  if (FunctionTargetID.isXnackSupported() &&
201      FunctionTargetID.getXnackSetting() != IsaInfo::TargetIDSetting::Any &&
202      FunctionTargetID.getXnackSetting() !=
203          getTargetStreamer()->getTargetID()->getXnackSetting()) {
204    OutContext.reportError(
205        {}, "xnack setting of '" + Twine(MF->getName()) +
206                "' function does not match module xnack setting");
207    return;
208  }
209  // Make sure function's sramecc settings are compatible with module's
210  // sramecc settings.
211  if (FunctionTargetID.isSramEccSupported() &&
212      FunctionTargetID.getSramEccSetting() != IsaInfo::TargetIDSetting::Any &&
213      FunctionTargetID.getSramEccSetting() !=
214          getTargetStreamer()->getTargetID()->getSramEccSetting()) {
215    OutContext.reportError(
216        {}, "sramecc setting of '" + Twine(MF->getName()) +
217                "' function does not match module sramecc setting");
218    return;
219  }
220
  // Everything below only applies to kernel entry points.
221  if (!MFI.isEntryFunction())
222    return;
223
224  if (STM.isMesaKernel(F) &&
225      (F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
226       F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
227    AMDGPUMCKernelCodeT KernelCode;
228    getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
229    KernelCode.validate(&STM, MF->getContext());
// NOTE(review): the line emitting KernelCode through the streamer was elided.
231  }
232
233  if (STM.isAmdHsaOS())
234    HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo);
235}
236
// Per-kernel emission hook body (signature elided). For AMDHSA entry
// functions only: switches to the read-only data section and emits the
// 64-byte-aligned kernel descriptor next to the kernel code.
238  const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
239  if (!MFI.isEntryFunction())
240    return;
241
242  assert(TM.getTargetTriple().getOS() == Triple::AMDHSA);
243
244  auto &Streamer = getTargetStreamer()->getStreamer();
245  auto &Context = Streamer.getContext();
246  auto &ObjectFileInfo = *Context.getObjectFileInfo();
247  auto &ReadOnlySection = *ObjectFileInfo.getReadOnlySection();
248
  // Save the current section so kernel code emission resumes where it was.
249  Streamer.pushSection();
250  Streamer.switchSection(&ReadOnlySection);
251
252  // CP microcode requires the kernel descriptor to be allocated on 64 byte
253  // alignment.
254  Streamer.emitValueToAlignment(Align(64), 0, 1, 0);
255  ReadOnlySection.ensureMinAlignment(Align(64));
256
257  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
258
259  SmallString<128> KernelName;
260  getNameWithPrefix(KernelName, &MF->getFunction());
// NOTE(review): the call that actually emits the descriptor (and interleaved
// argument lines) was partially elided below.
262      STM, KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
263      CurrentProgramInfo.NumVGPRsForWavesPerEU,
265          CurrentProgramInfo.NumSGPRsForWavesPerEU,
267          CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
268          getTargetStreamer()->getTargetID()->isXnackOnOrAny(), Context),
269          Context),
270      CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed);
271
272  Streamer.popSection();
273}
274
// Implicit-def comment emitter body (signature elided). Prints an
// "implicit-def: <reg>" assembly comment, annotating SGPR-spill-to-VGPR-lane
// defs when the AsmPrinter flag is set.
276  Register RegNo = MI->getOperand(0).getReg();
277
// NOTE(review): the SmallString declaration backing `Str` was elided.
279  raw_svector_ostream OS(Str);
280  OS << "implicit-def: "
281     << printReg(RegNo, MF->getSubtarget().getRegisterInfo());
282
283  if (MI->getAsmPrinterFlags() & AMDGPU::SGPR_SPILL)
284    OS << " : SGPR spill to VGPR lane";
285
286  OutStreamer->AddComment(OS.str());
287  OutStreamer->addBlankLine();
288}
289
// Function-entry-label hook body (signature elided). On AMDHSA it takes an
// early-exit path (elided call); otherwise it emits the kernel symbol for
// AMDHSA/Mesa entry functions and, under -dumpcode, records the label text
// for the disassembly dump.
291  if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
293    return;
294  }
295
296  const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
297  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
298  if (MFI->isEntryFunction() && STM.isAmdHsaOrMesa(MF->getFunction())) {
299    SmallString<128> SymbolName;
300    getNameWithPrefix(SymbolName, &MF->getFunction()),
// NOTE(review): the emitAMDGPUSymbolType-style call was elided here.
303  }
304  if (DumpCodeInstEmitter) {
305    // Disassemble function name label to text.
306    DisasmLines.push_back(MF->getName().str() + ":");
307    DisasmLineMaxLen = std::max(DisasmLineMaxLen, DisasmLines.back().size());
308    HexLines.emplace_back("");
309  }
310
312}
313
// Basic-block-start hook body (signature elided). Under -dumpcode, records a
// "BB<fn>_<bb>:" label line for blocks that are not pure fallthrough.
315  if (DumpCodeInstEmitter && !isBlockOnlyReachableByFallthrough(&MBB)) {
316    // Write a line for the basic block label if it is not only fallthrough.
317    DisasmLines.push_back((Twine("BB") + Twine(getFunctionNumber()) + "_" +
318                           Twine(MBB.getNumber()) + ":")
319                              .str());
320    DisasmLineMaxLen = std::max(DisasmLineMaxLen, DisasmLines.back().size());
321    HexLines.emplace_back("");
322  }
324}
325
// Global-variable emission body (signature elided); this visible portion is
// the LDS (local address space) path: rejects non-undef initializers, then
// emits the symbol via the AMDGPU target streamer with size and alignment.
328  if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
329    OutContext.reportError({},
330                           Twine(GV->getName()) +
331                               ": unsupported initializer for address space");
332    return;
333  }
334
335  const Triple::OSType OS = TM.getTargetTriple().getOS();
336  if (OS == Triple::AMDHSA || OS == Triple::AMDPAL) {
// NOTE(review): a conditional line was elided before this `return`, which is
// why the code below it is reachable — confirm against upstream source.
338      return;
339    // With object linking, LDS definitions should have been externalized
340    // by earlier passes (e.g. LDS lowering, named barrier lowering).
341    // Only declarations reach here, emitted as SHN_AMDGPU_LDS symbols
342    // so the linker can assign their offsets.
343    assert(GV->isDeclaration() &&
344           "LDS definitions should have been externalized when object "
345           "linking is enabled");
346  }
347
348  MCSymbol *GVSym = getSymbol(GV);
349
350  GVSym->redefineIfPossible();
351  if (GVSym->isDefined() || GVSym->isVariable())
352    report_fatal_error("symbol '" + Twine(GVSym->getName()) +
353                       "' is already defined");
354
355  const DataLayout &DL = GV->getDataLayout();
// NOTE(review): the `Size` computation line was elided here.
357  Align Alignment = GV->getAlign().value_or(Align(4));
358
359  emitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
360  emitLinkage(GV, GVSym);
361  auto *TS = getTargetStreamer();
362  TS->emitAMDGPULDS(GVSym, Size, Alignment);
363  return;
364  }
365
367}
368
// Module-initialization hook body (signature elided). Caches the HSA code
// object version and, on AMDHSA, instantiates the matching MsgPack metadata
// streamer (V4/V5/V6) and registers the per-function handler.
370  CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(M);
371
372  if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
373    switch (CodeObjectVersion) {
// NOTE(review): the `case AMDGPU::AMDHSA_COV*:` labels were elided before
// each of the three branches below.
375      HSAMetadataStream = std::make_unique<HSAMD::MetadataStreamerMsgPackV4>();
376      break;
378      HSAMetadataStream = std::make_unique<HSAMD::MetadataStreamerMsgPackV5>();
379      break;
381      HSAMetadataStream = std::make_unique<HSAMD::MetadataStreamerMsgPackV6>();
382      break;
383    default:
384      reportFatalUsageError("unsupported code object version");
385    }
386
387    addAsmPrinterHandler(std::make_unique<AMDGPUAsmPrinterHandler>(this));
388  }
389
391}
392
393/// Mimics GCNSubtarget::computeOccupancy for MCExpr.
394///
395/// Remove dependency on GCNSubtarget and depend only on the necessary values
396/// for said occupancy computation. Should match computeOccupancy implementation
397/// without passing \p STM on.
398const AMDGPUMCExpr *createOccupancy(unsigned InitOcc, const MCExpr *NumSGPRs,
399                                    const MCExpr *NumVGPRs,
400                                    unsigned DynamicVGPRBlockSize,
401                                    const GCNSubtarget &STM, MCContext &Ctx) {
  // Snapshot the subtarget-derived constants so the resulting expression
  // no longer references STM.
402  unsigned MaxWaves = IsaInfo::getMaxWavesPerEU(&STM);
403  unsigned Granule = IsaInfo::getVGPRAllocGranule(&STM, DynamicVGPRBlockSize);
404  unsigned TargetTotalNumVGPRs = IsaInfo::getTotalNumVGPRs(&STM);
405  unsigned Generation = STM.getGeneration();
406
407  auto CreateExpr = [&Ctx](unsigned Value) {
408    return MCConstantExpr::create(Value, Ctx);
409  };
410
// NOTE(review): the `return AMDGPUMCExpr::createOccupancy(`-style line was
// elided here; the braces below are its argument list.
412      {CreateExpr(MaxWaves), CreateExpr(Granule),
413       CreateExpr(TargetTotalNumVGPRs),
414       CreateExpr(Generation), CreateExpr(InitOcc),
415       NumSGPRs, NumVGPRs},
416      Ctx);
417}
418
// Post-layout validation of per-function MC resource symbols for module entry
// functions: diagnoses scratch-size overflow, addressable-SGPR overflow
// (before and after implicit SGPRs), and failure to meet the occupancy target
// requested via the "amdgpu-waves-per-eu" attribute. Several interior lines
// were elided by extraction (marked below).
419void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
420  if (F.isDeclaration() || !AMDGPU::isModuleEntryFunctionCC(F.getCallingConv()))
421    return;
422
424  const GCNSubtarget &STM = TM.getSubtarget<GCNSubtarget>(F);
425  MCSymbol *FnSym = TM.getSymbol(&F);
426
  // Helper: resolve an MCExpr to an absolute value if possible.
427  auto TryGetMCExprValue = [](const MCExpr *Value, uint64_t &Res) -> bool {
428    int64_t Val;
429    if (Value->evaluateAsAbsolute(Val)) {
430      Res = Val;
431      return true;
432    }
433    return false;
434  };
435
  // --- Scratch (private segment) size check. ---
436  const uint64_t MaxScratchPerWorkitem =
438  MCSymbol *ScratchSizeSymbol =
439      RI.getSymbol(FnSym->getName(), RIK::RIK_PrivateSegSize, OutContext);
440  uint64_t ScratchSize;
441  if (ScratchSizeSymbol->isVariable() &&
442      TryGetMCExprValue(ScratchSizeSymbol->getVariableValue(), ScratchSize) &&
443      ScratchSize > MaxScratchPerWorkitem) {
444    DiagnosticInfoStackSize DiagStackSize(F, ScratchSize, MaxScratchPerWorkitem,
445                                          DS_Error);
446    F.getContext().diagnose(DiagStackSize);
447  }
448
449  // Validate addressable scalar registers (i.e., prior to added implicit
450  // SGPRs).
451  MCSymbol *NumSGPRSymbol =
452      RI.getSymbol(FnSym->getName(), RIK::RIK_NumSGPR, OutContext);
// NOTE(review): the generation-check half of this condition was elided.
454      !STM.hasSGPRInitBug()) {
455    unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
456    uint64_t NumSgpr;
457    if (NumSGPRSymbol->isVariable() &&
458        TryGetMCExprValue(NumSGPRSymbol->getVariableValue(), NumSgpr) &&
459        NumSgpr > MaxAddressableNumSGPRs) {
460      F.getContext().diagnose(DiagnosticInfoResourceLimit(
461          F, "addressable scalar registers", NumSgpr, MaxAddressableNumSGPRs,
463      return;
464    }
465  }
466
  // --- Total SGPRs including implicit VCC/flat-scratch/xnack SGPRs. ---
467  MCSymbol *VCCUsedSymbol =
468      RI.getSymbol(FnSym->getName(), RIK::RIK_UsesVCC, OutContext);
469  MCSymbol *FlatUsedSymbol =
470      RI.getSymbol(FnSym->getName(), RIK::RIK_UsesFlatScratch, OutContext);
471  uint64_t VCCUsed, FlatUsed, NumSgpr;
472
473  if (NumSGPRSymbol->isVariable() && VCCUsedSymbol->isVariable() &&
474      FlatUsedSymbol->isVariable() &&
475      TryGetMCExprValue(NumSGPRSymbol->getVariableValue(), NumSgpr) &&
476      TryGetMCExprValue(VCCUsedSymbol->getVariableValue(), VCCUsed) &&
477      TryGetMCExprValue(FlatUsedSymbol->getVariableValue(), FlatUsed)) {
478
479    // Recomputes NumSgprs + implicit SGPRs but all symbols should now be
480    // resolvable.
481    NumSgpr += IsaInfo::getNumExtraSGPRs(
482        &STM, VCCUsed, FlatUsed,
483        getTargetStreamer()->getTargetID()->isXnackOnOrAny());
// NOTE(review): the first half of this condition was elided.
485        STM.hasSGPRInitBug()) {
486      unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
487      if (NumSgpr > MaxAddressableNumSGPRs) {
488        F.getContext().diagnose(DiagnosticInfoResourceLimit(
489            F, "scalar registers", NumSgpr, MaxAddressableNumSGPRs, DS_Error,
491        return;
492      }
493    }
494
    // --- Occupancy check against the amdgpu-waves-per-eu attribute. ---
495    MCSymbol *NumVgprSymbol =
496        RI.getSymbol(FnSym->getName(), RIK::RIK_NumVGPR, OutContext);
497    MCSymbol *NumAgprSymbol =
498        RI.getSymbol(FnSym->getName(), RIK::RIK_NumAGPR, OutContext);
499    uint64_t NumVgpr, NumAgpr;
500
501    MachineModuleInfo &MMI =
503    MachineFunction *MF = MMI.getMachineFunction(F);
504    if (MF && NumVgprSymbol->isVariable() && NumAgprSymbol->isVariable() &&
505        TryGetMCExprValue(NumVgprSymbol->getVariableValue(), NumVgpr) &&
506        TryGetMCExprValue(NumAgprSymbol->getVariableValue(), NumAgpr)) {
507      const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
508      unsigned MaxWaves = MFI.getMaxWavesPerEU();
509      uint64_t TotalNumVgpr =
510          getTotalNumVGPRs(STM.hasGFX90AInsts(), NumAgpr, NumVgpr);
      // Clamp each count to at least 1 and to the subtarget minimum for the
      // wave limit, mirroring the allocation-granule rules.
511      uint64_t NumVGPRsForWavesPerEU =
512          std::max({TotalNumVgpr, (uint64_t)1,
513                    (uint64_t)STM.getMinNumVGPRs(
514                        MaxWaves, MFI.getDynamicVGPRBlockSize())});
515      uint64_t NumSGPRsForWavesPerEU = std::max(
516          {NumSgpr, (uint64_t)1, (uint64_t)STM.getMinNumSGPRs(MaxWaves)});
517      const MCExpr *OccupancyExpr = createOccupancy(
518          STM.getOccupancyWithWorkGroupSizes(*MF).second,
519          MCConstantExpr::create(NumSGPRsForWavesPerEU, OutContext),
520          MCConstantExpr::create(NumVGPRsForWavesPerEU, OutContext),
// NOTE(review): the trailing createOccupancy arguments were elided here.
522      uint64_t Occupancy;
523
524      const auto [MinWEU, MaxWEU] = AMDGPU::getIntegerPairAttribute(
525          F, "amdgpu-waves-per-eu", {0, 0}, true);
526
527      if (TryGetMCExprValue(OccupancyExpr, Occupancy) && Occupancy < MinWEU) {
528        DiagnosticInfoOptimizationFailure Diag(
529            F, F.getSubprogram(),
530            "failed to meet occupancy target given by 'amdgpu-waves-per-eu' in "
531            "'" +
532                F.getName() + "': desired occupancy was " + Twine(MinWEU) +
533                ", final occupancy is " + Twine(Occupancy));
534        F.getContext().diagnose(Diag);
535        return;
536      }
537    }
538  }
539}
540
541static void appendTypeEncoding(std::string &Enc, Type *Ty, const DataLayout &DL,
542 bool IsReturnType) {
543 if (Ty->isVoidTy()) {
544 Enc += 'v';
545 return;
546 }
547 unsigned Bits = DL.getTypeSizeInBits(Ty);
548 // Zero-sized non-void types (e.g. `{}` or `[0 x i8]`) consume no ABI
549 // registers. For returns, emit the same no-result marker as void so the
550 // parameter encoding still has an explicit return-type prefix.
551 if (Bits == 0) {
552 if (IsReturnType)
553 Enc += 'v';
554 return;
555 }
556 if (Bits <= 32)
557 Enc += 'i';
558 else if (Bits <= 64)
559 Enc += 'l';
560 else
561 Enc.append(divideCeil(Bits, 32), 'i');
562}
563
564static std::string computeTypeId(const FunctionType *FTy,
565 const DataLayout &DL) {
566 std::string Enc;
567 appendTypeEncoding(Enc, FTy->getReturnType(), DL, /*IsReturnType=*/true);
568 for (Type *ParamTy : FTy->params())
569 appendTypeEncoding(Enc, ParamTy, DL, /*IsReturnType=*/false);
570 return Enc;
571}
572
// Records a caller->callee symbol pair in DirectCallEdges for a direct call
// instruction, for later emission into the .amdgpu.info call graph.
573void AMDGPUAsmPrinter::collectCallEdge(const MachineInstr &MI) {
// NOTE(review): the guard condition preceding this early return (presumably
// "is this a call?") was elided by extraction.
575    return;
576  const SIInstrInfo *TII = MF->getSubtarget<GCNSubtarget>().getInstrInfo();
577  const MachineOperand *Callee =
578      TII->getNamedOperand(MI, AMDGPU::OpName::callee);
  // Only direct calls to known globals contribute an edge.
579  if (!Callee || !Callee->isGlobal())
580    return;
581  DirectCallEdges.insert(
582      {getSymbol(&MF->getFunction()), getSymbol(Callee->getGlobal())});
}
584
// Builds and emits the unified .amdgpu.info section for module \p M:
// per-function resource records, the direct call graph, LDS and named-barrier
// use edges (from module metadata), indirect call sites, and type IDs of
// address-taken functions. A few interior lines were elided (marked below).
585void AMDGPUAsmPrinter::emitAMDGPUInfo(Module &M) {
// NOTE(review): the guard condition for this early return was elided.
587    return;
588
589  const NamedMDNode *LDSMD = M.getNamedMetadata("amdgpu.lds.uses");
590  bool HasLDSUses = LDSMD && LDSMD->getNumOperands() > 0;
591
592  const NamedMDNode *BarMD = M.getNamedMetadata("amdgpu.named_barrier.uses");
593  bool HasNamedBarriers = BarMD && BarMD->getNumOperands() > 0;
594
595  // Collect address-taken functions (with type IDs) and indirect call sites.
596  DenseMap<const Function *, std::string> AddrTakenTypeIds;
597  using IndirectCallInfo = std::pair<const Function *, std::string>;
// NOTE(review): the declaration of the IndirectCalls container was elided.
599
600  for (const Function &F : M) {
601    bool IsKernel = AMDGPU::isKernel(F.getCallingConv());
602
    // Kernels cannot be called indirectly, so only non-kernels need type IDs.
603    if (!IsKernel && F.hasAddressTaken(/*PutOffender=*/nullptr,
604                                       /*IgnoreCallbackUses=*/false,
605                                       /*IgnoreAssumeLikeCalls=*/true,
606                                       /*IgnoreLLVMUsed=*/true)) {
607      AddrTakenTypeIds[&F] =
608          computeTypeId(F.getFunctionType(), M.getDataLayout());
609    }
610
611    if (F.isDeclaration())
612      continue;
613
    // Record each distinct indirect-call type ID once per function.
614    StringSet<> SeenTypeIds;
615    for (const BasicBlock &BB : F) {
616      for (const Instruction &I : BB) {
617        const auto *CB = dyn_cast<CallBase>(&I);
618        if (!CB || !CB->isIndirectCall())
619          continue;
620        std::string TId =
621            computeTypeId(CB->getFunctionType(), M.getDataLayout());
622        if (SeenTypeIds.insert(TId).second)
623          IndirectCalls.push_back({&F, std::move(TId)});
624      }
625    }
626  }
627
  // Nothing to emit: skip creating the section entirely.
628  if (FunctionInfos.empty() && DirectCallEdges.empty() && !HasLDSUses &&
629      !HasNamedBarriers && AddrTakenTypeIds.empty() && IndirectCalls.empty())
630    return;
631
632  AMDGPU::InfoSectionData Data;
633  Data.Funcs = std::move(FunctionInfos);
634
635  for (auto &[F, TypeId] : AddrTakenTypeIds) {
636    MCSymbol *Sym = getSymbol(F);
637    Data.TypeIds.push_back({Sym, TypeId});
638  }
639
640  for (auto &[CallerSym, CalleeSym] : DirectCallEdges)
641    Data.Calls.push_back({CallerSym, CalleeSym});
642  DirectCallEdges.clear();
643
644  if (HasLDSUses) {
    // Each MDNode is (function, LDS variable).
645    for (const MDNode *N : LDSMD->operands()) {
646      auto *Func = mdconst::extract<Function>(N->getOperand(0));
647      auto *LdsVar = mdconst::extract<GlobalVariable>(N->getOperand(1));
648      Data.Uses.push_back({getSymbol(Func), getSymbol(LdsVar)});
649    }
650  }
651
652  if (HasNamedBarriers) {
    // Each MDNode is (barrier variable, user function...).
653    for (const MDNode *N : BarMD->operands()) {
654      auto *BarVar = mdconst::extract<GlobalVariable>(N->getOperand(0));
655      MCSymbol *BarSym = getSymbol(BarVar);
656      for (unsigned I = 1, E = N->getNumOperands(); I < E; ++I) {
657        auto *Func = mdconst::extract<Function>(N->getOperand(I));
658        Data.Uses.push_back({getSymbol(Func), BarSym});
659      }
660    }
661  }
662
663  for (auto &[Caller, Enc] : IndirectCalls) {
664    MCSymbol *CallerSym = getSymbol(Caller);
665    Data.IndirectCalls.push_back({CallerSym, Enc});
666  }
667
// NOTE(review): the call emitting `Data` through the streamer was elided.
669}
670
// End-of-module emission body (signature elided). Pads .text with s_code_end
// where required, emits the .amdgpu.info section and module-wide GPR maximums,
// optionally validates per-function resource usage, and resets the resource
// info tracker.
672  // Pad with s_code_end to help tools and guard against instruction prefetch
673  // causing stale data in caches. Arguably this should be done by the linker,
674  // which is why this isn't done for Mesa.
675  // Don't do it if there is no code.
676  const MCSubtargetInfo &STI = *getGlobalSTI();
677  if ((AMDGPU::isGFX10Plus(STI) || AMDGPU::isGFX90A(STI)) &&
// NOTE(review): the OS check and TextSect lookup lines were elided here.
681    if (TextSect->hasInstructions()) {
682      OutStreamer->switchSection(TextSect);
684    }
685  }
686
687  // Emit the unified .amdgpu.info section (per-function resources, call graph,
688  // LDS/named-barrier use edges, indirect calls, and address-taken type IDs).
689  emitAMDGPUInfo(M);
690
691  // Assign expressions which can only be resolved when all other functions are
692  // known.
693  RI.finalize(OutContext);
694
695  // Switch section and emit all GPR maximums within the processed module.
696  OutStreamer->pushSection();
697  MCSectionELF *MaxGPRSection =
698      OutContext.getELFSection(".AMDGPU.gpr_maximums", ELF::SHT_PROGBITS, 0);
699  OutStreamer->switchSection(MaxGPRSection);
// NOTE(review): the streamer call taking these max-symbol arguments was
// elided.
701      RI.getMaxVGPRSymbol(OutContext), RI.getMaxAGPRSymbol(OutContext),
702      RI.getMaxSGPRSymbol(OutContext), RI.getMaxNamedBarrierSymbol(OutContext));
703  OutStreamer->popSection();
704
705  // In the object-linking pipeline per-function resource MCExprs reference
706  // external callee symbols that cannot be evaluated here, so cross-TU limit
707  // checks would silently no-op for every non-leaf function. Defer resource
708  // sanity checking to the linker, which re-validates against the aggregated
709  // call graph in the combined .amdgpu.info metadata.
// NOTE(review): the guard condition opening this block was elided.
711    for (Function &F : M.functions())
712      validateMCResourceInfo(F);
713  }
714
715  RI.reset();
716
718}
719
// Renders \p Value as a string for assembly comments: folds AMDGPU-specific
// MCExpr nodes first, then pretty-prints the result.
720SmallString<128> AMDGPUAsmPrinter::getMCExprStr(const MCExpr *Value) {
// NOTE(review): the `SmallString<128> Str;` declaration was elided here.
722  raw_svector_ostream OSS(Str);
723  auto &Streamer = getTargetStreamer()->getStreamer();
724  auto &Context = Streamer.getContext();
725  const MCExpr *New = foldAMDGPUMCExpr(Value, Context);
726  printAMDGPUMCExpr(New, OSS, &MAI);
727  return Str;
728}
729
730// Print comments that apply to both callable functions and entry points.
731void AMDGPUAsmPrinter::emitCommonFunctionComments(
732 const MCExpr *NumVGPR, const MCExpr *NumAGPR, const MCExpr *TotalNumVGPR,
733 const MCExpr *NumSGPR, const MCExpr *ScratchSize, uint64_t CodeSize,
734 const AMDGPUMachineFunctionInfo *MFI) {
735 OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false);
736 OutStreamer->emitRawComment(" TotalNumSgprs: " + getMCExprStr(NumSGPR),
737 false);
738 OutStreamer->emitRawComment(" NumVgprs: " + getMCExprStr(NumVGPR), false);
739 if (NumAGPR && TotalNumVGPR) {
740 OutStreamer->emitRawComment(" NumAgprs: " + getMCExprStr(NumAGPR), false);
741 OutStreamer->emitRawComment(" TotalNumVgprs: " + getMCExprStr(TotalNumVGPR),
742 false);
743 }
744 OutStreamer->emitRawComment(" ScratchSize: " + getMCExprStr(ScratchSize),
745 false);
746 OutStreamer->emitRawComment(" MemoryBound: " + Twine(MFI->isMemoryBound()),
747 false);
748}
749
750const MCExpr *AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
751 const MachineFunction &MF) const {
752 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
753 MCContext &Ctx = MF.getContext();
754 uint16_t KernelCodeProperties = 0;
755 const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI.getUserSGPRInfo();
756
757 if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
758 KernelCodeProperties |=
759 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
760 }
761 if (UserSGPRInfo.hasDispatchPtr()) {
762 KernelCodeProperties |=
763 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
764 }
765 if (UserSGPRInfo.hasQueuePtr()) {
766 KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
767 }
768 if (UserSGPRInfo.hasKernargSegmentPtr()) {
769 KernelCodeProperties |=
770 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
771 }
772 if (UserSGPRInfo.hasDispatchID()) {
773 KernelCodeProperties |=
774 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
775 }
776 if (UserSGPRInfo.hasFlatScratchInit()) {
777 KernelCodeProperties |=
778 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
779 }
780 if (UserSGPRInfo.hasPrivateSegmentSize()) {
781 KernelCodeProperties |=
782 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
783 }
784 if (MF.getSubtarget<GCNSubtarget>().isWave32()) {
785 KernelCodeProperties |=
786 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
787 }
788
789 // CurrentProgramInfo.DynamicCallStack is a MCExpr and could be
790 // un-evaluatable at this point so it cannot be conditionally checked here.
791 // Instead, we'll directly shift the possibly unknown MCExpr into its place
792 // and bitwise-or it into KernelCodeProperties.
793 const MCExpr *KernelCodePropExpr =
794 MCConstantExpr::create(KernelCodeProperties, Ctx);
795 const MCExpr *OrValue = MCConstantExpr::create(
796 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT, Ctx);
797 OrValue = MCBinaryExpr::createShl(CurrentProgramInfo.DynamicCallStack,
798 OrValue, Ctx);
799 KernelCodePropExpr = MCBinaryExpr::createOr(KernelCodePropExpr, OrValue, Ctx);
800
801 return KernelCodePropExpr;
802}
803
// Assemble the complete MCKernelDescriptor for kernel \p MF from the computed
// program info \p PI: segment sizes, kernarg size, the three compute_pgm_rsrc
// words, kernel code properties, and the kernarg-preload count.
804MCKernelDescriptor
805AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(const MachineFunction &MF,
806                                            const SIProgramInfo &PI) const {
807  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
808  const Function &F = MF.getFunction();
809  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
810  MCContext &Ctx = MF.getContext();
811
812  MCKernelDescriptor KernelDescriptor;
813
814  KernelDescriptor.group_segment_fixed_size =
// NOTE(review): the LDS-size expression assigned above was elided.
816  KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;
817
818  Align MaxKernArgAlign;
819  KernelDescriptor.kernarg_size = MCConstantExpr::create(
820      STM.getKernArgSegmentSize(F, MaxKernArgAlign), Ctx);
821
822  KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1(STM, Ctx);
823  KernelDescriptor.compute_pgm_rsrc2 = PI.getComputePGMRSrc2(STM, Ctx);
824  KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);
825
  // Sanity-check rsrc3: it must be zero unless the subtarget defines it
  // (values only used inside the elided assert below).
826  int64_t PGM_Rsrc3 = 1;
827  bool EvaluatableRsrc3 =
828      CurrentProgramInfo.ComputePGMRSrc3->evaluateAsAbsolute(PGM_Rsrc3);
829  (void)PGM_Rsrc3;
830  (void)EvaluatableRsrc3;
// NOTE(review): the `assert(` line opening this condition was elided.
832      STM.hasGFX90AInsts() || STM.hasGFX1250Insts() || !EvaluatableRsrc3 ||
833      static_cast<uint64_t>(PGM_Rsrc3) == 0);
834  KernelDescriptor.compute_pgm_rsrc3 = CurrentProgramInfo.ComputePGMRSrc3;
835
836  KernelDescriptor.kernarg_preload = MCConstantExpr::create(
837      AMDGPU::hasKernargPreload(STM) ? Info->getNumKernargPreloadedSGPRs() : 0,
838      Ctx);
839
840  return KernelDescriptor;
841}
842
844 // Init target streamer lazily on the first function so that previous passes
845 // can set metadata.
847 initTargetStreamer(*MF.getFunction().getParent());
848
849 ResourceUsage =
851 CurrentProgramInfo.reset(MF);
852
853 const AMDGPUMachineFunctionInfo *MFI =
854 MF.getInfo<AMDGPUMachineFunctionInfo>();
855 MCContext &Ctx = MF.getContext();
856
857 // The starting address of all shader programs must be 256 bytes aligned.
858 // Regular functions just need the basic required instruction alignment.
859 MF.ensureAlignment(MFI->isEntryFunction() ? Align(256) : Align(4));
860
862
863 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
865 // FIXME: This should be an explicit check for Mesa.
866 if (!STM.isAmdHsaOS() && !STM.isAmdPalOS()) {
867 MCSectionELF *ConfigSection =
868 Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
869 OutStreamer->switchSection(ConfigSection);
870 }
871
872 RI.gatherResourceInfo(MF, *ResourceUsage, OutContext);
873
876 *ResourceUsage;
877 FunctionInfos.push_back(
878 {/*NumSGPR=*/static_cast<uint32_t>(RU.NumExplicitSGPR),
879 /*NumArchVGPR=*/static_cast<uint32_t>(RU.NumVGPR),
880 /*NumAccVGPR=*/static_cast<uint32_t>(RU.NumAGPR),
881 /*PrivateSegmentSize=*/static_cast<uint32_t>(RU.PrivateSegmentSize),
882 /*UsesVCC=*/RU.UsesVCC,
883 /*UsesFlatScratch=*/RU.UsesFlatScratch,
884 /*HasDynStack=*/RU.HasDynamicallySizedStack,
885 /*Sym=*/getSymbol(&MF.getFunction())});
886 }
887
888 if (MFI->isModuleEntryFunction()) {
889 getSIProgramInfo(CurrentProgramInfo, MF);
890 }
891
892 if (STM.isAmdPalOS()) {
893 if (MFI->isEntryFunction())
894 EmitPALMetadata(MF, CurrentProgramInfo);
895 else if (MFI->isModuleEntryFunction())
896 emitPALFunctionMetadata(MF);
897 } else if (!STM.isAmdHsaOS()) {
898 EmitProgramInfoSI(MF, CurrentProgramInfo);
899 }
900
901 DumpCodeInstEmitter = nullptr;
902 if (STM.dumpCode()) {
903 // For -dumpcode, get the assembler out of the streamer. This only works
904 // with -filetype=obj.
905 MCAssembler *Assembler = OutStreamer->getAssemblerPtr();
906 if (Assembler)
907 DumpCodeInstEmitter = Assembler->getEmitterPtr();
908 }
909
910 DisasmLines.clear();
911 HexLines.clear();
913
915
916 emitResourceUsageRemarks(MF, CurrentProgramInfo, MFI->isModuleEntryFunction(),
917 STM.hasMAIInsts());
918
919 {
922 RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumVGPR, OutContext),
923 RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumAGPR, OutContext),
924 RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumSGPR, OutContext),
925 RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumNamedBarrier,
926 OutContext),
927 RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_PrivateSegSize,
928 OutContext),
929 RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_UsesVCC, OutContext),
930 RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_UsesFlatScratch,
931 OutContext),
932 RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasDynSizedStack,
933 OutContext),
934 RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasRecursion,
935 OutContext),
936 RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasIndirectCall,
937 OutContext));
938 }
939
940 // Emit _dvgpr$ symbol when appropriate.
941 emitDVgprSymbol(MF);
942
943 if (isVerbose()) {
944 MCSectionELF *CommentSection =
945 Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
946 OutStreamer->switchSection(CommentSection);
947
948 if (!MFI->isEntryFunction()) {
950 OutStreamer->emitRawComment(" Function info:", false);
951
952 emitCommonFunctionComments(
953 RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumVGPR, OutContext)
954 ->getVariableValue(),
955 STM.hasMAIInsts() ? RI.getSymbol(CurrentFnSym->getName(),
956 RIK::RIK_NumAGPR, OutContext)
957 ->getVariableValue()
958 : nullptr,
959 RI.createTotalNumVGPRs(MF, Ctx),
960 RI.createTotalNumSGPRs(
961 MF,
962 MF.getSubtarget<GCNSubtarget>().getTargetID().isXnackOnOrAny(),
963 Ctx),
964 RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_PrivateSegSize,
966 ->getVariableValue(),
967 CurrentProgramInfo.getFunctionCodeSize(MF), MFI);
968 return false;
969 }
970
971 OutStreamer->emitRawComment(" Kernel info:", false);
972 emitCommonFunctionComments(
973 CurrentProgramInfo.NumArchVGPR,
974 STM.hasMAIInsts() ? CurrentProgramInfo.NumAccVGPR : nullptr,
975 CurrentProgramInfo.NumVGPR, CurrentProgramInfo.NumSGPR,
976 CurrentProgramInfo.ScratchSize,
977 CurrentProgramInfo.getFunctionCodeSize(MF), MFI);
978
979 OutStreamer->emitRawComment(
980 " FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false);
981 OutStreamer->emitRawComment(
982 " IeeeMode: " + Twine(CurrentProgramInfo.IEEEMode), false);
983 OutStreamer->emitRawComment(
984 " LDSByteSize: " + Twine(CurrentProgramInfo.LDSSize) +
985 " bytes/workgroup (compile time only)",
986 false);
987
988 OutStreamer->emitRawComment(
989 " SGPRBlocks: " + getMCExprStr(CurrentProgramInfo.SGPRBlocks), false);
990
991 OutStreamer->emitRawComment(
992 " VGPRBlocks: " + getMCExprStr(CurrentProgramInfo.VGPRBlocks), false);
993
994 OutStreamer->emitRawComment(
995 " NumSGPRsForWavesPerEU: " +
996 getMCExprStr(CurrentProgramInfo.NumSGPRsForWavesPerEU),
997 false);
998 OutStreamer->emitRawComment(
999 " NumVGPRsForWavesPerEU: " +
1000 getMCExprStr(CurrentProgramInfo.NumVGPRsForWavesPerEU),
1001 false);
1002
1003 if (STM.hasGFX90AInsts()) {
1004 const MCExpr *AdjustedAccum = MCBinaryExpr::createAdd(
1005 CurrentProgramInfo.AccumOffset, MCConstantExpr::create(1, Ctx), Ctx);
1006 AdjustedAccum = MCBinaryExpr::createMul(
1007 AdjustedAccum, MCConstantExpr::create(4, Ctx), Ctx);
1008 OutStreamer->emitRawComment(
1009 " AccumOffset: " + getMCExprStr(AdjustedAccum), false);
1010 }
1011
1012 if (STM.hasGFX1250Insts())
1013 OutStreamer->emitRawComment(
1014 " NamedBarCnt: " + getMCExprStr(CurrentProgramInfo.NamedBarCnt),
1015 false);
1016
1017 OutStreamer->emitRawComment(
1018 " Occupancy: " + getMCExprStr(CurrentProgramInfo.Occupancy), false);
1019
1020 OutStreamer->emitRawComment(
1021 " WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);
1022
1023 OutStreamer->emitRawComment(
1024 " COMPUTE_PGM_RSRC2:SCRATCH_EN: " +
1025 getMCExprStr(CurrentProgramInfo.ScratchEnable),
1026 false);
1027 OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +
1028 Twine(CurrentProgramInfo.UserSGPR),
1029 false);
1030 OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
1031 Twine(CurrentProgramInfo.TrapHandlerEnable),
1032 false);
1033 OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_X_EN: " +
1034 Twine(CurrentProgramInfo.TGIdXEnable),
1035 false);
1036 OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
1037 Twine(CurrentProgramInfo.TGIdYEnable),
1038 false);
1039 OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
1040 Twine(CurrentProgramInfo.TGIdZEnable),
1041 false);
1042 OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
1043 Twine(CurrentProgramInfo.TIdIGCompCount),
1044 false);
1045
1046 [[maybe_unused]] int64_t PGMRSrc3;
1048 STM.hasGFX90AInsts() || STM.hasGFX1250Insts() ||
1049 (CurrentProgramInfo.ComputePGMRSrc3->evaluateAsAbsolute(PGMRSrc3) &&
1050 static_cast<uint64_t>(PGMRSrc3) == 0));
1051 if (STM.hasGFX90AInsts()) {
1052 OutStreamer->emitRawComment(
1053 " COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " +
1054 getMCExprStr(MCKernelDescriptor::bits_get(
1055 CurrentProgramInfo.ComputePGMRSrc3,
1056 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
1057 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, Ctx)),
1058 false);
1059 OutStreamer->emitRawComment(
1060 " COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " +
1061 getMCExprStr(MCKernelDescriptor::bits_get(
1062 CurrentProgramInfo.ComputePGMRSrc3,
1063 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
1064 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Ctx)),
1065 false);
1066 }
1067 }
1068
1069 if (DumpCodeInstEmitter) {
1070
1071 OutStreamer->switchSection(
1072 Context.getELFSection(".AMDGPU.disasm", ELF::SHT_PROGBITS, 0));
1073
1074 for (size_t i = 0; i < DisasmLines.size(); ++i) {
1075 std::string Comment = "\n";
1076 if (!HexLines[i].empty()) {
1077 Comment = std::string(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
1078 Comment += " ; " + HexLines[i] + "\n";
1079 }
1080
1081 OutStreamer->emitBytes(StringRef(DisasmLines[i]));
1082 OutStreamer->emitBytes(StringRef(Comment));
1083 }
1084 }
1085
1086 return false;
1087}
1088
1089// When appropriate, add a _dvgpr$ symbol, with the value of the function
1090// symbol, plus an offset encoding one less than the number of VGPR blocks used
1091// by the function in bits 5..3 of the symbol value. A "VGPR block" can be
1092// either 16 VGPRs (for a max of 128), or 32 VGPRs (for a max of 256). This is
1093// used by a front-end to have functions that are chained rather than called,
1094// and a dispatcher that dynamically resizes the VGPR count before dispatching
1095// to a function.
1096void AMDGPUAsmPrinter::emitDVgprSymbol(MachineFunction &MF) {
1098 if (MFI.isDynamicVGPREnabled() &&
1100 MCContext &Ctx = MF.getContext();
1101 unsigned BlockSize = MFI.getDynamicVGPRBlockSize();
1102 MCValue NumVGPRs;
1103 if (!CurrentProgramInfo.NumVGPRsForWavesPerEU->evaluateAsRelocatable(
1104 NumVGPRs, nullptr) ||
1105 !NumVGPRs.isAbsolute()) {
1106 llvm_unreachable("unable to resolve NumVGPRs for _dvgpr$ symbol");
1107 }
1108 // Calculate number of VGPR blocks.
1109 // Treat 0 VGPRs as 1 VGPR to avoid underflowing.
1110 unsigned NumBlocks =
1111 divideCeil(std::max(unsigned(NumVGPRs.getConstant()), 1U), BlockSize);
1112
1113 if (NumBlocks > 8) {
1115 "too many DVGPR blocks for _dvgpr$ symbol for '" +
1116 Twine(CurrentFnSym->getName()) + "'");
1117 return;
1118 }
1119 unsigned EncodedNumBlocks = (NumBlocks - 1) << 3;
1120 // Add to function symbol to create _dvgpr$ symbol.
1121 const MCExpr *DVgprFuncVal = MCBinaryExpr::createAdd(
1123 MCConstantExpr::create(EncodedNumBlocks, Ctx), Ctx);
1124 MCSymbol *DVgprFuncSym =
1125 Ctx.getOrCreateSymbol(Twine("_dvgpr$") + CurrentFnSym->getName());
1126 OutStreamer->emitAssignment(DVgprFuncSym, DVgprFuncVal);
1127 emitVisibility(DVgprFuncSym, MF.getFunction().getVisibility());
1128 emitLinkage(&MF.getFunction(), DVgprFuncSym);
1129 }
1130}
1131
1132 // TODO: Fold this into emitFunctionBodyStart.
// Initialize the streamer's target ID (XNACK / SRAM-ECC settings) from the
// global subtarget, then refine any 'Any' settings using the per-function
// subtargets found in the module.
1133 void AMDGPUAsmPrinter::initializeTargetID(const Module &M) {
1134   // In the beginning all features are either 'Any' or 'NotSupported',
1135   // depending on global target features. This will cover empty modules.
1137       getGlobalSTI()->getFeatureString());
1138
1139   // If module is empty, we are done.
1140   if (M.empty())
1141     return;
1142
1143   // If module is not empty, need to find first 'Off' or 'On' feature
1144   // setting per feature from functions in module.
1145   for (auto &F : M) {
1146     auto &TSTargetID = getTargetStreamer()->getTargetID();
// Stop early once both features are either unsupported or pinned to a
// concrete On/Off value -- nothing left to infer from later functions.
1147     if ((!TSTargetID->isXnackSupported() || TSTargetID->isXnackOnOrOff()) &&
1148         (!TSTargetID->isSramEccSupported() || TSTargetID->isSramEccOnOrOff()))
1149       break;
1150
1151     const GCNSubtarget &STM = TM.getSubtarget<GCNSubtarget>(F);
1152     const IsaInfo::AMDGPUTargetID &STMTargetID = STM.getTargetID();
// Adopt the first function-level setting that resolves an 'Any'.
1153     if (TSTargetID->isXnackSupported())
1154       if (TSTargetID->getXnackSetting() == IsaInfo::TargetIDSetting::Any)
1155         TSTargetID->setXnackSetting(STMTargetID.getXnackSetting());
1156     if (TSTargetID->isSramEccSupported())
1157       if (TSTargetID->getSramEccSetting() == IsaInfo::TargetIDSetting::Any)
1158         TSTargetID->setSramEccSetting(STMTargetID.getSramEccSetting());
1159   }
1160 }
1161
1162// AccumOffset computed for the MCExpr equivalent of:
1163// alignTo(std::max(1, NumVGPR), 4) / 4 - 1;
1164static const MCExpr *computeAccumOffset(const MCExpr *NumVGPR, MCContext &Ctx) {
1165 const MCExpr *ConstFour = MCConstantExpr::create(4, Ctx);
1166 const MCExpr *ConstOne = MCConstantExpr::create(1, Ctx);
1167
1168 // Can't be lower than 1 for subsequent alignTo.
1169 const MCExpr *MaximumTaken =
1170 AMDGPUMCExpr::createMax({ConstOne, NumVGPR}, Ctx);
1171
1172 // Practically, it's computing divideCeil(MaximumTaken, 4).
1173 const MCExpr *DivCeil = MCBinaryExpr::createDiv(
1174 AMDGPUMCExpr::createAlignTo(MaximumTaken, ConstFour, Ctx), ConstFour,
1175 Ctx);
1176
1177 return MCBinaryExpr::createSub(DivCeil, ConstOne, Ctx);
1178}
1179
// Populate ProgInfo with the program-register and resource-usage values for
// MF. Most register counts are built as MCExprs referencing the per-function
// resource-info symbols so they can be resolved late, after all callees'
// usage is known.
1180 void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
1181                                         const MachineFunction &MF) {
1182   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
1183   MCContext &Ctx = MF.getContext();
1184
// Wrap a plain integer as an MCConstantExpr.
1185   auto CreateExpr = [&Ctx](int64_t Value) {
1186     return MCConstantExpr::create(Value, Ctx);
1187   };
1188
// Fold an MCExpr to a constant when possible; returns false while the
// expression is still symbolic.
1189   auto TryGetMCExprValue = [](const MCExpr *Value, uint64_t &Res) -> bool {
1190     int64_t Val;
1191     if (Value->evaluateAsAbsolute(Val)) {
1192       Res = Val;
1193       return true;
1194     }
1195     return false;
1196   };
1197
// Reference the current function's resource-usage symbol of the given kind.
1198   auto GetSymRefExpr =
1199       [&](MCResourceInfo::ResourceInfoKind RIK) -> const MCExpr * {
1200     MCSymbol *Sym = RI.getSymbol(CurrentFnSym->getName(), RIK, OutContext);
1201     return MCSymbolRefExpr::create(Sym, Ctx);
1202   };
1203
1205   ProgInfo.NumArchVGPR = GetSymRefExpr(RIK::RIK_NumVGPR);
1206   ProgInfo.NumAccVGPR = GetSymRefExpr(RIK::RIK_NumAGPR);
1208       ProgInfo.NumAccVGPR, ProgInfo.NumArchVGPR, Ctx);
1209
1210   ProgInfo.AccumOffset = computeAccumOffset(ProgInfo.NumArchVGPR, Ctx);
1211   ProgInfo.TgSplit = STM.isTgSplitEnabled();
1212   ProgInfo.NumSGPR = GetSymRefExpr(RIK::RIK_NumSGPR);
1213   ProgInfo.ScratchSize = GetSymRefExpr(RIK::RIK_PrivateSegSize);
1214   ProgInfo.VCCUsed = GetSymRefExpr(RIK::RIK_UsesVCC);
1215   ProgInfo.FlatUsed = GetSymRefExpr(RIK::RIK_UsesFlatScratch);
// A call stack is dynamic if any reachable function has a dynamically
// sized stack or there is recursion.
1216   ProgInfo.DynamicCallStack =
1217       MCBinaryExpr::createOr(GetSymRefExpr(RIK::RIK_HasDynSizedStack),
1218                              GetSymRefExpr(RIK::RIK_HasRecursion), Ctx);
1219
// Named barriers are counted in groups of 4 (alignTo(N, 4) / 4).
1220   const MCExpr *BarBlkConst = MCConstantExpr::create(4, Ctx);
1221   const MCExpr *AlignToBlk = AMDGPUMCExpr::createAlignTo(
1222       GetSymRefExpr(RIK::RIK_NumNamedBarrier), BarBlkConst, Ctx);
1223   ProgInfo.NamedBarCnt = MCBinaryExpr::createDiv(AlignToBlk, BarBlkConst, Ctx);
1224
1225   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1226
1227   // The calculations related to SGPR/VGPR blocks are
1228   // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
1229   // unified.
1230   const MCExpr *ExtraSGPRs = AMDGPUMCExpr::createExtraSGPRs(
1231       ProgInfo.VCCUsed, ProgInfo.FlatUsed,
1232       getTargetStreamer()->getTargetID()->isXnackOnOrAny(), Ctx);
1233
1234   // Check the addressable register limit before we add ExtraSGPRs.
1236       !STM.hasSGPRInitBug()) {
1237     unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
1238     uint64_t NumSgpr;
1239     if (TryGetMCExprValue(ProgInfo.NumSGPR, NumSgpr) &&
1240         NumSgpr > MaxAddressableNumSGPRs) {
1241       // This can happen due to a compiler bug or when using inline asm.
1242       LLVMContext &Ctx = MF.getFunction().getContext();
1243       Ctx.diagnose(DiagnosticInfoResourceLimit(
1244           MF.getFunction(), "addressable scalar registers", NumSgpr,
1245           MaxAddressableNumSGPRs, DS_Error, DK_ResourceLimit));
// Clamp so downstream encoding still produces something usable after the
// error is reported.
1246       ProgInfo.NumSGPR = CreateExpr(MaxAddressableNumSGPRs - 1);
1247     }
1248   }
1249
1250   // Account for extra SGPRs and VGPRs reserved for debugger use.
1251   ProgInfo.NumSGPR = MCBinaryExpr::createAdd(ProgInfo.NumSGPR, ExtraSGPRs, Ctx);
1252
1253   const Function &F = MF.getFunction();
1254
1255   // Ensure there are enough SGPRs and VGPRs for wave dispatch, where wave
1256   // dispatch registers as function args.
1257   unsigned WaveDispatchNumSGPR = MFI->getNumWaveDispatchSGPRs(),
1258            WaveDispatchNumVGPR = MFI->getNumWaveDispatchVGPRs();
1259
1260   if (WaveDispatchNumSGPR) {
1262         {ProgInfo.NumSGPR,
1263          MCBinaryExpr::createAdd(CreateExpr(WaveDispatchNumSGPR), ExtraSGPRs,
1264                                  Ctx)},
1265         Ctx);
1266   }
1267
1268   if (WaveDispatchNumVGPR) {
1270         {ProgInfo.NumVGPR, CreateExpr(WaveDispatchNumVGPR)}, Ctx);
1271
1273         ProgInfo.NumAccVGPR, ProgInfo.NumArchVGPR, Ctx);
1274   }
1275
1276   // Adjust number of registers used to meet default/requested minimum/maximum
1277   // number of waves per execution unit request.
1278   unsigned MaxWaves = MFI->getMaxWavesPerEU();
1279   ProgInfo.NumSGPRsForWavesPerEU =
1280       AMDGPUMCExpr::createMax({ProgInfo.NumSGPR, CreateExpr(1ul),
1281                                CreateExpr(STM.getMinNumSGPRs(MaxWaves))},
1282                               Ctx);
1283   ProgInfo.NumVGPRsForWavesPerEU =
1284       AMDGPUMCExpr::createMax({ProgInfo.NumVGPR, CreateExpr(1ul),
1285                                CreateExpr(STM.getMinNumVGPRs(
1286                                    MaxWaves, MFI->getDynamicVGPRBlockSize()))},
1287                               Ctx);
1288
// Re-check the limit now that ExtraSGPRs have been added (also covers
// subtargets with the SGPR init bug, which skipped the earlier check).
1290       STM.hasSGPRInitBug()) {
1291     unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
1292     uint64_t NumSgpr;
1293     if (TryGetMCExprValue(ProgInfo.NumSGPR, NumSgpr) &&
1294         NumSgpr > MaxAddressableNumSGPRs) {
1295       // This can happen due to a compiler bug or when using inline asm to use
1296       // the registers which are usually reserved for vcc etc.
1297       LLVMContext &Ctx = MF.getFunction().getContext();
1298       Ctx.diagnose(DiagnosticInfoResourceLimit(
1299           MF.getFunction(), "scalar registers", NumSgpr, MaxAddressableNumSGPRs,
1301       ProgInfo.NumSGPR = CreateExpr(MaxAddressableNumSGPRs);
1302       ProgInfo.NumSGPRsForWavesPerEU = CreateExpr(MaxAddressableNumSGPRs);
1303     }
1304   }
1305
1306   if (STM.hasSGPRInitBug()) {
1307     ProgInfo.NumSGPR =
1309     ProgInfo.NumSGPRsForWavesPerEU =
1311   }
1312
1313   if (MFI->getNumUserSGPRs() > STM.getMaxNumUserSGPRs()) {
1314     LLVMContext &Ctx = MF.getFunction().getContext();
1315     Ctx.diagnose(DiagnosticInfoResourceLimit(
1316         MF.getFunction(), "user SGPRs", MFI->getNumUserSGPRs(),
1318   }
1319
1320   if (MFI->getLDSSize() > STM.getAddressableLocalMemorySize()) {
1321     LLVMContext &Ctx = MF.getFunction().getContext();
1322     Ctx.diagnose(DiagnosticInfoResourceLimit(
1323         MF.getFunction(), "local memory", MFI->getLDSSize(),
1325   }
1326   // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
1327   // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
1328   auto GetNumGPRBlocks = [&CreateExpr, &Ctx](const MCExpr *NumGPR,
1329                                              unsigned Granule) {
1330     const MCExpr *OneConst = CreateExpr(1ul);
1331     const MCExpr *GranuleConst = CreateExpr(Granule);
1332     const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
1333     const MCExpr *AlignToGPR =
1334         AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
1335     const MCExpr *DivGPR =
1336         MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
1337     const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
1338     return SubGPR;
1339   };
1340   // GFX10+ will always allocate 128 SGPRs and this field must be 0
1342     ProgInfo.SGPRBlocks = CreateExpr(0ul);
1343   } else {
1344     ProgInfo.SGPRBlocks = GetNumGPRBlocks(
1346   }
1347   ProgInfo.VGPRBlocks = GetNumGPRBlocks(ProgInfo.NumVGPRsForWavesPerEU,
1349
1350   const SIModeRegisterDefaults Mode = MFI->getMode();
1351
1352   // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
1353   // register.
1354   ProgInfo.FloatMode = getFPMode(Mode);
1355
1356   ProgInfo.IEEEMode = Mode.IEEE;
1357
1358   // Make clamp modifier on NaN input returns 0.
1359   ProgInfo.DX10Clamp = Mode.DX10Clamp;
1360
// LDS is programmed in blocks; the shift (block size) is selected by the
// subtarget's LDS dword granularity below.
1361   unsigned LDSAlignShift = 8;
1362   switch (getLdsDwGranularity(STM)) {
1363   case 512:
1364   case 320:
1365     LDSAlignShift = 11;
1366     break;
1367   case 128:
1368     LDSAlignShift = 9;
1369     break;
1370   case 64:
1371     LDSAlignShift = 8;
1372     break;
1373   default:
// FIXME: typo in the diagnostic message -- "invald" should be "invalid".
1374     llvm_unreachable("invald LDS block size");
1375   }
1376
1377   ProgInfo.SGPRSpill = MFI->getNumSpilledSGPRs();
1378   ProgInfo.VGPRSpill = MFI->getNumSpilledVGPRs();
1379
1380   ProgInfo.LDSSize = MFI->getLDSSize();
1381   ProgInfo.LDSBlocks =
1382       alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
1383
1384   // The MCExpr equivalent of divideCeil.
1385   auto DivideCeil = [&Ctx](const MCExpr *Numerator, const MCExpr *Denominator) {
1386     const MCExpr *Ceil =
1387         AMDGPUMCExpr::createAlignTo(Numerator, Denominator, Ctx);
1388     return MCBinaryExpr::createDiv(Ceil, Denominator, Ctx);
1389   };
1390
1391   // Scratch is allocated in 64-dword or 256-dword blocks.
1392   unsigned ScratchAlignShift =
1393       STM.getGeneration() >= AMDGPUSubtarget::GFX11 ? 8 : 10;
1394   // We need to program the hardware with the amount of scratch memory that
1395   // is used by the entire wave. ProgInfo.ScratchSize is the amount of
1396   // scratch memory used per thread.
1397   ProgInfo.ScratchBlocks = DivideCeil(
1399                               CreateExpr(STM.getWavefrontSize()), Ctx),
1400       CreateExpr(1ULL << ScratchAlignShift));
1401
1402   if (STM.supportsWGP()) {
1403     ProgInfo.WgpMode = STM.isCuModeEnabled() ? 0 : 1;
1404   }
1405
1406   if (getIsaVersion(getGlobalSTI()->getCPU()).Major >= 10) {
1407     ProgInfo.MemOrdered = 1;
1408     ProgInfo.FwdProgress = !F.hasFnAttribute("amdgpu-no-fwd-progress");
1409   }
1410
1411   // 0 = X, 1 = XY, 2 = XYZ
1412   unsigned TIDIGCompCnt = 0;
1413   if (MFI->hasWorkItemIDZ())
1414     TIDIGCompCnt = 2;
1415   else if (MFI->hasWorkItemIDY())
1416     TIDIGCompCnt = 1;
1417
1418   // The private segment wave byte offset is the last of the system SGPRs. We
1419   // initially assumed it was allocated, and may have used it. It shouldn't harm
1420   // anything to disable it if we know the stack isn't used here. We may still
1421   // have emitted code reading it to initialize scratch, but if that's unused
1422   // reading garbage should be OK.
1425                              MCConstantExpr::create(0, Ctx), Ctx),
1426       ProgInfo.DynamicCallStack, Ctx);
1427
1428   ProgInfo.UserSGPR = MFI->getNumUserSGPRs();
1429   // For AMDHSA, TRAP_HANDLER must be zero, as it is populated by the CP.
1430   ProgInfo.TrapHandlerEnable = STM.isAmdHsaOS() ? 0 : STM.hasTrapHandler();
1431   ProgInfo.TGIdXEnable = MFI->hasWorkGroupIDX();
1432   ProgInfo.TGIdYEnable = MFI->hasWorkGroupIDY();
1433   ProgInfo.TGIdZEnable = MFI->hasWorkGroupIDZ();
1434   ProgInfo.TGSizeEnable = MFI->hasWorkGroupInfo();
1435   ProgInfo.TIdIGCompCount = TIDIGCompCnt;
1436   ProgInfo.EXCPEnMSB = 0;
1437   // For AMDHSA, LDS_SIZE must be zero, as it is populated by the CP.
1438   ProgInfo.LdsSize = STM.isAmdHsaOS() ? 0 : ProgInfo.LDSBlocks;
1439   ProgInfo.EXCPEnable = 0;
1440
1441   // return ((Dst & ~Mask) | (Value << Shift))
1442   auto SetBits = [&Ctx](const MCExpr *Dst, const MCExpr *Value, uint32_t Mask,
1443                         uint32_t Shift) {
1444     const auto *Shft = MCConstantExpr::create(Shift, Ctx);
1445     const auto *Msk = MCConstantExpr::create(Mask, Ctx);
1446     Dst = MCBinaryExpr::createAnd(Dst, MCUnaryExpr::createNot(Msk, Ctx), Ctx);
1448                                   Ctx);
1449     return Dst;
1450   };
1451
// Pack GFX90A-specific fields (ACCUM_OFFSET, TG_SPLIT) into RSRC3.
1452   if (STM.hasGFX90AInsts()) {
1453     ProgInfo.ComputePGMRSrc3 =
1454         SetBits(ProgInfo.ComputePGMRSrc3, ProgInfo.AccumOffset,
1455                 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
1456                 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT);
1457     ProgInfo.ComputePGMRSrc3 =
1458         SetBits(ProgInfo.ComputePGMRSrc3, CreateExpr(ProgInfo.TgSplit),
1459                 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
1460                 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT);
1461   }
1462
1463   if (STM.hasGFX1250Insts())
1464     ProgInfo.ComputePGMRSrc3 =
1465         SetBits(ProgInfo.ComputePGMRSrc3, ProgInfo.NamedBarCnt,
1466                 amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
1467                 amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT);
1468
1469   ProgInfo.Occupancy = createOccupancy(
1470       STM.computeOccupancy(F, ProgInfo.LDSSize).second,
1472       MFI->getDynamicVGPRBlockSize(), STM, Ctx);
1473
// Diagnose when the achieved occupancy misses the user-requested minimum
// from the "amdgpu-waves-per-eu" attribute.
1474   const auto [MinWEU, MaxWEU] =
1475       AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", {0, 0}, true);
1476   uint64_t Occupancy;
1477   if (TryGetMCExprValue(ProgInfo.Occupancy, Occupancy) && Occupancy < MinWEU) {
1478     DiagnosticInfoOptimizationFailure Diag(
1479         F, F.getSubprogram(),
1480         "failed to meet occupancy target given by 'amdgpu-waves-per-eu' in "
1481         "'" +
1482             F.getName() + "': desired occupancy was " + Twine(MinWEU) +
1483             ", final occupancy is " + Twine(Occupancy));
1484     F.getContext().diagnose(Diag);
1485   }
1486
// GFX11+: record an instruction-prefetch size hint in RSRC3 as a count of
// 128-byte lines, clamped to the width of the field for the generation.
1487   if (isGFX11Plus(STM)) {
1488     uint32_t CodeSizeInBytes = (uint32_t)std::min(
1489         ProgInfo.getFunctionCodeSize(MF, true /* IsLowerBound */),
1490         (uint64_t)std::numeric_limits<uint32_t>::max());
1491     uint32_t CodeSizeInLines = divideCeil(CodeSizeInBytes, 128);
1492     uint32_t Field, Shift, Width;
1493     if (isGFX11(STM)) {
1494       Field = amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE;
1495       Shift = amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_SHIFT;
1496       Width = amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_WIDTH;
1497     } else {
1498       Field = amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE;
1499       Shift = amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_SHIFT;
1500       Width = amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_WIDTH;
1501     }
1502     uint64_t InstPrefSize = std::min(CodeSizeInLines, (1u << Width) - 1);
1503     ProgInfo.ComputePGMRSrc3 = SetBits(ProgInfo.ComputePGMRSrc3,
1504                                        CreateExpr(InstPrefSize), Field, Shift);
1505   }
1506 }
1507
// Map a calling convention to the SPI resource (RSRC) register that gets
// programmed for it; used by EmitProgramInfoSI below. The per-shader-stage
// cases select the stage-specific register -- TODO confirm against the full
// switch body.
1508 static unsigned getRsrcReg(CallingConv::ID CallConv) {
1509   switch (CallConv) {
1510   default:
1511     [[fallthrough]];
1526   }
1527 }
1528
// Emit the SI program-info register writes (register number followed by
// value pairs) for non-HSA OSes: PGM_RSRC1/2 for compute, packed GPR-block
// counts otherwise, plus scratch size and spill statistics.
1529 void AMDGPUAsmPrinter::EmitProgramInfoSI(
1530     const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo) {
1531   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1532   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
1533   unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv());
1534   MCContext &Ctx = MF.getContext();
1535
1536   // (((Value) & Mask) << Shift)
1537   auto SetBits = [&Ctx](const MCExpr *Value, uint32_t Mask, uint32_t Shift) {
1538     const MCExpr *msk = MCConstantExpr::create(Mask, Ctx);
1539     const MCExpr *shft = MCConstantExpr::create(Shift, Ctx);
1541                                    shft, Ctx);
1542   };
1543
// Emit a resolved constant when the expression folds; otherwise emit the
// symbolic expression for the assembler/linker to resolve.
1544   auto EmitResolvedOrExpr = [this](const MCExpr *Value, unsigned Size) {
1545     int64_t Val;
1546     if (Value->evaluateAsAbsolute(Val))
1547       OutStreamer->emitIntValue(static_cast<uint64_t>(Val), Size);
1548     else
1549       OutStreamer->emitValue(Value, Size);
1550   };
1551
1552   if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
1554
1555     EmitResolvedOrExpr(CurrentProgramInfo.getComputePGMRSrc1(STM, Ctx),
1556                        /*Size=*/4);
1557
1559     EmitResolvedOrExpr(CurrentProgramInfo.getComputePGMRSrc2(STM, Ctx),
1560                        /*Size=*/4);
1561
1563
1564     // Sets bits according to S_0286E8_WAVESIZE_* mask and shift values for the
1565     // appropriate generation.
1566     if (STM.getGeneration() >= AMDGPUSubtarget::GFX12) {
1567       EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1568                                  /*Mask=*/0x3FFFF, /*Shift=*/12),
1569                          /*Size=*/4);
1570     } else if (STM.getGeneration() == AMDGPUSubtarget::GFX11) {
1571       EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1572                                  /*Mask=*/0x7FFF, /*Shift=*/12),
1573                          /*Size=*/4);
1574     } else {
1575       EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1576                                  /*Mask=*/0x1FFF, /*Shift=*/12),
1577                          /*Size=*/4);
1578     }
1579
1580     // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
1581     // 0" comment but I don't see a corresponding field in the register spec.
1582   } else {
// Graphics shaders: pack VGPR blocks (bits 5..0) and SGPR blocks (bits
// 9..6) into the stage's RSRC register.
1583     OutStreamer->emitInt32(RsrcReg);
1584
1585     const MCExpr *GPRBlocks = MCBinaryExpr::createOr(
1586         SetBits(CurrentProgramInfo.VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0),
1587         SetBits(CurrentProgramInfo.SGPRBlocks, /*Mask=*/0x0F, /*Shift=*/6),
1588         MF.getContext());
1589     EmitResolvedOrExpr(GPRBlocks, /*Size=*/4);
1591
1592     // Sets bits according to S_0286E8_WAVESIZE_* mask and shift values for the
1593     // appropriate generation.
1594     if (STM.getGeneration() >= AMDGPUSubtarget::GFX12) {
1595       EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1596                                  /*Mask=*/0x3FFFF, /*Shift=*/12),
1597                          /*Size=*/4);
1598     } else if (STM.getGeneration() == AMDGPUSubtarget::GFX11) {
1599       EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1600                                  /*Mask=*/0x7FFF, /*Shift=*/12),
1601                          /*Size=*/4);
1602     } else {
1603       EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
1604                                  /*Mask=*/0x1FFF, /*Shift=*/12),
1605                          /*Size=*/4);
1606     }
1607   }
1608
// Pixel shaders additionally report extra LDS size and the PS input masks.
1609   if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
1611     unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11
1612                                 ? divideCeil(CurrentProgramInfo.LDSBlocks, 2)
1613                                 : CurrentProgramInfo.LDSBlocks;
1614     OutStreamer->emitInt32(S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize));
1616     OutStreamer->emitInt32(MFI->getPSInputEnable());
1618     OutStreamer->emitInt32(MFI->getPSInputAddr());
1619   }
1620
1621   OutStreamer->emitInt32(R_SPILLED_SGPRS);
1622   OutStreamer->emitInt32(MFI->getNumSpilledSGPRs());
1623   OutStreamer->emitInt32(R_SPILLED_VGPRS);
1624   OutStreamer->emitInt32(MFI->getNumSpilledVGPRs());
1625 }
1626
1627 // Helper function to add common PAL Metadata 3.0+
// Records the hardware-stage settings shared by EmitPALMetadata and
// emitPALFunctionMetadata: IEEE/WGP/mem-ordered/forward-progress flags,
// compute-only trap/exception/dynamic-VGPR settings, and the LDS size.
1629                                   const SIProgramInfo &CurrentProgramInfo,
1630                                   CallingConv::ID CC, const GCNSubtarget &ST,
1631                                   unsigned DynamicVGPRBlockSize) {
1632   if (ST.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
1633     MD->setHwStage(CC, ".ieee_mode", (bool)CurrentProgramInfo.IEEEMode);
1634
1635   MD->setHwStage(CC, ".wgp_mode", (bool)CurrentProgramInfo.WgpMode);
1636   MD->setHwStage(CC, ".mem_ordered", (bool)CurrentProgramInfo.MemOrdered);
1637   MD->setHwStage(CC, ".forward_progress", (bool)CurrentProgramInfo.FwdProgress);
1638
1639   if (AMDGPU::isCompute(CC)) {
1640     MD->setHwStage(CC, ".trap_present",
1641                    (bool)CurrentProgramInfo.TrapHandlerEnable);
1642     MD->setHwStage(CC, ".excp_en", CurrentProgramInfo.EXCPEnable);
1643
// A nonzero block size indicates dynamic VGPR allocation is in use.
1644     if (DynamicVGPRBlockSize != 0)
1645       MD->setComputeRegisters(".dynamic_vgpr_en", true);
1646   }
1647
// Convert the LdsSize block count to bytes using the subtarget's dword
// granularity.
1649       CC, ".lds_size",
1650       (unsigned)(CurrentProgramInfo.LdsSize * getLdsDwGranularity(ST) *
1651                  sizeof(uint32_t)));
1652 }
1653
1654 // This is the equivalent of EmitProgramInfoSI above, but for when the OS type
1655 // is AMDPAL. It stores each compute/SPI register setting and other PAL
1656 // metadata items into the PALMD::Metadata, combining with any provided by the
1657 // frontend as LLVM metadata. Once all functions are written, the PAL metadata
1658 // is then written as a single block in the .note section.
1659 void AMDGPUAsmPrinter::EmitPALMetadata(
1660     const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo) {
1661   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1662   auto CC = MF.getFunction().getCallingConv();
1663   auto *MD = getTargetStreamer()->getPALMetadata();
1664   auto &Ctx = MF.getContext();
1665
1666   MD->setEntryPoint(CC, MF.getFunction().getName());
1667   MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU, Ctx);
1668
1669   // For targets that support dynamic VGPRs, set the number of saved dynamic
1670   // VGPRs (if any) in the PAL metadata.
1671   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
1672   if (MFI->isDynamicVGPREnabled() &&
1674     MD->setHwStage(CC, ".dynamic_vgpr_saved_count",
1676
1677   // Only set AGPRs for supported devices
1678   if (STM.hasMAIInsts()) {
1679     MD->setNumUsedAgprs(CC, CurrentProgramInfo.NumAccVGPR);
1680   }
1681
1682   MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU, Ctx);
// PAL metadata < 3.0 uses raw RSRC1/RSRC2 register values; 3.0+ uses named
// per-stage fields instead.
1683   if (MD->getPALMajorVersion() < 3) {
1684     MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC, STM, Ctx), Ctx);
1685     if (AMDGPU::isCompute(CC)) {
1686       MD->setRsrc2(CC, CurrentProgramInfo.getComputePGMRSrc2(STM, Ctx), Ctx);
1687     } else {
// Graphics: only the SCRATCH_EN bit is derived here, from whether any
// scratch blocks are in use.
1688       const MCExpr *HasScratchBlocks =
1689           MCBinaryExpr::createGT(CurrentProgramInfo.ScratchBlocks,
1690                                  MCConstantExpr::create(0, Ctx), Ctx);
1691       auto [Shift, Mask] = getShiftMask(C_00B84C_SCRATCH_EN);
1692       MD->setRsrc2(CC, maskShiftSet(HasScratchBlocks, Mask, Shift, Ctx), Ctx);
1693     }
1694   } else {
1695     MD->setHwStage(CC, ".debug_mode", (bool)CurrentProgramInfo.DebugMode);
1696     MD->setHwStage(CC, ".scratch_en", msgpack::Type::Boolean,
1697                    CurrentProgramInfo.ScratchEnable);
1698     EmitPALMetadataCommon(MD, CurrentProgramInfo, CC, STM,
1700   }
1701
1702   // ScratchSize is in bytes, 16 aligned.
1703   MD->setScratchSize(
1704       CC,
1705       AMDGPUMCExpr::createAlignTo(CurrentProgramInfo.ScratchSize,
1706                                   MCConstantExpr::create(16, Ctx), Ctx),
1707       Ctx);
1708
// Pixel shaders also report extra LDS usage and the SPI PS input masks.
1709   if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
1710     unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11
1711                                 ? divideCeil(CurrentProgramInfo.LDSBlocks, 2)
1712                                 : CurrentProgramInfo.LDSBlocks;
1713     if (MD->getPALMajorVersion() < 3) {
1714       MD->setRsrc2(
1715           CC,
1717           Ctx);
1718       MD->setSpiPsInputEna(MFI->getPSInputEnable());
1719       MD->setSpiPsInputAddr(MFI->getPSInputAddr());
1720     } else {
1721       // Graphics registers
1722       const unsigned ExtraLdsDwGranularity =
1723           STM.getGeneration() >= AMDGPUSubtarget::GFX11 ? 256 : 128;
1724       MD->setGraphicsRegisters(
1725           ".ps_extra_lds_size",
1726           (unsigned)(ExtraLDSSize * ExtraLdsDwGranularity * sizeof(uint32_t)));
1727
1728       // Set PsInputEna and PsInputAddr .spi_ps_input_ena and .spi_ps_input_addr
// Field names, in order, for bits 0..15 of the SPI PS input masks.
1729       static StringLiteral const PsInputFields[] = {
1730           ".persp_sample_ena",    ".persp_center_ena",
1731           ".persp_centroid_ena",  ".persp_pull_model_ena",
1732           ".linear_sample_ena",   ".linear_center_ena",
1733           ".linear_centroid_ena", ".line_stipple_tex_ena",
1734           ".pos_x_float_ena",     ".pos_y_float_ena",
1735           ".pos_z_float_ena",     ".pos_w_float_ena",
1736           ".front_face_ena",      ".ancillary_ena",
1737           ".sample_coverage_ena", ".pos_fixed_pt_ena"};
1738       unsigned PSInputEna = MFI->getPSInputEnable();
1739       unsigned PSInputAddr = MFI->getPSInputAddr();
1740       for (auto [Idx, Field] : enumerate(PsInputFields)) {
1741         MD->setGraphicsRegisters(".spi_ps_input_ena", Field,
1742                                  (bool)((PSInputEna >> Idx) & 1));
1743         MD->setGraphicsRegisters(".spi_ps_input_addr", Field,
1744                                  (bool)((PSInputAddr >> Idx) & 1));
1745       }
1746     }
1747   }
1748
1749   // For version 3 and above the wave front size is already set in the metadata
1750   if (MD->getPALMajorVersion() < 3 && STM.isWave32())
1751     MD->setWave32(MF.getFunction().getCallingConv());
1752 }
1753
// Record PAL metadata for a non-entry (callable) function: its stack size,
// the AMDGPU_CS register settings (or 3.0+ named fields), and its LDS/SGPR/
// VGPR usage under its function name.
1754 void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
1755   auto *MD = getTargetStreamer()->getPALMetadata();
1756   const MachineFrameInfo &MFI = MF.getFrameInfo();
1757   StringRef FnName = MF.getFunction().getName();
1758   MD->setFunctionScratchSize(FnName, MFI.getStackSize());
1759   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1760   MCContext &Ctx = MF.getContext();
1761
1762   if (MD->getPALMajorVersion() < 3) {
1763     // Set compute registers
1764     MD->setRsrc1(
1766         CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS, ST, Ctx), Ctx);
1767     MD->setRsrc2(CallingConv::AMDGPU_CS,
1768                  CurrentProgramInfo.getComputePGMRSrc2(ST, Ctx), Ctx);
1769   } else {
1771         MD, CurrentProgramInfo, CallingConv::AMDGPU_CS, ST,
1772         MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize());
1773   }
1774
1775   // Set optional info
1776   MD->setFunctionLdsSize(FnName, CurrentProgramInfo.LDSSize);
1777   MD->setFunctionNumUsedVgprs(FnName, CurrentProgramInfo.NumVGPRsForWavesPerEU);
1778   MD->setFunctionNumUsedSgprs(FnName, CurrentProgramInfo.NumSGPRsForWavesPerEU);
1779 }
1780
1781 // This is supposed to be log2(Size)
// Map a private-element byte size (4/8/16) to its amd_element_byte_size_t
// enum encoding; any other size is a programming error.
1783   switch (Size) {
1784   case 4:
1785     return AMD_ELEMENT_4_BYTES;
1786   case 8:
1787     return AMD_ELEMENT_8_BYTES;
1788   case 16:
1789     return AMD_ELEMENT_16_BYTES;
1790   default:
1791     llvm_unreachable("invalid private_element_size");
1792   }
1793 }
1794
// Fill in the amd_kernel_code_t (MC form) for a kernel from the computed
// program info and the kernel's user-SGPR configuration. Only valid for
// AMDGPU_KERNEL / SPIR_KERNEL calling conventions.
1795 void AMDGPUAsmPrinter::getAmdKernelCode(AMDGPUMCKernelCodeT &Out,
1796                                         const SIProgramInfo &CurrentProgramInfo,
1797                                         const MachineFunction &MF) const {
1798   const Function &F = MF.getFunction();
1799   assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
1800          F.getCallingConv() == CallingConv::SPIR_KERNEL);
1801
1802   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1803   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
1804   MCContext &Ctx = MF.getContext();
1805
1806   Out.initDefault(&STM, Ctx, /*InitMCExpr=*/false);
1807
1809       CurrentProgramInfo.getComputePGMRSrc1(STM, Ctx);
1811       CurrentProgramInfo.getComputePGMRSrc2(STM, Ctx);
1813
1814   Out.is_dynamic_callstack = CurrentProgramInfo.DynamicCallStack;
1815
1817       getElementByteSizeValue(STM.getMaxPrivateElementSize(true)));
1818
// Set one code-property flag per enabled user SGPR input.
1819   const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI->getUserSGPRInfo();
1820   if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
1822   }
1823
1824   if (UserSGPRInfo.hasDispatchPtr())
1826
1827   if (UserSGPRInfo.hasQueuePtr())
1829
1830   if (UserSGPRInfo.hasKernargSegmentPtr())
1832
1833   if (UserSGPRInfo.hasDispatchID())
1835
1836   if (UserSGPRInfo.hasFlatScratchInit())
1838
1839   if (UserSGPRInfo.hasPrivateSegmentSize())
1841
1842   if (STM.isXNACKEnabled())
1844
1845   Align MaxKernArgAlign;
1846   Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
1847   Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
1848   Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
1849   Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
1850   Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
1851
1852   // kernarg_segment_alignment is specified as log of the alignment.
1853   // The minimum alignment is 16.
1854   // FIXME: The metadata treats the minimum as 4?
1855   Out.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
1856 }
1857
// Inline-asm operand printer (AsmPrinter::PrintAsmOperand override tail).
// Returns false on success, true if the operand/modifier is unsupported.
// Handles the 'r' modifier, register operands, and immediates (small values
// printed in decimal, larger ones in hex).
1859                                        const char *ExtraCode, raw_ostream &O) {
1860   // First try the generic code, which knows about modifiers like 'c' and 'n'.
1861   if (!AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O))
1862     return false;
1863
1864   if (ExtraCode && ExtraCode[0]) {
// Only single-character modifiers are supported.
1865     if (ExtraCode[1] != 0)
1866       return true; // Unknown modifier.
1867
1868     switch (ExtraCode[0]) {
1869     case 'r':
1870       break;
1871     default:
1872       return true;
1873     }
1874   }
1875
1876   // TODO: Should be able to support other operand types like globals.
1877   const MachineOperand &MO = MI->getOperand(OpNo);
1878   if (MO.isReg()) {
1880                                        *MF->getSubtarget().getRegisterInfo());
1881     return false;
1882   }
1883   if (MO.isImm()) {
1884     int64_t Val = MO.getImm();
// Small values in decimal; wider values in hex at the narrowest fitting
// width (16/32/64 bits).
1886       O << Val;
1887     } else if (isUInt<16>(Val)) {
1888       O << format("0x%" PRIx16, static_cast<uint16_t>(Val));
1889     } else if (isUInt<32>(Val)) {
1890       O << format("0x%" PRIx32, static_cast<uint32_t>(Val));
1891     } else {
1892       O << format("0x%" PRIx64, static_cast<uint64_t>(Val));
1893     }
1894     return false;
1895   }
1896   return true;
1897 }
1898
1906
// Emit the "kernel-resource-usage" analysis remarks (one remark per line of
// output, since diagnostics cannot contain newlines): SGPR/VGPR/AGPR counts,
// scratch size, dynamic-stack flag, occupancy, spill counts, and LDS size.
1907 void AMDGPUAsmPrinter::emitResourceUsageRemarks(
1908     const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo,
1909     bool isModuleEntryFunction, bool hasMAIInsts) {
1910   if (!ORE)
1911     return;
1912
1913   const char *Name = "kernel-resource-usage";
1914   const char *Indent = "    ";
1915
1916   // If the remark is not specifically enabled, do not output to yaml
1918   if (!Ctx.getDiagHandlerPtr()->isAnalysisRemarkEnabled(Name))
1919     return;
1920
1921   // Currently non-kernel functions have no resources to emit.
1923     return;
1924
1925   auto EmitResourceUsageRemark = [&](StringRef RemarkName,
1926                                      StringRef RemarkLabel, auto Argument) {
1927     // Add an indent for every line besides the line with the kernel name. This
1928     // makes it easier to tell which resource usage go with which kernel since
1929     // the kernel name will always be displayed first.
1930     std::string LabelStr = RemarkLabel.str() + ": ";
1931     if (RemarkName != "FunctionName")
1932       LabelStr = Indent + LabelStr;
1933
1934     ORE->emit([&]() {
1935       return MachineOptimizationRemarkAnalysis(Name, RemarkName,
1937                                                &MF.front())
1938              << LabelStr << ore::NV(RemarkName, Argument);
1939     });
1940   };
1941
1942   // FIXME: Formatting here is pretty nasty because clang does not accept
1943   // newlines from diagnostics. This forces us to emit multiple diagnostic
1944   // remarks to simulate newlines. If and when clang does accept newlines, this
1945   // formatting should be aggregated into one remark with newlines to avoid
1946   // printing multiple diagnostic location and diag opts.
1947   EmitResourceUsageRemark("FunctionName", "Function Name",
1948                           MF.getFunction().getName());
1949   EmitResourceUsageRemark("NumSGPR", "TotalSGPRs",
1950                           getMCExprStr(CurrentProgramInfo.NumSGPR));
1951   EmitResourceUsageRemark("NumVGPR", "VGPRs",
1952                           getMCExprStr(CurrentProgramInfo.NumArchVGPR));
1953   if (hasMAIInsts) {
1954     EmitResourceUsageRemark("NumAGPR", "AGPRs",
1955                             getMCExprStr(CurrentProgramInfo.NumAccVGPR));
1956   }
1957   EmitResourceUsageRemark("ScratchSize", "ScratchSize [bytes/lane]",
1958                           getMCExprStr(CurrentProgramInfo.ScratchSize));
// Report the dynamic-stack flag only when the expression resolves to a
// nonzero constant.
1959   int64_t DynStack;
1960   bool DynStackEvaluatable =
1961       CurrentProgramInfo.DynamicCallStack->evaluateAsAbsolute(DynStack);
1962   StringRef DynamicStackStr =
1963       DynStackEvaluatable && DynStack ? "True" : "False";
1964   EmitResourceUsageRemark("DynamicStack", "Dynamic Stack", DynamicStackStr);
1965   EmitResourceUsageRemark("Occupancy", "Occupancy [waves/SIMD]",
1966                           getMCExprStr(CurrentProgramInfo.Occupancy));
1967   EmitResourceUsageRemark("SGPRSpill", "SGPRs Spill",
1968                           CurrentProgramInfo.SGPRSpill);
1969   EmitResourceUsageRemark("VGPRSpill", "VGPRs Spill",
1970                           CurrentProgramInfo.VGPRSpill);
1971   if (isModuleEntryFunction)
1972     EmitResourceUsageRemark("BytesLDS", "LDS Size [bytes/block]",
1973                             CurrentProgramInfo.LDSSize);
1974 }
1975
// Pass identification token used by the legacy pass manager registry.
1976 char AMDGPUAsmPrinter::ID = 0;
1977
1978 INITIALIZE_PASS(AMDGPUAsmPrinter, "amdgpu-asm-printer",
1979                 "AMDGPU Assembly Printer", false, false)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD, const SIProgramInfo &CurrentProgramInfo, CallingConv::ID CC, const GCNSubtarget &ST, unsigned DynamicVGPRBlockSize)
const AMDGPUMCExpr * createOccupancy(unsigned InitOcc, const MCExpr *NumSGPRs, const MCExpr *NumVGPRs, unsigned DynamicVGPRBlockSize, const GCNSubtarget &STM, MCContext &Ctx)
Mimics GCNSubtarget::computeOccupancy for MCExpr.
static unsigned getRsrcReg(CallingConv::ID CallConv)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmPrinter()
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size)
static uint32_t getFPMode(SIModeRegisterDefaults Mode)
static std::string computeTypeId(const FunctionType *FTy, const DataLayout &DL)
static const MCExpr * computeAccumOffset(const MCExpr *NumVGPR, MCContext &Ctx)
static void appendTypeEncoding(std::string &Enc, Type *Ty, const DataLayout &DL, bool IsReturnType)
static AsmPrinter * createAMDGPUAsmPrinterPass(TargetMachine &tm, std::unique_ptr< MCStreamer > &&Streamer)
AMDGPU Assembly printer class.
AMDGPU HSA Metadata Streamer.
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
MC infrastructure to propagate the function level resource usage info.
Analyzes how many registers and other resources are used by functions.
The AMDGPU TargetMachine interface definition for hw codegen targets.
AMDHSA kernel descriptor definitions.
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
amd_element_byte_size_t
The values used to define the number of bytes to use for the swizzle element size.
@ AMD_ELEMENT_8_BYTES
@ AMD_ELEMENT_16_BYTES
@ AMD_ELEMENT_4_BYTES
#define AMD_HSA_BITS_SET(dst, mask, val)
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID
@ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE
@ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE
@ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR
@ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED
@ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT
@ AMD_CODE_PROPERTY_IS_PTR64
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//
OptimizedStructLayoutField Field
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
R600 Assembly printer class.
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS
Definition SIDefines.h:1144
#define R_0286E8_SPI_TMPRING_SIZE
Definition SIDefines.h:1286
#define FP_ROUND_MODE_DP(x)
Definition SIDefines.h:1268
#define C_00B84C_SCRATCH_EN
Definition SIDefines.h:1180
#define FP_ROUND_ROUND_TO_NEAREST
Definition SIDefines.h:1260
#define R_0286D0_SPI_PS_INPUT_ADDR
Definition SIDefines.h:1219
#define R_00B860_COMPUTE_TMPRING_SIZE
Definition SIDefines.h:1281
#define R_00B428_SPI_SHADER_PGM_RSRC1_HS
Definition SIDefines.h:1167
#define R_00B328_SPI_SHADER_PGM_RSRC1_ES
Definition SIDefines.h:1166
#define R_00B528_SPI_SHADER_PGM_RSRC1_LS
Definition SIDefines.h:1175
#define R_0286CC_SPI_PS_INPUT_ENA
Definition SIDefines.h:1218
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS
Definition SIDefines.h:1153
#define FP_DENORM_MODE_DP(x)
Definition SIDefines.h:1279
#define R_00B848_COMPUTE_PGM_RSRC1
Definition SIDefines.h:1221
#define R_SPILLED_SGPRS
Definition SIDefines.h:1300
#define FP_ROUND_MODE_SP(x)
Definition SIDefines.h:1267
#define FP_DENORM_MODE_SP(x)
Definition SIDefines.h:1278
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS
Definition SIDefines.h:1158
#define R_SPILLED_VGPRS
Definition SIDefines.h:1301
#define S_00B02C_EXTRA_LDS_SIZE(x)
Definition SIDefines.h:1152
#define R_00B84C_COMPUTE_PGM_RSRC2
Definition SIDefines.h:1177
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS
Definition SIDefines.h:1151
StringSet - A set-like wrapper for the StringMap.
static const int BlockSize
Definition TarWriter.cpp:33
void emitFunctionEntryLabel() override
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
const MCSubtargetInfo * getGlobalSTI() const
void emitImplicitDef(const MachineInstr *MI) const override
Targets can override this to customize the output of IMPLICIT_DEF instructions in verbose mode.
std::vector< std::string > DisasmLines
void emitStartOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the start of their fi...
void endFunction(const MachineFunction *MF)
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
std::vector< std::string > HexLines
void emitGlobalVariable(const GlobalVariable *GV) override
Emit the specified global variable to the .s file.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &O) override
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool doFinalization(Module &M) override
doFinalization - Virtual method overriden by subclasses to do any necessary clean up after all passes...
void emitEndOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the end of their file...
AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer)
bool doInitialization(Module &M) override
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
void emitFunctionBodyStart() override
Targets can override this to emit stuff before the first basic block in the function.
void emitBasicBlockStart(const MachineBasicBlock &MBB) override
Targets can override this to emit stuff at the start of a basic block.
AMDGPUTargetStreamer * getTargetStreamer() const
static void printRegOperand(MCRegister Reg, raw_ostream &O, const MCRegisterInfo &MRI)
AMDGPU target specific MCExpr operations.
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createTotalNumVGPR(const MCExpr *NumAGPR, const MCExpr *NumVGPR, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
void setHwStage(unsigned CC, StringRef field, unsigned Val)
void updateHwStageMaximum(unsigned CC, StringRef field, unsigned Val)
void setComputeRegisters(StringRef field, unsigned Val)
std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes(uint32_t LDSBytes, const Function &F) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
unsigned getAddressableLocalMemorySize() const
Return the maximum number of bytes of LDS that can be allocated to a single workgroup.
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
unsigned getWavefrontSize() const
virtual void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const AMDGPU::MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR, const MCExpr *NextSGPR, const MCExpr *ReserveVCC, const MCExpr *ReserveFlatScr)
virtual void emitAMDGPUInfo(const AMDGPU::InfoSectionData &Data)
AMDGPUPALMetadata * getPALMetadata()
virtual void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV)
void initializeTargetID(const MCSubtargetInfo &STI)
virtual void EmitMCResourceInfo(const MCSymbol *NumVGPR, const MCSymbol *NumAGPR, const MCSymbol *NumExplicitSGPR, const MCSymbol *NumNamedBarrier, const MCSymbol *PrivateSegmentSize, const MCSymbol *UsesVCC, const MCSymbol *UsesFlatScratch, const MCSymbol *HasDynamicallySizedStack, const MCSymbol *HasRecursion, const MCSymbol *HasIndirectCall)
virtual bool EmitCodeEnd(const MCSubtargetInfo &STI)
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type)
virtual void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header)
const std::optional< AMDGPU::IsaInfo::AMDGPUTargetID > & getTargetID() const
virtual void EmitMCResourceMaximums(const MCSymbol *MaxVGPR, const MCSymbol *MaxAGPR, const MCSymbol *MaxSGPR, const MCSymbol *MaxNamedBarrier)
void setXnackSetting(TargetIDSetting NewXnackSetting)
Sets xnack setting to NewXnackSetting.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
Collects and handles AsmPrinter objects required to build debug or EH information.
This class is intended to be used as a driving class for all asm writers.
Definition AsmPrinter.h:91
const TargetLoweringObjectFile & getObjFileLowering() const
Return information about object file lowering.
MCSymbol * getSymbol(const GlobalValue *GV) const
virtual void emitGlobalVariable(const GlobalVariable *GV)
Emit the specified global variable to the .s file.
TargetMachine & TM
Target machine description.
Definition AsmPrinter.h:94
MachineFunction * MF
The current machine function.
Definition AsmPrinter.h:109
virtual void SetupMachineFunction(MachineFunction &MF)
This should be called when a new MachineFunction is being processed from runOnMachineFunction.
void emitFunctionBody()
This method emits the body and trailer for a function.
virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
virtual void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const
This emits linkage information about GVSym based on GV, if this is supported by the target.
void getAnalysisUsage(AnalysisUsage &AU) const override
Record analysis usage.
unsigned getFunctionNumber() const
Return a unique ID for the current function.
MachineOptimizationRemarkEmitter * ORE
Optimization remark emitter.
Definition AsmPrinter.h:121
AsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer, char &ID=AsmPrinter::ID)
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition AsmPrinter.h:128
MachineModuleInfo * MMI
This is a pointer to the current MachineModuleInfo.
Definition AsmPrinter.h:112
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition AsmPrinter.h:101
bool doFinalization(Module &M) override
Shut down the asmprinter.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
void emitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition=true) const
This emits visibility information about symbol, if this is supported by the target.
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition AsmPrinter.h:106
const MCAsmInfo & MAI
Target Asm Printer information.
Definition AsmPrinter.h:97
bool isVerbose() const
Return true if assembly output should contain comments.
Definition AsmPrinter.h:310
void getNameWithPrefix(SmallVectorImpl< char > &Name, const GlobalValue *GV) const
virtual void emitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
void addAsmPrinterHandler(std::unique_ptr< AsmPrinterHandler > Handler)
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
bool empty() const
Definition DenseMap.h:109
DISubprogram * getSubprogram() const
Get the attached subprogram.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
unsigned getMinNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
bool isTgSplitEnabled() const
bool isCuModeEnabled() const
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
std::pair< unsigned, unsigned > computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
bool isWave32() const
bool supportsWGP() const
unsigned getMaxNumUserSGPRs() const
Generation getGeneration() const
unsigned getAddressableNumSGPRs() const
unsigned getMaxWaveScratchSize() const
bool hasPrivateSegmentBuffer() const
VisibilityTypes getVisibility() const
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:337
unsigned getAddressSpace() const
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition Globals.cpp:141
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
MaybeAlign getAlign() const
Returns the alignment of the given variable.
LLVM_ABI uint64_t getGlobalSize(const DataLayout &DL) const
Get the size of this global variable in bytes.
Definition Globals.cpp:569
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
MCCodeEmitter * getEmitterPtr() const
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:348
static const MCBinaryExpr * createOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:408
static const MCBinaryExpr * createLOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:378
static const MCBinaryExpr * createMul(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:398
static const MCBinaryExpr * createGT(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:363
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:353
static const MCBinaryExpr * createShl(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:413
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:428
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
const MCObjectFileInfo * getObjectFileInfo() const
Definition MCContext.h:413
LLVM_ABI void reportError(SMLoc L, const Twine &Msg)
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
LLVM_ABI bool evaluateAsRelocatable(MCValue &Res, const MCAssembler *Asm) const
Try to evaluate the expression to a relocatable value, i.e.
Definition MCExpr.cpp:450
MCSection * getReadOnlySection() const
MCSection * getTextSection() const
MCContext & getContext() const
This represents a section on linux, lots of unix variants and some bare metal systems.
Instances of this class represent a uniqued identifier for a section in the current translation unit.
Definition MCSection.h:573
void ensureMinAlignment(Align MinAlignment)
Makes sure that Alignment is at least MinAlignment.
Definition MCSection.h:661
bool hasInstructions() const
Definition MCSection.h:669
MCContext & getContext() const
Definition MCStreamer.h:323
Generic base class for all target subtargets.
const Triple & getTargetTriple() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
bool isDefined() const
isDefined - Check if this symbol is defined (i.e., it has an address).
Definition MCSymbol.h:233
StringRef getName() const
getName - Get the symbol name.
Definition MCSymbol.h:188
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
void redefineIfPossible()
Prepare this symbol to be redefined.
Definition MCSymbol.h:212
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
MCStreamer & getStreamer()
Definition MCStreamer.h:103
static const MCUnaryExpr * createNot(const MCExpr *Expr, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:273
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
MCContext & getContext() const
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
Diagnostic information for optimization analysis remarks.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Emit an optimization remark.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
LLVM_ABI unsigned getNumOperands() const
iterator_range< op_iterator > operands()
Definition Metadata.h:1856
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
GCNUserSGPRUsageInfo & getUserSGPRInfo()
SIModeRegisterDefaults getMode() const
unsigned getScratchReservedForDynamicVGPRs() const
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void push_back(const T &Elt)
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39
Primary interface to the complete machine description for the target machine.
const Triple & getTargetTriple() const
OSType getOS() const
Get the parsed operating system type of this triple.
Definition Triple.h:445
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM Value Representation.
Definition Value.h:75
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
void printAMDGPUMCExpr(const MCExpr *Expr, raw_ostream &OS, const MCAsmInfo *MAI)
LLVM_READNONE constexpr bool isModuleEntryFunctionCC(CallingConv::ID CC)
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST)
bool isGFX11(const MCSubtargetInfo &STI)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
const MCExpr * maskShiftSet(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
Provided with the MCExpr * Val, uint32 Mask and Shift, will return the masked and left shifted,...
unsigned getAMDHSACodeObjectVersion(const Module &M)
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READNONE constexpr bool isKernel(CallingConv::ID CC)
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
LLVM_READNONE constexpr bool isCompute(CallingConv::ID CC)
bool hasMAIInsts(const MCSubtargetInfo &STI)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
bool isGFX11Plus(const MCSubtargetInfo &STI)
const MCExpr * foldAMDGPUMCExpr(const MCExpr *Expr, MCContext &Ctx)
bool isGFX10Plus(const MCSubtargetInfo &STI)
constexpr std::pair< unsigned, unsigned > getShiftMask(unsigned Value)
Deduce the least significant bit aligned shift and mask values for a binary Complement Value (as they...
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ SHT_PROGBITS
Definition ELF.h:1150
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1433
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
Target & getTheR600Target()
The target for R600 GPUs.
@ DK_ResourceLimit
AsmPrinter * createR600AsmPrinterPass(TargetMachine &TM, std::unique_ptr< MCStreamer > &&Streamer)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:129
@ Success
The lock was released successfully.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
Target & getTheGCNTarget()
The target for GCN GPUs.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1916
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:874
#define N
AMDGPUResourceUsageAnalysisImpl::SIFunctionResourceInfo FunctionResourceInfo
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx, bool InitMCExpr=true)
static const MCExpr * bits_get(const MCExpr *Src, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Track resource usage for kernels / entry functions.
const MCExpr * NumSGPR
const MCExpr * NumArchVGPR
uint64_t getFunctionCodeSize(const MachineFunction &MF, bool IsLowerBound=false)
const MCExpr * VGPRBlocks
const MCExpr * ScratchBlocks
const MCExpr * ComputePGMRSrc3
const MCExpr * getComputePGMRSrc1(const GCNSubtarget &ST, MCContext &Ctx) const
Compute the value of the ComputePGMRsrc1 register.
const MCExpr * VCCUsed
const MCExpr * FlatUsed
const MCExpr * NamedBarCnt
const MCExpr * ScratchEnable
const MCExpr * AccumOffset
const MCExpr * NumAccVGPR
const MCExpr * DynamicCallStack
const MCExpr * SGPRBlocks
const MCExpr * NumVGPRsForWavesPerEU
const MCExpr * NumVGPR
const MCExpr * Occupancy
const MCExpr * ScratchSize
const MCExpr * NumSGPRsForWavesPerEU
const MCExpr * getComputePGMRSrc2(const GCNSubtarget &ST, MCContext &Ctx) const
Compute the value of the ComputePGMRsrc2 register.
static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn)
RegisterAsmPrinter - Register an AsmPrinter implementation for the given target.