//===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides AMDGPU specific target streamer methods.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUTargetStreamer.h"
#include "AMDGPUMCExpr.h"
#include "AMDGPUMCKernelDescriptor.h"
#include "AMDGPUMCTargetDesc.h"
#include "AMDGPUPTNote.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MsgPackDocument.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/TargetParser/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;

//===----------------------------------------------------------------------===//
// AMDGPUTargetStreamer
//===----------------------------------------------------------------------===//

static cl::opt<unsigned>
    ForceGenericVersion("amdgpu-force-generic-version",
                        cl::desc("Force a specific generic_v<N> flag to be "
                                 "added. For testing purposes only."),
                        cl::ReallyHidden, cl::init(0));

bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) {
  msgpack::Document HSAMetadataDoc;
  if (!HSAMetadataDoc.fromYAML(HSAMetadataString))
    return false;
  return EmitHSAMetadata(HSAMetadataDoc, false);
}

StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
  AMDGPU::GPUKind AK;

  // clang-format off
  switch (ElfMach) {
  case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
  case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
  case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
  case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
  case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
  case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
  case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
  case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
  case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
  case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
  case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
  case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
  case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
  case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
  case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
  case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX602: AK = GK_GFX602; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX705: AK = GK_GFX705; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX805: AK = GK_GFX805; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX908: AK = GK_GFX908; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A: AK = GK_GFX90A; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: AK = GK_GFX942; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950: AK = GK_GFX950; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013: AK = GK_GFX1013; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030: AK = GK_GFX1030; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031: AK = GK_GFX1031; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032: AK = GK_GFX1032; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033: AK = GK_GFX1033; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034: AK = GK_GFX1034; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035: AK = GK_GFX1035; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036: AK = GK_GFX1036; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100: AK = GK_GFX1100; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101: AK = GK_GFX1101; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102: AK = GK_GFX1102; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103: AK = GK_GFX1103; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150: AK = GK_GFX1150; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: AK = GK_GFX1151; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152: AK = GK_GFX1152; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1153: AK = GK_GFX1153; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1170: AK = GK_GFX1170; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1171: AK = GK_GFX1171; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1172: AK = GK_GFX1172; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: AK = GK_GFX1200; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: AK = GK_GFX1201; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1250: AK = GK_GFX1250; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1251: AK = GK_GFX1251; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1310: AK = GK_GFX1310; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: AK = GK_GFX9_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC: AK = GK_GFX9_4_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: AK = GK_GFX10_1_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: AK = GK_GFX10_3_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC: AK = GK_GFX11_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC: AK = GK_GFX12_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_5_GENERIC: AK = GK_GFX12_5_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
  default: AK = GK_NONE; break;
  }
  // clang-format on

  StringRef GPUName = getArchNameAMDGCN(AK);
  if (GPUName != "")
    return GPUName;
  return getArchNameR600(AK);
}
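
// Example: getArchNameFromElfMach(ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A) returns
// "gfx90a"; for pre-GCN machine values the R600 arch-name table is consulted
// instead. getElfMach below performs the inverse mapping.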

unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
  AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
  if (AK == AMDGPU::GPUKind::GK_NONE)
    AK = parseArchR600(GPU);

  // clang-format off
  switch (AK) {
  case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600;
  case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630;
  case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880;
  case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670;
  case GK_RV710: return ELF::EF_AMDGPU_MACH_R600_RV710;
  case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730;
  case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770;
  case GK_CEDAR: return ELF::EF_AMDGPU_MACH_R600_CEDAR;
  case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS;
  case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER;
  case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD;
  case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO;
  case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS;
  case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS;
  case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN;
  case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS;
  case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
  case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
  case GK_GFX602: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX602;
  case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
  case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
  case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
  case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
  case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
  case GK_GFX705: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX705;
  case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
  case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
  case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
  case GK_GFX805: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX805;
  case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
  case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
  case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
  case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
  case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
  case GK_GFX908: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX908;
  case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
  case GK_GFX90A: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A;
  case GK_GFX90C: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C;
  case GK_GFX942: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX942;
  case GK_GFX950: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX950;
  case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
  case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
  case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
  case GK_GFX1013: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013;
  case GK_GFX1030: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030;
  case GK_GFX1031: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031;
  case GK_GFX1032: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032;
  case GK_GFX1033: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033;
  case GK_GFX1034: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034;
  case GK_GFX1035: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035;
  case GK_GFX1036: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036;
  case GK_GFX1100: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100;
  case GK_GFX1101: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101;
  case GK_GFX1102: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102;
  case GK_GFX1103: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103;
  case GK_GFX1150: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150;
  case GK_GFX1151: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151;
  case GK_GFX1152: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152;
  case GK_GFX1153: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1153;
  case GK_GFX1170: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1170;
  case GK_GFX1171: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1171;
  case GK_GFX1172: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1172;
  case GK_GFX1200: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200;
  case GK_GFX1201: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201;
  case GK_GFX1250: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1250;
  case GK_GFX1251: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1251;
  case GK_GFX1310: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1310;
  case GK_GFX9_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC;
  case GK_GFX9_4_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC;
  case GK_GFX10_1_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC;
  case GK_GFX10_3_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC;
  case GK_GFX11_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC;
  case GK_GFX12_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC;
  case GK_GFX12_5_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_5_GENERIC;
  case GK_NONE: break;
  }
  // clang-format on

  llvm_unreachable("unknown GPU");
}

//===----------------------------------------------------------------------===//
// AMDGPUTargetAsmStreamer
//===----------------------------------------------------------------------===//

AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
                                                 formatted_raw_ostream &OS)
    : AMDGPUTargetStreamer(S), OS(OS) {}

// A hook for emitting stuff at the end.
// We use it for emitting the accumulated PAL metadata as directives.
// The PAL metadata is reset after it is emitted.
void AMDGPUTargetAsmStreamer::finish() {
  std::string S;
  getPALMetadata()->toString(S);
  OS << S;

  // Reset the PAL metadata so its data will not affect a compilation that
  // reuses this object.
  getPALMetadata()->reset();
}

void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget() {
  OS << "\t.amdgcn_target \"" << *getTargetID() << "\"\n";
}
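
// For an HSA target this prints the full target-ID string, e.g. something
// like:
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+"
// (illustrative; the exact string is produced by the streamed AMDGPUTargetID).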

void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion(
    unsigned COV) {
  AMDGPUTargetStreamer::EmitDirectiveAMDHSACodeObjectVersion(COV);
  OS << "\t.amdhsa_code_object_version " << COV << '\n';
}

void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(
    AMDGPU::AMDGPUMCKernelCodeT &Header) {
  auto FoldAndPrint = [&](const MCExpr *Expr, raw_ostream &OS,
                          const MCAsmInfo *MAI) {
    printAMDGPUMCExpr(foldAMDGPUMCExpr(Expr, getContext()), OS, MAI);
  };

  OS << "\t.amd_kernel_code_t\n";
  Header.EmitKernelCodeT(OS, getContext(), FoldAndPrint);
  OS << "\t.end_amd_kernel_code_t\n";
}

void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
                                                   unsigned Type) {
  switch (Type) {
  default:
    llvm_unreachable("Invalid AMDGPU symbol type");
  case ELF::STT_AMDGPU_HSA_KERNEL:
    OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n';
    break;
  }
}

void AMDGPUTargetAsmStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
                                            Align Alignment) {
  OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", "
     << Alignment.value() << '\n';
}
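
// E.g. a 1024-byte, 8-byte-aligned LDS symbol named "lds.var" prints as:
//   .amdgpu_lds lds.var, 1024, 8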

void AMDGPUTargetAsmStreamer::EmitMCResourceInfo(
    const MCSymbol *NumVGPR, const MCSymbol *NumAGPR,
    const MCSymbol *NumExplicitSGPR, const MCSymbol *NumNamedBarrier,
    const MCSymbol *PrivateSegmentSize, const MCSymbol *UsesVCC,
    const MCSymbol *UsesFlatScratch, const MCSymbol *HasDynamicallySizedStack,
    const MCSymbol *HasRecursion, const MCSymbol *HasIndirectCall) {
#define PRINT_RES_INFO(ARG)                                                    \
  OS << "\t.set ";                                                             \
  ARG->print(OS, &getContext().getAsmInfo());                                  \
  OS << ", ";                                                                  \
  getContext().getAsmInfo().printExpr(OS, *ARG->getVariableValue());           \
  Streamer.addBlankLine();

  PRINT_RES_INFO(NumVGPR);
  PRINT_RES_INFO(NumAGPR);
  PRINT_RES_INFO(NumExplicitSGPR);
  PRINT_RES_INFO(NumNamedBarrier);
  PRINT_RES_INFO(PrivateSegmentSize);
  PRINT_RES_INFO(UsesVCC);
  PRINT_RES_INFO(UsesFlatScratch);
  PRINT_RES_INFO(HasDynamicallySizedStack);
  PRINT_RES_INFO(HasRecursion);
  PRINT_RES_INFO(HasIndirectCall);
#undef PRINT_RES_INFO
}

void AMDGPUTargetAsmStreamer::EmitMCResourceMaximums(
    const MCSymbol *MaxVGPR, const MCSymbol *MaxAGPR, const MCSymbol *MaxSGPR,
    const MCSymbol *MaxNamedBarrier) {
#define PRINT_RES_INFO(ARG)                                                    \
  OS << "\t.set ";                                                             \
  ARG->print(OS, &getContext().getAsmInfo());                                  \
  OS << ", ";                                                                  \
  getContext().getAsmInfo().printExpr(OS, *ARG->getVariableValue());           \
  Streamer.addBlankLine();

  PRINT_RES_INFO(MaxVGPR);
  PRINT_RES_INFO(MaxAGPR);
  PRINT_RES_INFO(MaxSGPR);
  PRINT_RES_INFO(MaxNamedBarrier);
#undef PRINT_RES_INFO
}

bool AMDGPUTargetAsmStreamer::EmitISAVersion() {
  OS << "\t.amd_amdgpu_isa \"" << *getTargetID() << "\"\n";
  return true;
}

bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc,
                                              bool Strict) {
  HSAMD::V3::MetadataVerifier Verifier(Strict);
  if (!Verifier.verify(HSAMetadataDoc.getRoot()))
    return false;

  std::string HSAMetadataString;
  raw_string_ostream StrOS(HSAMetadataString);
  HSAMetadataDoc.toYAML(StrOS);

  OS << '\t' << HSAMD::V3::AssemblerDirectiveBegin << '\n';
  OS << StrOS.str() << '\n';
  OS << '\t' << HSAMD::V3::AssemblerDirectiveEnd << '\n';
  return true;
}
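
// The verified document is printed as YAML between the V3 metadata
// directives, e.g. (content illustrative):
//   .amdgpu_metadata
//   amdhsa.version:
//     - 1
//     - 2
//   .end_amdgpu_metadata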

bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
  const uint32_t Encoded_s_code_end = 0xbf9f0000;
  const uint32_t Encoded_s_nop = 0xbf800000;
  uint32_t Encoded_pad = Encoded_s_code_end;

  // Instruction cache line size in bytes.
  const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
  const unsigned CacheLineSize = 1u << Log2CacheLineSize;

  // Extra padding amount in bytes to support prefetch mode 3.
  unsigned FillSize = 3 * CacheLineSize;

  if (AMDGPU::isGFX90A(STI)) {
    Encoded_pad = Encoded_s_nop;
    FillSize = 16 * CacheLineSize;
  }

  OS << "\t.p2alignl " << Log2CacheLineSize << ", " << Encoded_pad << '\n';
  OS << "\t.fill " << (FillSize / 4) << ", 4, " << Encoded_pad << '\n';
  return true;
}
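
// Worked example: on GFX11+ the cache line is 128 bytes (Log2 = 7), so this
// emits a .p2alignl to a 128-byte boundary plus a .fill of 3 * 128 / 4 = 96
// words of the s_code_end encoding (0xbf9f0000). On GFX90A the padding is
// 16 * 64 = 1024 bytes (256 words) of s_nop (0xbf800000) instead.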

void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
    const MCSubtargetInfo &STI, StringRef KernelName,
    const MCKernelDescriptor &KD, const MCExpr *NextVGPR,
    const MCExpr *NextSGPR, const MCExpr *ReserveVCC,
    const MCExpr *ReserveFlatScr) {
  IsaVersion IVersion = getIsaVersion(STI.getCPU());
  const MCAsmInfo &MAI = getContext().getAsmInfo();

  OS << "\t.amdhsa_kernel " << KernelName << '\n';

  auto PrintField = [&](const MCExpr *Expr, uint32_t Shift, uint32_t Mask,
                        StringRef Directive) {
    OS << "\t\t" << Directive << ' ';
    const MCExpr *ShiftedAndMaskedExpr =
        MCKernelDescriptor::bits_get(Expr, Shift, Mask, getContext());
    const MCExpr *New = foldAMDGPUMCExpr(ShiftedAndMaskedExpr, getContext());
    printAMDGPUMCExpr(New, OS, &MAI);
    OS << '\n';
  };

  auto EmitMCExpr = [&](const MCExpr *Value) {
    const MCExpr *NewExpr = foldAMDGPUMCExpr(Value, getContext());
    printAMDGPUMCExpr(NewExpr, OS, &MAI);
  };

  OS << "\t\t.amdhsa_group_segment_fixed_size ";
  EmitMCExpr(KD.group_segment_fixed_size);
  OS << '\n';

  OS << "\t\t.amdhsa_private_segment_fixed_size ";
  EmitMCExpr(KD.private_segment_fixed_size);
  OS << '\n';

  OS << "\t\t.amdhsa_kernarg_size ";
  EmitMCExpr(KD.kernarg_size);
  OS << '\n';

  if (isGFX1250Plus(STI)) {
    PrintField(KD.compute_pgm_rsrc2,
               amdhsa::COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT,
               ".amdhsa_user_sgpr_count");
  } else {
    PrintField(KD.compute_pgm_rsrc2,
               amdhsa::COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT,
               ".amdhsa_user_sgpr_count");
  }

  if (!hasArchitectedFlatScratch(STI))
    PrintField(
        KD.kernel_code_properties,
        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
        ".amdhsa_user_sgpr_private_segment_buffer");
  PrintField(KD.kernel_code_properties,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR,
             ".amdhsa_user_sgpr_dispatch_ptr");
  PrintField(KD.kernel_code_properties,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR,
             ".amdhsa_user_sgpr_queue_ptr");
  PrintField(KD.kernel_code_properties,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
             ".amdhsa_user_sgpr_kernarg_segment_ptr");
  PrintField(KD.kernel_code_properties,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID,
             ".amdhsa_user_sgpr_dispatch_id");
  if (!hasArchitectedFlatScratch(STI))
    PrintField(KD.kernel_code_properties,
               amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
               amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
               ".amdhsa_user_sgpr_flat_scratch_init");
  if (hasKernargPreload(STI)) {
    PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH_SHIFT,
               amdhsa::KERNARG_PRELOAD_SPEC_LENGTH,
               ".amdhsa_user_sgpr_kernarg_preload_length");
    PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET_SHIFT,
               amdhsa::KERNARG_PRELOAD_SPEC_OFFSET,
               ".amdhsa_user_sgpr_kernarg_preload_offset");
  }
  PrintField(
      KD.kernel_code_properties,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
      ".amdhsa_user_sgpr_private_segment_size");
  if (IVersion.Major >= 10)
    PrintField(KD.kernel_code_properties,
               amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
               amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
               ".amdhsa_wavefront_size32");
  PrintField(KD.kernel_code_properties,
             amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT,
             amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK,
             ".amdhsa_uses_dynamic_stack");
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT,
             (hasArchitectedFlatScratch(STI)
                  ? ".amdhsa_enable_private_segment"
                  : ".amdhsa_system_sgpr_private_segment_wavefront_offset"));
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X,
             ".amdhsa_system_sgpr_workgroup_id_x");
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y,
             ".amdhsa_system_sgpr_workgroup_id_y");
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z,
             ".amdhsa_system_sgpr_workgroup_id_z");
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO,
             ".amdhsa_system_sgpr_workgroup_info");
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID,
             ".amdhsa_system_vgpr_workitem_id");

  // These directives are required.
  OS << "\t\t.amdhsa_next_free_vgpr ";
  EmitMCExpr(NextVGPR);
  OS << '\n';

  OS << "\t\t.amdhsa_next_free_sgpr ";
  EmitMCExpr(NextSGPR);
  OS << '\n';

  if (AMDGPU::isGFX90A(STI)) {
    // MCExpr equivalent of taking the (accum_offset + 1) * 4.
    const MCExpr *accum_bits = MCKernelDescriptor::bits_get(
        KD.compute_pgm_rsrc3,
        amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
        amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
    accum_bits = MCBinaryExpr::createAdd(
        accum_bits, MCConstantExpr::create(1, getContext()), getContext());
    accum_bits = MCBinaryExpr::createMul(
        accum_bits, MCConstantExpr::create(4, getContext()), getContext());
    OS << "\t\t.amdhsa_accum_offset ";
    const MCExpr *New = foldAMDGPUMCExpr(accum_bits, getContext());
    printAMDGPUMCExpr(New, OS, &MAI);
    OS << '\n';
  }

  if (isGFX1250Plus(STI))
    PrintField(KD.compute_pgm_rsrc3,
               amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
               ".amdhsa_named_barrier_count");

  OS << "\t\t.amdhsa_reserve_vcc ";
  EmitMCExpr(ReserveVCC);
  OS << '\n';

  if (IVersion.Major >= 7 && !hasArchitectedFlatScratch(STI)) {
    OS << "\t\t.amdhsa_reserve_flat_scratch ";
    EmitMCExpr(ReserveFlatScr);
    OS << '\n';
  }

  switch (CodeObjectVersion) {
  default:
    break;
  case AMDGPU::AMDHSA_COV4:
  case AMDGPU::AMDHSA_COV5:
    if (getTargetID()->isXnackSupported())
      OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny()
         << '\n';
    break;
  }

  PrintField(KD.compute_pgm_rsrc1,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32,
             ".amdhsa_float_round_mode_32");
  PrintField(KD.compute_pgm_rsrc1,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64,
             ".amdhsa_float_round_mode_16_64");
  PrintField(KD.compute_pgm_rsrc1,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32,
             ".amdhsa_float_denorm_mode_32");
  PrintField(KD.compute_pgm_rsrc1,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
             ".amdhsa_float_denorm_mode_16_64");
  if (STI.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode)) {
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP,
               ".amdhsa_dx10_clamp");
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE,
               ".amdhsa_ieee_mode");
  }
  if (IVersion.Major >= 9) {
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL,
               ".amdhsa_fp16_overflow");
  }
  if (AMDGPU::isGFX90A(STI))
    PrintField(KD.compute_pgm_rsrc3,
               amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, ".amdhsa_tg_split");
  if (AMDGPU::supportsWGP(STI))
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
               ".amdhsa_workgroup_processor_mode");
  if (IVersion.Major >= 10) {
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED,
               ".amdhsa_memory_ordered");
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS,
               ".amdhsa_forward_progress");
  }
  if (IVersion.Major >= 10 && IVersion.Major < 12) {
    PrintField(KD.compute_pgm_rsrc3,
               amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT,
               ".amdhsa_shared_vgpr_count");
  }
  if (IVersion.Major == 11) {
    PrintField(KD.compute_pgm_rsrc3,
               amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE,
               ".amdhsa_inst_pref_size");
  }
  if (IVersion.Major >= 12) {
    PrintField(KD.compute_pgm_rsrc3,
               amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE,
               ".amdhsa_inst_pref_size");
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN,
               ".amdhsa_round_robin_scheduling");
  }
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
      ".amdhsa_exception_fp_ieee_invalid_op");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
      ".amdhsa_exception_fp_denorm_src");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
      ".amdhsa_exception_fp_ieee_div_zero");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
      ".amdhsa_exception_fp_ieee_overflow");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
      ".amdhsa_exception_fp_ieee_underflow");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
      ".amdhsa_exception_fp_ieee_inexact");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
      ".amdhsa_exception_int_div_zero");

  OS << "\t.end_amdhsa_kernel\n";
}
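
// A trimmed sketch of the resulting directive block (field values depend on
// the kernel descriptor expressions):
//   .amdhsa_kernel my_kernel
//     .amdhsa_group_segment_fixed_size 0
//     .amdhsa_private_segment_fixed_size 0
//     .amdhsa_kernarg_size 8
//     ...
//     .amdhsa_next_free_vgpr 4
//     .amdhsa_next_free_sgpr 8
//   .end_amdhsa_kernel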

namespace {
/// Callback type invoked by \c forEachInfoScope for each function scope in
/// the canonical iteration order. The scope is emitted exactly once per
/// unique \p Sym regardless of how many flat entries reference it.
using InfoScopeEmitter = function_ref<void(
    MCSymbol *Sym, const AMDGPU::FuncInfo *Info, ArrayRef<MCSymbol *> Uses,
    ArrayRef<MCSymbol *> Calls, ArrayRef<StringRef> IndirectCallTypeIds,
    ArrayRef<StringRef> TypeIds)>;

/// Group the flat edge lists in \p Data by source function symbol and drive
/// per-scope emission. A scope is opened for every function with attached
/// info and for every function that appears only as an edge source; each
/// scope is emitted exactly once. Both the asm and ELF streamers share this
/// iteration logic and only differ in the per-scope emission callback.
static void forEachInfoScope(const AMDGPU::InfoSectionData &Data,
                             InfoScopeEmitter Emit) {
  // Bucket the flat entry lists by their source function symbol.
  DenseMap<MCSymbol *, SmallVector<MCSymbol *, 4>> FuncUses;
  DenseMap<MCSymbol *, SmallVector<MCSymbol *, 4>> FuncCalls;
  DenseMap<MCSymbol *, SmallVector<StringRef, 4>> FuncIndirectCalls;
  DenseMap<MCSymbol *, SmallVector<StringRef, 4>> FuncTypeIds;
  for (const auto &[Func, Res] : Data.Uses)
    FuncUses[Func].push_back(Res);
  for (const auto &[Src, Dst] : Data.Calls)
    FuncCalls[Src].push_back(Dst);
  for (const auto &[Func, TypeId] : Data.IndirectCalls)
    FuncIndirectCalls[Func].push_back(TypeId);
  for (const auto &[Sym, TypeId] : Data.TypeIds)
    FuncTypeIds[Sym].push_back(TypeId);

  DenseSet<MCSymbol *> Emitted;
  auto EmitIfNew = [&](MCSymbol *Sym, const AMDGPU::FuncInfo *Info) {
    if (!Emitted.insert(Sym).second)
      return;
    ArrayRef<MCSymbol *> Uses, Calls;
    ArrayRef<StringRef> IndirectCallTypeIds, TypeIds;
    if (auto It = FuncUses.find(Sym); It != FuncUses.end())
      Uses = It->second;
    if (auto It = FuncCalls.find(Sym); It != FuncCalls.end())
      Calls = It->second;
    if (auto It = FuncIndirectCalls.find(Sym); It != FuncIndirectCalls.end())
      IndirectCallTypeIds = It->second;
    if (auto It = FuncTypeIds.find(Sym); It != FuncTypeIds.end())
      TypeIds = It->second;
    Emit(Sym, Info, Uses, Calls, IndirectCallTypeIds, TypeIds);
  };

  for (const AMDGPU::FuncInfo &Func : Data.Funcs)
    EmitIfNew(Func.Sym, &Func);
  // Emit scopes for functions that only appear as edge sources (e.g. typeid
  // tags on address-taken declarations, or callers of external functions).
  for (const auto &[Sym, TypeId] : Data.TypeIds)
    EmitIfNew(Sym, nullptr);
  for (const auto &[Sym, Res] : Data.Uses)
    EmitIfNew(Sym, nullptr);
  for (const auto &[Sym, Dst] : Data.Calls)
    EmitIfNew(Sym, nullptr);
  for (const auto &[Sym, TypeId] : Data.IndirectCalls)
    EmitIfNew(Sym, nullptr);
}
} // namespace

void AMDGPUTargetAsmStreamer::emitAMDGPUInfo(
    const AMDGPU::InfoSectionData &Data) {
  forEachInfoScope(Data, [&](MCSymbol *Sym, const AMDGPU::FuncInfo *Info,
                             ArrayRef<MCSymbol *> Uses,
                             ArrayRef<MCSymbol *> Calls,
                             ArrayRef<StringRef> IndirectCallTypeIds,
                             ArrayRef<StringRef> TypeIds) {
    OS << "\t.amdgpu_info " << Sym->getName() << '\n';
    if (Info) {
      AMDGPU::FuncInfoFlags Flags{};
      // (FuncInfoFlags enumerator names assumed from the documented flag
      // semantics.)
      if (Info->UsesVCC)
        Flags |= AMDGPU::FuncInfoFlags::USES_VCC;
      if (Info->UsesFlatScratch)
        Flags |= AMDGPU::FuncInfoFlags::USES_FLAT_SCRATCH;
      if (Info->HasDynStack)
        Flags |= AMDGPU::FuncInfoFlags::HAS_DYNAMIC_STACK;
      OS << "\t\t.amdgpu_flags " << llvm::to_underlying(Flags) << '\n';
      OS << "\t\t.amdgpu_num_sgpr " << Info->NumSGPR << '\n';
      OS << "\t\t.amdgpu_num_vgpr " << Info->NumArchVGPR << '\n';
      if (Info->NumAccVGPR)
        OS << "\t\t.amdgpu_num_agpr " << Info->NumAccVGPR << '\n';
      OS << "\t\t.amdgpu_private_segment_size " << Info->PrivateSegmentSize
         << '\n';
    }
    for (MCSymbol *Res : Uses)
      OS << "\t\t.amdgpu_use " << Res->getName() << '\n';
    for (MCSymbol *Dst : Calls)
      OS << "\t\t.amdgpu_call " << Dst->getName() << '\n';
    for (StringRef TypeId : IndirectCallTypeIds)
      OS << "\t\t.amdgpu_indirect_call \"" << TypeId << "\"\n";
    for (StringRef TypeId : TypeIds)
      OS << "\t\t.amdgpu_typeid \"" << TypeId << "\"\n";
    OS << "\t.end_amdgpu_info\n\n";
  });
}
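
// Sketch of the textual form for one function scope (names and numbers
// illustrative):
//   .amdgpu_info callee
//     .amdgpu_flags 1
//     .amdgpu_num_sgpr 32
//     .amdgpu_num_vgpr 24
//     .amdgpu_private_segment_size 16
//     .amdgpu_call helper
//     .amdgpu_typeid "_ZTSFviE"
//   .end_amdgpu_info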

//===----------------------------------------------------------------------===//
// AMDGPUTargetELFStreamer
//===----------------------------------------------------------------------===//

AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S,
                                                 const MCSubtargetInfo &STI)
    : AMDGPUTargetStreamer(S), STI(STI) {}

MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
  return static_cast<MCELFStreamer &>(Streamer);
}

// A hook for emitting stuff at the end.
// We use it for emitting the accumulated PAL metadata as a .note record.
// The PAL metadata is reset after it is emitted.
void AMDGPUTargetELFStreamer::finish() {
  ELFObjectWriter &W = getStreamer().getWriter();
  W.setELFHeaderEFlags(getEFlags());
  W.setOverrideABIVersion(
      getELFABIVersion(STI.getTargetTriple(), CodeObjectVersion));

  std::string Blob;
  const char *Vendor = getPALMetadata()->getVendor();
  unsigned Type = getPALMetadata()->getType();
  getPALMetadata()->toBlob(Type, Blob);
  if (Blob.empty())
    return;
  EmitNote(Vendor, MCConstantExpr::create(Blob.size(), getContext()), Type,
           [&](MCELFStreamer &OS) { OS.emitBytes(Blob); });

  // Reset the PAL metadata so its data will not affect a compilation that
  // reuses this object.
  getPALMetadata()->reset();
}

void AMDGPUTargetELFStreamer::EmitNote(
    StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
    function_ref<void(MCELFStreamer &)> EmitDesc) {
  auto &S = getStreamer();
  auto &Context = S.getContext();

  auto NameSZ = Name.size() + 1;

  unsigned NoteFlags = 0;
  // TODO: Apparently this is currently needed for OpenCL, as mentioned in
  // https://reviews.llvm.org/D74995
  if (isHsaAbi(STI))
    NoteFlags = ELF::SHF_ALLOC;

  S.pushSection();
  S.switchSection(
      Context.getELFSection(ElfNote::SectionName, ELF::SHT_NOTE, NoteFlags));
  S.emitInt32(NameSZ);                       // namesz
  S.emitValue(DescSZ, 4);                    // descsz
  S.emitInt32(NoteType);                     // type
  S.emitBytes(Name);                         // name
  S.emitValueToAlignment(Align(4), 0, 1, 0); // padding 0
  EmitDesc(S);                               // desc
  S.emitValueToAlignment(Align(4), 0, 1, 0); // padding 0
  S.popSection();
}
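
// The record emitted above follows the standard ELF note layout:
//   +0: namesz (u32)   +4: descsz (u32)   +8: type (u32)
//   +12: name (namesz bytes), padded to a 4-byte boundary
//   then: desc (descsz bytes), padded to a 4-byte boundary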

unsigned AMDGPUTargetELFStreamer::getEFlags() {
  switch (STI.getTargetTriple().getArch()) {
  default:
    llvm_unreachable("Unsupported Arch");
  case Triple::r600:
    return getEFlagsR600();
  case Triple::amdgcn:
    return getEFlagsAMDGCN();
  }
}

unsigned AMDGPUTargetELFStreamer::getEFlagsR600() {
  assert(STI.getTargetTriple().getArch() == Triple::r600);

  return getElfMach(STI.getCPU());
}

unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() {
  assert(STI.getTargetTriple().isAMDGCN());

  switch (STI.getTargetTriple().getOS()) {
  default:
    // TODO: Why do some tests have "mingw" listed as the OS?
    // llvm_unreachable("Unsupported OS");
  case Triple::UnknownOS:
    return getEFlagsUnknownOS();
  case Triple::AMDHSA:
    return getEFlagsAMDHSA();
  case Triple::AMDPAL:
    return getEFlagsAMDPAL();
  case Triple::Mesa3D:
    return getEFlagsMesa3D();
  }
}

unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
  // TODO: Why do some tests have "mingw" listed as the OS?
  // assert(STI.getTargetTriple().getOS() == Triple::UnknownOS);

  return getEFlagsV3();
}

unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
  assert(isHsaAbi(STI));

  if (CodeObjectVersion >= 6)
    return getEFlagsV6();
  return getEFlagsV4();
}

unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() {
  assert(STI.getTargetTriple().getOS() == Triple::AMDPAL);

  return getEFlagsV3();
}

unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() {
  assert(STI.getTargetTriple().getOS() == Triple::Mesa3D);

  return getEFlagsV3();
}

unsigned AMDGPUTargetELFStreamer::getEFlagsV3() {
  unsigned EFlagsV3 = 0;

  // mach.
  EFlagsV3 |= getElfMach(STI.getCPU());

  // xnack.
  if (getTargetID()->isXnackOnOrAny())
    EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_XNACK_V3;
  // sramecc.
  if (getTargetID()->isSramEccOnOrAny())
    EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_V3;

  return EFlagsV3;
}

unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {
  unsigned EFlagsV4 = 0;

  // mach.
  EFlagsV4 |= getElfMach(STI.getCPU());

  // xnack.
  switch (getTargetID()->getXnackSetting()) {
  case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::Any:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ANY_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::Off:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_OFF_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::On:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ON_V4;
    break;
  }
  // sramecc.
  switch (getTargetID()->getSramEccSetting()) {
  case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::Any:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ANY_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::Off:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_OFF_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::On:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ON_V4;
    break;
  }

  return EFlagsV4;
}
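
// Example: a gfx906 target with xnack- and sramecc+ produces
//   EF_AMDGPU_MACH_AMDGCN_GFX906 | EF_AMDGPU_FEATURE_XNACK_OFF_V4 |
//   EF_AMDGPU_FEATURE_SRAMECC_ON_V4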

unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
  unsigned Flags = getEFlagsV4();

  unsigned Version = ForceGenericVersion;
  if (!Version) {
    switch (parseArchAMDGCN(STI.getCPU())) {
    case AMDGPU::GK_GFX9_GENERIC:
      Version = GenericVersion::GFX9;
      break;
    case AMDGPU::GK_GFX9_4_GENERIC:
      Version = GenericVersion::GFX9_4;
      break;
    case AMDGPU::GK_GFX10_1_GENERIC:
      Version = GenericVersion::GFX10_1;
      break;
    case AMDGPU::GK_GFX10_3_GENERIC:
      Version = GenericVersion::GFX10_3;
      break;
    case AMDGPU::GK_GFX11_GENERIC:
      Version = GenericVersion::GFX11;
      break;
    case AMDGPU::GK_GFX12_GENERIC:
      Version = GenericVersion::GFX12;
      break;
    case AMDGPU::GK_GFX12_5_GENERIC:
      Version = GenericVersion::GFX12_5;
      break;
    default:
      break;
    }
  }

  // Versions start at 1.
  if (Version) {
    if (Version > ELF::EF_AMDGPU_GENERIC_VERSION_MAX)
      report_fatal_error("Cannot encode generic code object version " +
                         Twine(Version) +
                         " - no ELF flag can represent this version!");
    Flags |= Version << ELF::EF_AMDGPU_GENERIC_VERSION_OFFSET;
  }

  return Flags;
}
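
// Example: for "gfx9-generic" this ORs GenericVersion::GFX9, shifted by
// EF_AMDGPU_GENERIC_VERSION_OFFSET, into the V4 flags computed above.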

void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {}

void AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(
    AMDGPU::AMDGPUMCKernelCodeT &Header) {
  MCStreamer &OS = getStreamer();
  OS.pushSection();
  Header.EmitKernelCodeT(OS, getContext());
  OS.popSection();
}

void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
                                                   unsigned Type) {
  auto *Symbol = static_cast<MCSymbolELF *>(
      getStreamer().getContext().getOrCreateSymbol(SymbolName));
  Symbol->setType(Type);
}

void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
                                            Align Alignment) {
  auto *SymbolELF = static_cast<MCSymbolELF *>(Symbol);
  SymbolELF->setType(ELF::STT_OBJECT);

  if (!SymbolELF->isBindingSet())
    SymbolELF->setBinding(ELF::STB_GLOBAL);

  if (SymbolELF->declareCommon(Size, Alignment)) {
    report_fatal_error("Symbol: " + Symbol->getName() +
                       " redeclared as different type");
  }

  SymbolELF->setIndex(ELF::SHN_AMDGPU_LDS);
  SymbolELF->setSize(MCConstantExpr::create(Size, getContext()));
}

bool AMDGPUTargetELFStreamer::EmitISAVersion() {
  // Create two labels to mark the beginning and end of the desc field
  // and an MCExpr to calculate the size of the desc field.
  auto &Context = getContext();
  auto *DescBegin = Context.createTempSymbol();
  auto *DescEnd = Context.createTempSymbol();
  auto *DescSZ = MCBinaryExpr::createSub(
      MCSymbolRefExpr::create(DescEnd, Context),
      MCSymbolRefExpr::create(DescBegin, Context), Context);

  EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_HSA_ISA_NAME,
           [&](MCELFStreamer &OS) {
             OS.emitLabel(DescBegin);

             SmallString<32> Str;
             raw_svector_ostream StrOS(Str);
             StrOS << *getTargetID();

             OS.emitBytes(StrOS.str());
             OS.emitLabel(DescEnd);
           });
  return true;
}

bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc,
                                              bool Strict) {
  HSAMD::V3::MetadataVerifier Verifier(Strict);
  if (!Verifier.verify(HSAMetadataDoc.getRoot()))
    return false;

  std::string HSAMetadataString;
  HSAMetadataDoc.writeToBlob(HSAMetadataString);

  // Create two labels to mark the beginning and end of the desc field
  // and an MCExpr to calculate the size of the desc field.
  auto &Context = getContext();
  auto *DescBegin = Context.createTempSymbol();
  auto *DescEnd = Context.createTempSymbol();
  auto *DescSZ = MCBinaryExpr::createSub(
      MCSymbolRefExpr::create(DescEnd, Context),
      MCSymbolRefExpr::create(DescBegin, Context), Context);

  EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA,
           [&](MCELFStreamer &OS) {
             OS.emitLabel(DescBegin);
             OS.emitBytes(HSAMetadataString);
             OS.emitLabel(DescEnd);
           });
  return true;
}

bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
  const uint32_t Encoded_s_code_end = 0xbf9f0000;
  const uint32_t Encoded_s_nop = 0xbf800000;
  uint32_t Encoded_pad = Encoded_s_code_end;

  // Instruction cache line size in bytes.
  const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
  const unsigned CacheLineSize = 1u << Log2CacheLineSize;

  // Extra padding amount in bytes to support prefetch mode 3.
  unsigned FillSize = 3 * CacheLineSize;

  if (AMDGPU::isGFX90A(STI)) {
    Encoded_pad = Encoded_s_nop;
    FillSize = 16 * CacheLineSize;
  }

  MCStreamer &OS = getStreamer();
  OS.pushSection();
  OS.emitValueToAlignment(Align(CacheLineSize), Encoded_pad, 4);
  for (unsigned I = 0; I < FillSize; I += 4)
    OS.emitInt32(Encoded_pad);
  OS.popSection();
  return true;
}

void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
    const MCSubtargetInfo &STI, StringRef KernelName,
    const MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR,
    const MCExpr *NextSGPR, const MCExpr *ReserveVCC,
    const MCExpr *ReserveFlatScr) {
  auto &Streamer = getStreamer();
  auto &Context = Streamer.getContext();

  auto *KernelCodeSymbol =
      static_cast<MCSymbolELF *>(Context.getOrCreateSymbol(Twine(KernelName)));
  auto *KernelDescriptorSymbol = static_cast<MCSymbolELF *>(
      Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd")));

  // Copy the kernel descriptor symbol's binding, other and visibility from
  // the kernel code symbol.
  KernelDescriptorSymbol->setBinding(KernelCodeSymbol->getBinding());
  KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther());
  KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility());
  // The kernel descriptor symbol's type and size are fixed.
  KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
  KernelDescriptorSymbol->setSize(
      MCConstantExpr::create(sizeof(amdhsa::kernel_descriptor_t), Context));

  // The visibility of the kernel code symbol must be protected or less to
  // allow static relocations from the kernel descriptor to be used.
  if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT)
    KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);

  Streamer.emitLabel(KernelDescriptorSymbol);
  Streamer.emitValue(
      KernelDescriptor.group_segment_fixed_size,
      sizeof(amdhsa::kernel_descriptor_t::group_segment_fixed_size));
  Streamer.emitValue(
      KernelDescriptor.private_segment_fixed_size,
      sizeof(amdhsa::kernel_descriptor_t::private_segment_fixed_size));
  Streamer.emitValue(KernelDescriptor.kernarg_size,
                     sizeof(amdhsa::kernel_descriptor_t::kernarg_size));

  for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved0); ++i)
    Streamer.emitInt8(0u);

  // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
  // expression being created is:
  //   (start of kernel code) - (start of kernel descriptor)
  // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
  Streamer.emitValue(
      MCBinaryExpr::createSub(
          MCSymbolRefExpr::create(KernelCodeSymbol, AMDGPUMCExpr::VK_AMDGPU_REL64,
                                  Context),
          MCSymbolRefExpr::create(KernelDescriptorSymbol, Context), Context),
      sizeof(amdhsa::kernel_descriptor_t::kernel_code_entry_byte_offset));
  for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved1); ++i)
    Streamer.emitInt8(0u);
  Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc3,
                     sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc3));
  Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc1,
                     sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc1));
  Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc2,
                     sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc2));
  Streamer.emitValue(
      KernelDescriptor.kernel_code_properties,
      sizeof(amdhsa::kernel_descriptor_t::kernel_code_properties));
  Streamer.emitValue(KernelDescriptor.kernarg_preload,
                     sizeof(amdhsa::kernel_descriptor_t::kernarg_preload));
  for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved3); ++i)
    Streamer.emitInt8(0u);
}
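
// The emission above mirrors the in-memory layout of
// amdhsa::kernel_descriptor_t field for field, so the ".kd" symbol labels a
// complete kernel descriptor object that the HSA runtime loader can consume
// directly.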

void AMDGPUTargetELFStreamer::emitAMDGPUInfo(
    const AMDGPU::InfoSectionData &Data) {
  MCELFStreamer &S = getStreamer();
  MCContext &Context = S.getContext();

  StringTableBuilder StrTab(StringTableBuilder::ELF);
  auto getOrAddString = [&](StringRef Str) -> uint32_t {
    if (Str.empty())
      return UINT32_MAX;
    return StrTab.add(Str);
  };

  auto EmitU32Entry = [&](AMDGPU::InfoKind Kind, uint32_t Val) {
    S.emitInt8(static_cast<uint8_t>(Kind));
    S.emitInt8(4);
    S.emitInt32(Val);
  };
  auto EmitSymEntry = [&](AMDGPU::InfoKind Kind, MCSymbol *Sym) {
    S.emitInt8(static_cast<uint8_t>(Kind));
    S.emitInt8(8);
    S.emitValue(MCSymbolRefExpr::create(Sym, Context), 8);
  };

  S.pushSection();
  MCSectionELF *InfoSec = Context.getELFSection(
      ".amdgpu.info", ELF::SHT_PROGBITS, ELF::SHF_EXCLUDE);
  S.switchSection(InfoSec);

  forEachInfoScope(Data, [&](MCSymbol *Sym, const AMDGPU::FuncInfo *Info,
                             ArrayRef<MCSymbol *> Uses,
                             ArrayRef<MCSymbol *> Calls,
                             ArrayRef<StringRef> IndirectCallTypeIds,
                             ArrayRef<StringRef> TypeIds) {
    EmitSymEntry(AMDGPU::InfoKind::INFO_FUNC, Sym);

    if (Info) {
      AMDGPU::FuncInfoFlags Flags{};
      // (FuncInfoFlags enumerator names assumed; see the asm streamer above.)
      if (Info->UsesVCC)
        Flags |= AMDGPU::FuncInfoFlags::USES_VCC;
      if (Info->UsesFlatScratch)
        Flags |= AMDGPU::FuncInfoFlags::USES_FLAT_SCRATCH;
      if (Info->HasDynStack)
        Flags |= AMDGPU::FuncInfoFlags::HAS_DYNAMIC_STACK;
      EmitU32Entry(AMDGPU::InfoKind::INFO_FLAGS, llvm::to_underlying(Flags));
      EmitU32Entry(AMDGPU::InfoKind::INFO_NUM_SGPR, Info->NumSGPR);
      EmitU32Entry(AMDGPU::InfoKind::INFO_NUM_VGPR, Info->NumArchVGPR);
      // INFO_NUM_AGPR is only emitted when the function actually uses AGPRs,
      // since AGPRs are not available on all architectures.
      if (Info->NumAccVGPR)
        EmitU32Entry(AMDGPU::InfoKind::INFO_NUM_AGPR, Info->NumAccVGPR);
      EmitU32Entry(AMDGPU::InfoKind::INFO_PRIVATE_SEGMENT_SIZE,
                   Info->PrivateSegmentSize);
    }

    for (MCSymbol *Res : Uses)
      EmitSymEntry(AMDGPU::InfoKind::INFO_USE, Res);
    for (MCSymbol *Dst : Calls)
      EmitSymEntry(AMDGPU::InfoKind::INFO_CALL, Dst);
    for (StringRef TypeId : IndirectCallTypeIds) {
      EmitU32Entry(AMDGPU::InfoKind::INFO_INDIRECT_CALL,
                   getOrAddString(TypeId));
    }
    for (StringRef TypeId : TypeIds)
      EmitU32Entry(AMDGPU::InfoKind::INFO_TYPEID, getOrAddString(TypeId));
  });

  if (!StrTab.empty()) {
    StrTab.finalizeInOrder();
    MCSectionELF *Sec = Context.getELFSection(".amdgpu.strtab", ELF::SHT_STRTAB,
                                              ELF::SHF_EXCLUDE);
    S.switchSection(Sec);
    SmallString<128> Buf;
    raw_svector_ostream OS(Buf);
    StrTab.write(OS);
    S.emitBytes(Buf);
  }

  S.popSection();
}
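
// Binary encoding sketch: each entry is a 1-byte InfoKind tag followed by a
// 1-byte payload size, then either a u32 immediate or an 8-byte relocated
// symbol value. String payloads are stored as u32 offsets into the
// accompanying .amdgpu.strtab section (UINT32_MAX when absent).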