LLVM 23.0.0git
AMDGPUDisassembler.cpp
Go to the documentation of this file.
1//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//===----------------------------------------------------------------------===//
10//
11/// \file
12///
13/// This file contains definition for AMDGPU ISA disassembler
14//
15//===----------------------------------------------------------------------===//
16
17// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
18
22#include "SIDefines.h"
23#include "SIRegisterInfo.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCContext.h"
31#include "llvm/MC/MCDecoder.h"
33#include "llvm/MC/MCExpr.h"
34#include "llvm/MC/MCInstrDesc.h"
40
41using namespace llvm;
42using namespace llvm::MCD;
43
44#define DEBUG_TYPE "amdgpu-disassembler"
45
46#define SGPR_MAX \
47 (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
48 : AMDGPU::EncValues::SGPR_MAX_SI)
49
51
52static int64_t getInlineImmValF16(unsigned Imm);
53static int64_t getInlineImmValBF16(unsigned Imm);
54static int64_t getInlineImmVal32(unsigned Imm);
55static int64_t getInlineImmVal64(unsigned Imm);
56
58 MCContext &Ctx, MCInstrInfo const *MCII)
59 : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
60 MAI(Ctx.getAsmInfo()),
61 HwModeRegClass(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)),
62 TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
63 CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
64 // ToDo: AMDGPUDisassembler supports only VI ISA.
65 if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
66 reportFatalUsageError("disassembly not yet supported for subtarget");
67
68 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
69 createConstantSymbolExpr(Symbol, Code);
70
71 UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
72 UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
73 UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
74}
75
79
81addOperand(MCInst &Inst, const MCOperand& Opnd) {
82 Inst.addOperand(Opnd);
83 return Opnd.isValid() ?
86}
87
89 AMDGPU::OpName Name) {
90 int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), Name);
91 if (OpIdx != -1) {
92 auto *I = MI.begin();
93 std::advance(I, OpIdx);
94 MI.insert(I, Op);
95 }
96 return OpIdx;
97}
98
99static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
100 uint64_t Addr,
101 const MCDisassembler *Decoder) {
102 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
103
104 // Our branches take a simm16.
105 int64_t Offset = SignExtend64<16>(Imm) * 4 + 4 + Addr;
106
107 if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
109 return addOperand(Inst, MCOperand::createImm(Imm));
110}
111
112static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
113 const MCDisassembler *Decoder) {
114 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
115 int64_t Offset;
116 if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
118 } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
119 Offset = Imm & 0xFFFFF;
120 } else { // GFX9+ supports 21-bit signed offsets.
122 }
124}
125
126static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
127 const MCDisassembler *Decoder) {
128 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
129 return addOperand(Inst, DAsm->decodeBoolReg(Inst, Val));
130}
131
132static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
133 uint64_t Addr,
134 const MCDisassembler *Decoder) {
135 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
136 return addOperand(Inst, DAsm->decodeSplitBarrier(Inst, Val));
137}
138
139static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
140 const MCDisassembler *Decoder) {
141 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
142 return addOperand(Inst, DAsm->decodeDpp8FI(Val));
143}
144
// Defines a static decoder callback 'StaticDecoderName' that forwards the raw
// field value to the AMDGPUDisassembler member 'DecoderName' and appends the
// resulting operand to the instruction.
#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm, \
                                        uint64_t /*Addr*/, \
                                        const MCDisassembler *Decoder) { \
    /* Spell the pointer type out; plain 'auto' hides that this is a pointer \
       and is inconsistent with the non-macro helpers in this file. */ \
    const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
    return addOperand(Inst, DAsm->DecoderName(Imm)); \
  }
152
// Decoder for registers, decode directly using RegClassID. Imm(8-bit) is
// number of register. Used by VGPR only and AGPR only operands.
#define DECODE_OPERAND_REG_8(RegClass) \
  static DecodeStatus Decode##RegClass##RegisterClass( \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
      const MCDisassembler *Decoder) { \
    assert(Imm < (1 << 8) && "8-bit encoding"); \
    /* 'const auto *' for consistency with the non-macro helpers. */ \
    const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
    return addOperand( \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm)); \
  }
164
// Defines a static decoder callback 'Name' for an EncSize-bit source-operand
// field: the raw value is range-checked and then handed (as expression
// 'EncImm', which may reference 'Imm') to the member decodeSrcOp.
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm) \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
                           const MCDisassembler *Decoder) { \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding"); \
    /* 'const auto *' for consistency with the non-macro helpers. */ \
    const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
    return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm)); \
  }
172
// Shared helper for the decodeSrc* wrappers below: checks that the raw field
// value 'Imm' fits in 'EncSize' bits, then hands the (possibly modified)
// encoding 'EncImm' to AMDGPUDisassembler::decodeSrcOp for an operand of
// 'OpWidth' bits.
static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
                                unsigned OpWidth, unsigned Imm, unsigned EncImm,
                                const MCDisassembler *Decoder) {
  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));
}
180
// Decoder for registers. Imm(7-bit) is number of register, uses decodeSrcOp to
// get register class. Used by SGPR only operands. OpWidth is the operand width
// in bits, forwarded to decodeSrcOp.
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth) \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)

// Same, but the register number is encoded in 8 bits.
#define DECODE_OPERAND_SREG_8(RegClass, OpWidth) \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)
188
189// Decoder for registers. Imm(10-bit): Imm{7-0} is number of register,
190// Imm{9} is acc(agpr or vgpr) Imm{8} should be 0 (see VOP3Pe_SMFMAC).
191// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
192// Used by AV_ register classes (AGPR or VGPR only register operands).
193template <unsigned OpWidth>
194static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
195 const MCDisassembler *Decoder) {
196 return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
197 Decoder);
198}
199
200// Decoder for Src(9-bit encoding) registers only.
201template <unsigned OpWidth>
202static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
203 uint64_t /* Addr */,
204 const MCDisassembler *Decoder) {
205 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
206}
207
208// Decoder for Src(9-bit encoding) AGPR, register number encoded in 9bits, set
209// Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp, registers
210// only.
211template <unsigned OpWidth>
212static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
213 const MCDisassembler *Decoder) {
214 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
215}
216
217// Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding
218// Imm{9} is acc, registers only.
219template <unsigned OpWidth>
220static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
221 uint64_t /* Addr */,
222 const MCDisassembler *Decoder) {
223 return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, Decoder);
224}
225
226// Decoder for RegisterOperands using 9-bit Src encoding. Operand can be
227// register from RegClass or immediate. Registers that don't belong to RegClass
228// will be decoded and InstPrinter will report warning. Immediate will be
229// decoded into constant matching the OperandType (important for floating point
230// types).
231template <unsigned OpWidth>
232static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
233 uint64_t /* Addr */,
234 const MCDisassembler *Decoder) {
235 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
236}
237
238// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
239// and decode using 'enum10' from decodeSrcOp.
240template <unsigned OpWidth>
241static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
242 uint64_t /* Addr */,
243 const MCDisassembler *Decoder) {
244 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
245}
246
247// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
248// when RegisterClass is used as an operand. Most often used for destination
249// operands.
250
252DECODE_OPERAND_REG_8(VGPR_32_Lo128)
255DECODE_OPERAND_REG_8(VReg_128)
256DECODE_OPERAND_REG_8(VReg_192)
257DECODE_OPERAND_REG_8(VReg_256)
258DECODE_OPERAND_REG_8(VReg_288)
259DECODE_OPERAND_REG_8(VReg_320)
260DECODE_OPERAND_REG_8(VReg_352)
261DECODE_OPERAND_REG_8(VReg_384)
262DECODE_OPERAND_REG_8(VReg_512)
263DECODE_OPERAND_REG_8(VReg_1024)
264
265DECODE_OPERAND_SREG_7(SReg_32, 32)
266DECODE_OPERAND_SREG_7(SReg_32_XM0, 32)
267DECODE_OPERAND_SREG_7(SReg_32_XEXEC, 32)
268DECODE_OPERAND_SREG_7(SReg_32_XM0_XEXEC, 32)
269DECODE_OPERAND_SREG_7(SReg_32_XEXEC_HI, 32)
270DECODE_OPERAND_SREG_7(SReg_64_XEXEC, 64)
271DECODE_OPERAND_SREG_7(SReg_64_XEXEC_XNULL, 64)
272DECODE_OPERAND_SREG_7(SReg_96, 96)
273DECODE_OPERAND_SREG_7(SReg_128, 128)
274DECODE_OPERAND_SREG_7(SReg_128_XNULL, 128)
275DECODE_OPERAND_SREG_7(SReg_256, 256)
276DECODE_OPERAND_SREG_7(SReg_256_XNULL, 256)
277DECODE_OPERAND_SREG_7(SReg_512, 512)
278
279DECODE_OPERAND_SREG_8(SReg_64, 64)
280
283DECODE_OPERAND_REG_8(AReg_128)
284DECODE_OPERAND_REG_8(AReg_256)
285DECODE_OPERAND_REG_8(AReg_512)
286DECODE_OPERAND_REG_8(AReg_1024)
287
289 uint64_t /*Addr*/,
290 const MCDisassembler *Decoder) {
291 assert(isUInt<10>(Imm) && "10-bit encoding expected");
292 assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
293
294 bool IsHi = Imm & (1 << 9);
295 unsigned RegIdx = Imm & 0xff;
296 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
297 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
298}
299
300static DecodeStatus
302 const MCDisassembler *Decoder) {
303 assert(isUInt<8>(Imm) && "8-bit encoding expected");
304
305 bool IsHi = Imm & (1 << 7);
306 unsigned RegIdx = Imm & 0x7f;
307 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
308 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
309}
310
311template <unsigned OpWidth>
313 uint64_t /*Addr*/,
314 const MCDisassembler *Decoder) {
315 assert(isUInt<9>(Imm) && "9-bit encoding expected");
316
317 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
318 if (Imm & AMDGPU::EncValues::IS_VGPR) {
319 bool IsHi = Imm & (1 << 7);
320 unsigned RegIdx = Imm & 0x7f;
321 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
322 }
323 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
324}
325
326template <unsigned OpWidth>
327static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
328 uint64_t /*Addr*/,
329 const MCDisassembler *Decoder) {
330 assert(isUInt<10>(Imm) && "10-bit encoding expected");
331
332 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
333 if (Imm & AMDGPU::EncValues::IS_VGPR) {
334 bool IsHi = Imm & (1 << 9);
335 unsigned RegIdx = Imm & 0xff;
336 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
337 }
338 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
339}
340
341static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
342 uint64_t /*Addr*/,
343 const MCDisassembler *Decoder) {
344 assert(isUInt<10>(Imm) && "10-bit encoding expected");
345 assert(Imm & AMDGPU::EncValues::IS_VGPR && "VGPR expected");
346
347 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
348
349 bool IsHi = Imm & (1 << 9);
350 unsigned RegIdx = Imm & 0xff;
351 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
352}
353
354static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
355 uint64_t Addr,
356 const MCDisassembler *Decoder) {
357 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
358 return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
359}
360
362 uint64_t Addr,
363 const MCDisassembler *Decoder) {
364 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
365 return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm));
366}
367
368static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
369 uint64_t Addr, const void *Decoder) {
370 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
371 return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
372}
373
374static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw,
375 const MCDisassembler *Decoder) {
376 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
377 return addOperand(Inst, DAsm->decodeSrcOp(Inst, Opw, Imm | 256));
378}
379
// Thin wrapper instantiated per operand width; forwards to the runtime
// decodeAVLdSt helper above.
template <unsigned Opw>
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
                                 uint64_t /* Addr */,
                                 const MCDisassembler *Decoder) {
  return decodeAVLdSt(Inst, Imm, Opw, Decoder);
}
386
387static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
388 uint64_t Addr,
389 const MCDisassembler *Decoder) {
390 assert(Imm < (1 << 9) && "9-bit encoding");
391 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
392 return addOperand(Inst, DAsm->decodeSrcOp(Inst, 64, Imm));
393}
394
// Generates the static SDWA operand-decoder callbacks; the static wrapper and
// the AMDGPUDisassembler member it forwards to share the same name.
#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)
401
402static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
403 uint64_t /* Addr */,
404 const MCDisassembler *Decoder) {
405 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
406 return addOperand(Inst, DAsm->decodeVersionImm(Imm));
407}
408
409#include "AMDGPUGenDisassemblerTables.inc"
410
namespace {
// Define bitwidths for various types used to instantiate the decoder.
// Each specialization states how many instruction bits the given carrier
// type holds (plain integers for 32/64-bit encodings, bitsets for wider
// ones).
template <> constexpr uint32_t InsnBitWidth<uint32_t> = 32;
template <> constexpr uint32_t InsnBitWidth<uint64_t> = 64;
template <> constexpr uint32_t InsnBitWidth<std::bitset<96>> = 96;
template <> constexpr uint32_t InsnBitWidth<std::bitset<128>> = 128;
} // namespace
418
419//===----------------------------------------------------------------------===//
420//
421//===----------------------------------------------------------------------===//
422
423template <typename InsnType>
425 InsnType Inst, uint64_t Address,
426 raw_ostream &Comments) const {
427 assert(MI.getOpcode() == 0);
428 assert(MI.getNumOperands() == 0);
429 MCInst TmpInst;
430 HasLiteral = false;
431 const auto SavedBytes = Bytes;
432
433 SmallString<64> LocalComments;
434 raw_svector_ostream LocalCommentStream(LocalComments);
435 CommentStream = &LocalCommentStream;
436
437 DecodeStatus Res =
438 decodeInstruction(Table, TmpInst, Inst, Address, this, STI);
439
440 CommentStream = nullptr;
441
442 if (Res != MCDisassembler::Fail) {
443 MI = TmpInst;
444 Comments << LocalComments;
446 }
447 Bytes = SavedBytes;
449}
450
451template <typename InsnType>
454 MCInst &MI, InsnType Inst, uint64_t Address,
455 raw_ostream &Comments) const {
456 for (const uint8_t *T : {Table1, Table2}) {
457 if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
458 return Res;
459 }
461}
462
463template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
464 assert(Bytes.size() >= sizeof(T));
465 const auto Res =
467 Bytes = Bytes.slice(sizeof(T));
468 return Res;
469}
470
471static inline std::bitset<96> eat12Bytes(ArrayRef<uint8_t> &Bytes) {
472 using namespace llvm::support::endian;
473 assert(Bytes.size() >= 12);
474 std::bitset<96> Lo(read<uint64_t, endianness::little>(Bytes.data()));
475 Bytes = Bytes.slice(8);
476 std::bitset<96> Hi(read<uint32_t, endianness::little>(Bytes.data()));
477 Bytes = Bytes.slice(4);
478 return (Hi << 64) | Lo;
479}
480
481static inline std::bitset<128> eat16Bytes(ArrayRef<uint8_t> &Bytes) {
482 using namespace llvm::support::endian;
483 assert(Bytes.size() >= 16);
484 std::bitset<128> Lo(read<uint64_t, endianness::little>(Bytes.data()));
485 Bytes = Bytes.slice(8);
486 std::bitset<128> Hi(read<uint64_t, endianness::little>(Bytes.data()));
487 Bytes = Bytes.slice(8);
488 return (Hi << 64) | Lo;
489}
490
491void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
492 const MCInstrInfo &MCII) const {
493 const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
494 for (auto [OpNo, OpDesc] : enumerate(Desc.operands())) {
495 if (OpNo >= MI.getNumOperands())
496 continue;
497
498 // TODO: Fix V_DUAL_FMAMK_F32_X_FMAAK_F32_gfx12 vsrc operands,
499 // defined to take VGPR_32, but in reality allowing inline constants.
500 bool IsSrc = AMDGPU::OPERAND_SRC_FIRST <= OpDesc.OperandType &&
501 OpDesc.OperandType <= AMDGPU::OPERAND_SRC_LAST;
502 if (!IsSrc && OpDesc.OperandType != MCOI::OPERAND_REGISTER)
503 continue;
504
505 MCOperand &Op = MI.getOperand(OpNo);
506 if (!Op.isImm())
507 continue;
508 int64_t Imm = Op.getImm();
511 Op = decodeIntImmed(Imm);
512 continue;
513 }
514
516 Op = decodeLiteralConstant(Desc, OpDesc);
517 continue;
518 }
519
522 switch (OpDesc.OperandType) {
528 break;
533 Imm = getInlineImmValF16(Imm);
534 break;
537 Imm = getInlineImmValF16(Imm);
538 break;
540 // V_PK_FMAC_F16 on GFX11+ duplicates the f16 inline constant to both
541 // halves, so we need to produce the duplicated value for correct
542 // round-trip.
543 if (isGFX11Plus()) {
544 int64_t F16Val = getInlineImmValF16(Imm);
545 Imm = (F16Val << 16) | (F16Val & 0xFFFF);
546 } else {
547 Imm = getInlineImmValF16(Imm);
548 }
549 break;
550 }
556 Imm = getInlineImmVal64(Imm);
557 break;
558 default:
559 Imm = getInlineImmVal32(Imm);
560 }
561 Op.setImm(Imm);
562 }
563 }
564}
565
567 ArrayRef<uint8_t> Bytes_,
569 raw_ostream &CS) const {
570 unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
571 Bytes = Bytes_.slice(0, MaxInstBytesNum);
572
573 // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
574 // there are fewer bytes left). This will be overridden on success.
575 Size = std::min((size_t)4, Bytes_.size());
576
577 do {
578 // ToDo: better to switch encoding length using some bit predicate
579 // but it is unknown yet, so try all we can
580
581 // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
582 // encodings
583 if (isGFX1250Plus() && Bytes.size() >= 16) {
584 std::bitset<128> DecW = eat16Bytes(Bytes);
585 if (tryDecodeInst(DecoderTableGFX1250128, MI, DecW, Address, CS))
586 break;
587 Bytes = Bytes_.slice(0, MaxInstBytesNum);
588 }
589
590 if (isGFX11Plus() && Bytes.size() >= 12) {
591 std::bitset<96> DecW = eat12Bytes(Bytes);
592
593 if (isGFX1170() &&
594 tryDecodeInst(DecoderTableGFX117096, DecoderTableGFX1170_FAKE1696, MI,
595 DecW, Address, CS))
596 break;
597
598 if (isGFX11() &&
599 tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
600 DecW, Address, CS))
601 break;
602
603 if (isGFX1250() &&
604 tryDecodeInst(DecoderTableGFX125096, DecoderTableGFX1250_FAKE1696, MI,
605 DecW, Address, CS))
606 break;
607
608 if (isGFX12() &&
609 tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
610 DecW, Address, CS))
611 break;
612
613 if (isGFX12() &&
614 tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
615 break;
616
617 if (isGFX13() &&
618 tryDecodeInst(DecoderTableGFX1396, DecoderTableGFX13_FAKE1696, MI,
619 DecW, Address, CS))
620 break;
621
622 if (STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
623 // Return 8 bytes for a potential literal.
624 Bytes = Bytes_.slice(4, MaxInstBytesNum - 4);
625
626 if (isGFX1250() &&
627 tryDecodeInst(DecoderTableGFX125096, MI, DecW, Address, CS))
628 break;
629 }
630
631 // Reinitialize Bytes
632 Bytes = Bytes_.slice(0, MaxInstBytesNum);
633
634 } else if (Bytes.size() >= 16 &&
635 STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
636 std::bitset<128> DecW = eat16Bytes(Bytes);
637 if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS))
638 break;
639
640 // Reinitialize Bytes
641 Bytes = Bytes_.slice(0, MaxInstBytesNum);
642 }
643
644 if (Bytes.size() >= 8) {
645 const uint64_t QW = eatBytes<uint64_t>(Bytes);
646
647 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
648 tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
649 break;
650
651 if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
652 tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
653 break;
654
655 if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
656 tryDecodeInst(DecoderTableGFX95064, MI, QW, Address, CS))
657 break;
658
659 // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
660 // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
661 // table first so we print the correct name.
662 if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
663 tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
664 break;
665
666 if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
667 tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
668 break;
669
670 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
671 tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
672 break;
673
674 if ((isVI() || isGFX9()) &&
675 tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
676 break;
677
678 if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
679 break;
680
681 if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
682 break;
683
684 if (isGFX1250() &&
685 tryDecodeInst(DecoderTableGFX125064, DecoderTableGFX1250_FAKE1664, MI,
686 QW, Address, CS))
687 break;
688
689 if (isGFX12() &&
690 tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
691 Address, CS))
692 break;
693
694 if (isGFX1170() &&
695 tryDecodeInst(DecoderTableGFX117064, DecoderTableGFX1170_FAKE1664, MI,
696 QW, Address, CS))
697 break;
698
699 if (isGFX11() &&
700 tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
701 Address, CS))
702 break;
703
704 if (isGFX1170() &&
705 tryDecodeInst(DecoderTableGFX1170W6464, MI, QW, Address, CS))
706 break;
707
708 if (isGFX11() &&
709 tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
710 break;
711
712 if (isGFX12() &&
713 tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
714 break;
715
716 if (isGFX13() &&
717 tryDecodeInst(DecoderTableGFX1364, DecoderTableGFX13_FAKE1664, MI, QW,
718 Address, CS))
719 break;
720
721 // Reinitialize Bytes
722 Bytes = Bytes_.slice(0, MaxInstBytesNum);
723 }
724
725 // Try decode 32-bit instruction
726 if (Bytes.size() >= 4) {
727 const uint32_t DW = eatBytes<uint32_t>(Bytes);
728
729 if ((isVI() || isGFX9()) &&
730 tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
731 break;
732
733 if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
734 break;
735
736 if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
737 break;
738
739 if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
740 tryDecodeInst(DecoderTableGFX95032, MI, DW, Address, CS))
741 break;
742
743 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
744 tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
745 break;
746
747 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
748 tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
749 break;
750
751 if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
752 break;
753
754 if (isGFX1170() &&
755 tryDecodeInst(DecoderTableGFX117032, DecoderTableGFX1170_FAKE1632, MI,
756 DW, Address, CS))
757 break;
758
759 if (isGFX11() &&
760 tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
761 Address, CS))
762 break;
763
764 if (isGFX1250() &&
765 tryDecodeInst(DecoderTableGFX125032, DecoderTableGFX1250_FAKE1632, MI,
766 DW, Address, CS))
767 break;
768
769 if (isGFX12() &&
770 tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
771 Address, CS))
772 break;
773
774 if (isGFX13() &&
775 tryDecodeInst(DecoderTableGFX1332, DecoderTableGFX13_FAKE1632, MI, DW,
776 Address, CS))
777 break;
778 }
779
781 } while (false);
782
784
785 decodeImmOperands(MI, *MCII);
786
787 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
788 if (isMacDPP(MI))
790
791 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
793 else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
794 convertVOPCDPPInst(MI); // Special VOP3 case
795 else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
796 convertVOPC64DPPInst(MI); // Special VOP3 case
797 else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
798 -1)
800 else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
801 convertVOP3DPPInst(MI); // Regular VOP3 case
802 }
803
805
806 if (AMDGPU::isMAC(MI.getOpcode())) {
807 // Insert dummy unused src2_modifiers.
809 AMDGPU::OpName::src2_modifiers);
810 }
811
812 if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
813 MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
814 // Insert dummy unused src2_modifiers.
816 AMDGPU::OpName::src2_modifiers);
817 }
818
819 if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
821 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
822 }
823
824 if (MCII->get(MI.getOpcode()).TSFlags &
826 int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
827 AMDGPU::OpName::cpol);
828 if (CPolPos != -1) {
829 unsigned CPol =
830 (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
832 if (MI.getNumOperands() <= (unsigned)CPolPos) {
834 AMDGPU::OpName::cpol);
835 } else if (CPol) {
836 MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
837 }
838 }
839 }
840
841 if ((MCII->get(MI.getOpcode()).TSFlags &
843 (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
844 // GFX90A lost TFE, its place is occupied by ACC.
845 int TFEOpIdx =
846 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
847 if (TFEOpIdx != -1) {
848 auto *TFEIter = MI.begin();
849 std::advance(TFEIter, TFEOpIdx);
850 MI.insert(TFEIter, MCOperand::createImm(0));
851 }
852 }
853
854 // Validate buffer instruction offsets for GFX12+ - must not be a negative.
856 int OffsetIdx =
857 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::offset);
858 if (OffsetIdx != -1) {
859 uint32_t Imm = MI.getOperand(OffsetIdx).getImm();
860 int64_t SignedOffset = SignExtend64<24>(Imm);
861 if (SignedOffset < 0)
863 }
864 }
865
866 if (MCII->get(MI.getOpcode()).TSFlags &
868 int SWZOpIdx =
869 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
870 if (SWZOpIdx != -1) {
871 auto *SWZIter = MI.begin();
872 std::advance(SWZIter, SWZOpIdx);
873 MI.insert(SWZIter, MCOperand::createImm(0));
874 }
875 }
876
877 const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
878 if (Desc.TSFlags & SIInstrFlags::MIMG) {
879 int VAddr0Idx =
880 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
881 int RsrcIdx =
882 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
883 unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
884 if (VAddr0Idx >= 0 && NSAArgs > 0) {
885 unsigned NSAWords = (NSAArgs + 3) / 4;
886 if (Bytes.size() < 4 * NSAWords)
888 for (unsigned i = 0; i < NSAArgs; ++i) {
889 const unsigned VAddrIdx = VAddr0Idx + 1 + i;
890 auto VAddrRCID =
891 MCII->getOpRegClassID(Desc.operands()[VAddrIdx], HwModeRegClass);
892 MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
893 }
894 Bytes = Bytes.slice(4 * NSAWords);
895 }
896
898 }
899
900 if (MCII->get(MI.getOpcode()).TSFlags &
903
904 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
906
907 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
909
910 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
912
913 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsMAI)
915
916 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsWMMA)
918
919 int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
920 AMDGPU::OpName::vdst_in);
921 if (VDstIn_Idx != -1) {
922 int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
924 if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
925 !MI.getOperand(VDstIn_Idx).isReg() ||
926 MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
927 if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
928 MI.erase(&MI.getOperand(VDstIn_Idx));
930 MCOperand::createReg(MI.getOperand(Tied).getReg()),
931 AMDGPU::OpName::vdst_in);
932 }
933 }
934
935 bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
936 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::imm) && !IsSOPK)
938
939 // Some VOPC instructions, e.g., v_cmpx_f_f64, use VOP3 encoding and
940 // have EXEC as implicit destination. Issue a warning if encoding for
941 // vdst is not EXEC.
942 if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3) &&
943 MCII->get(MI.getOpcode()).getNumDefs() == 0 &&
944 MCII->get(MI.getOpcode()).hasImplicitDefOfPhysReg(AMDGPU::EXEC)) {
945 auto ExecEncoding = MRI.getEncodingValue(AMDGPU::EXEC_LO);
946 if (Bytes_[0] != ExecEncoding)
948 }
949
950 Size = MaxInstBytesNum - Bytes.size();
951 return Status;
952}
953
955 if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
956 // The MCInst still has these fields even though they are no longer encoded
957 // in the GFX11 instruction.
958 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
959 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
960 }
961}
962
  // All VINTERP t16/fake16 variants across gfx11/gfx12/gfx13 have no op_sel
  // field in the encoding; materialize it as an explicit zero operand.
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx13 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx13 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx13 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx13 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx13 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx13 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx13 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx13) {
    // The MCInst has this field that is not directly encoded in the
    // instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
  }
}
994
  // GFX9/GFX10: VOPC SDWA forms carry an sdst operand; give them an explicit
  // zero clamp, which is not part of the encoding.
  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
}
1013
/// Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the
/// appropriate subregister for the used format width.
/// \p NumRegs is the dword width implied by the operand's data format.
    MCOperand &MO, uint8_t NumRegs) {
  switch (NumRegs) {
  case 4:
    return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));
  case 6:
    return MO.setReg(
        MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
  case 8:
    // Only narrow when an 8-dword subregister actually exists; a zero
    // result from getSubReg leaves the operand untouched.
    if (MCRegister NewReg = MRI.getSubReg(
            MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7)) {
      MO.setReg(NewReg);
    }
    return;
  case 12: {
    // There is no 384-bit subreg index defined.
    MCRegister BaseReg = MRI.getSubReg(MO.getReg(), AMDGPU::sub0);
    MCRegister NewReg = MRI.getMatchingSuperReg(
        BaseReg, AMDGPU::sub0, &MRI.getRegClass(AMDGPU::VReg_384RegClassID));
    return MO.setReg(NewReg);
  }
  case 16:
    // No-op in cases where one operand is still f8/bf8.
    return;
  default:
    llvm_unreachable("Unexpected size for mfma/wmma f8f6f4 operand");
  }
}
1044
/// f8f6f4 instructions have different pseudos depending on the used formats. In
/// the disassembler table, we only have the variants with the largest register
/// classes which assume using an fp8/bf8 format for both operands. The actual
/// register class depends on the format in blgp and cbsz operands. Adjust the
/// register classes depending on the used format.
  int BlgpIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::blgp);
  // No blgp operand means this is not an f8f6f4 MAI; nothing to adjust.
  if (BlgpIdx == -1)
    return;

  int CbszIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cbsz);

  unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
  unsigned BLGP = MI.getOperand(BlgpIdx).getImm();

  // Look up the pseudo whose source register classes match the encoded
  // cbsz/blgp formats; bail out if it is already the current opcode.
  const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
      AMDGPU::getMFMA_F8F6F4_WithFormatArgs(CBSZ, BLGP, MI.getOpcode());
  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())
    return;

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);
  int Src0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
  int Src1Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
                              AdjustedRegClassOpcode->NumRegsSrcA);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
                              AdjustedRegClassOpcode->NumRegsSrcB);
}
1078
  // Same fixup as the MAI f8f6f4 path, but keyed on the WMMA matrix format
  // operands instead of cbsz/blgp.
  int FmtAIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_a_fmt);
  // No matrix_a_fmt operand means this is not an f8f6f4 WMMA.
  if (FmtAIdx == -1)
    return;

  int FmtBIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_b_fmt);

  unsigned FmtA = MI.getOperand(FmtAIdx).getImm();
  unsigned FmtB = MI.getOperand(FmtBIdx).getImm();

  const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
      AMDGPU::getWMMA_F8F6F4_WithFormatArgs(FmtA, FmtB, MI.getOpcode());
  if (!AdjustedRegClassOpcode ||
      AdjustedRegClassOpcode->Opcode == MI.getOpcode())
    return;

  MI.setOpcode(AdjustedRegClassOpcode->Opcode);
  int Src0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
  int Src1Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
                              AdjustedRegClassOpcode->NumRegsSrcA);
  adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
                              AdjustedRegClassOpcode->NumRegsSrcB);
}
1107
  // Packed per-source bit fields reconstructed from src_modifiers; bit J
  // corresponds to source operand J (see collectVOPModifiers below).
  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;
};
1114
// Reconstruct values of VOP3/VOP3P operands such as op_sel.
// Note that these values do not affect disassembler output,
// so this is only necessary for consistency with src_modifiers.
    bool IsVOP3P = false) {
  VOPModifiers Modifiers;
  unsigned Opc = MI.getOpcode();
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers,
                                   AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    if (OpIdx == -1)
      continue;

    unsigned Val = MI.getOperand(OpIdx).getImm();

    // Bit J of each packed field is taken from source J's modifier word.
    Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
    if (IsVOP3P) {
      Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
      Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
      Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
    } else if (J == 0) {
      // VOP3: the destination op_sel bit travels in src0's modifiers and is
      // placed in bit 3 of the packed OpSel value.
      Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
    }
  }

  return Modifiers;
}
1144
// Instructions decode the op_sel/suffix bits into the src_modifier
// operands. Copy those bits into the src operands for true16 VGPRs.
  const unsigned Opc = MI.getOpcode();
  const MCRegisterClass &ConversionRC =
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
  // (operand, its modifiers operand, op_sel mask) triples; vdst's hi bit is
  // carried in src0's modifier word.
  constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
      OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
                      {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
                      {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
                      {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
    int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
    if (OpIdx == -1 || OpModsIdx == -1)
      continue;
    MCOperand &Op = MI.getOperand(OpIdx);
    if (!Op.isReg())
      continue;
    // Only 16-bit (VGPR_16) operands participate.
    if (!ConversionRC.contains(Op.getReg()))
      continue;
    unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) { // isHi
      unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
      Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
    }
  }
}
1179
// MAC opcodes have special old and src2 operands.
// src2 is tied to dst, while old is not tied (but assumed to be).
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);
  auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);

  // An 'old' operand that is NOT tied identifies the MAC form; sanity-check
  // in debug builds that src2 exists and carries the tie instead.
  if (OldIdx != -1 && Desc.getOperandConstraint(
                          OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
    assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
    assert(Desc.getOperandConstraint(
               AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
    (void)DST_IDX;
    return true;
  }

  return false;
}
1200
// Create dummy old operand and insert dummy unused src2_modifiers
  // Descriptor must still have room for the operand being inserted.
  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
  insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
      AMDGPU::OpName::src2_modifiers);
}
1208
  // DPP8 post-decode fixup: mirror vdst into vdst_in when present, then fill
  // in op_sel (from src_modifiers) or dummy src modifiers as needed.
  unsigned Opc = MI.getOpcode();

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
        AMDGPU::OpName::op_sel);
  } else {
    // Insert dummy unused src modifiers.
    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
          AMDGPU::OpName::src0_modifiers);

    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
          AMDGPU::OpName::src1_modifiers);
  }
}
1237

  // VOP3 DPP fixup: mirror vdst into vdst_in when present, then reconstruct
  // the standalone op_sel operand from the src_modifiers bits.
  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
        AMDGPU::OpName::op_sel);
  }
}
1255
// Given a wide tuple \p Reg check if it will overflow 256 registers.
// \returns \p Reg on success or NoRegister otherwise.
    const MCRegisterInfo &MRI) {
  unsigned NumRegs = RC.getSizeInBits() / 32;
  MCRegister Sub0 = MRI.getSubReg(Reg, AMDGPU::sub0);
  // Not a tuple (no sub0): a single register cannot overflow the file.
  if (!Sub0)
    return Reg;

  MCRegister BaseReg;
  if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(Sub0))
    BaseReg = AMDGPU::VGPR0;
  else if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Sub0))
    BaseReg = AMDGPU::AGPR0;

  assert(BaseReg && "Only vector registers expected");

  // First lane index plus tuple width must fit in the 256-entry file.
  return (Sub0 - BaseReg + NumRegs <= 256) ? Reg : MCRegister();
}
1275
// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show address as if it
// has 1 dword, which could be not really so.
  auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;

  // Gather the named-operand indices we need; -1 means "not present".
  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);
  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  AMDGPU::OpName RsrcOpName = (TSFlags & SIInstrFlags::MIMG)
                                  ? AMDGPU::OpName::srsrc
                                  : AMDGPU::OpName::rsrc;
  int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::d16);

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);

  assert(VDataIdx != -1);
  if (BaseOpcode->BVH) {
    // Add A16 operand for intersect_ray instructions
    addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
    return;
  }

  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
  bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
  bool IsNSA = false;
  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  if (isGFX10Plus()) {
    // GFX10+ encodes dim/a16, so the real VADDR size can be computed.
    unsigned DimIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
    int A16Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
    const AMDGPU::MIMGDimInfo *Dim =
        AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    AddrSize =
        AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));

    // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
    // VIMAGE insts other than BVH never use vaddr4.
    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12 ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx13;
    if (!IsNSA) {
      if (!IsVSample && AddrSize > 12)
        AddrSize = 16;
    } else {
      if (AddrSize > Info->VAddrDwords) {
        if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
          // The NSA encoding does not contain enough operands for the
          // combination of base opcode / dimension. Should this be an error?
          return;
        }
        IsPartialNSA = true;
      }
    }
  }

  // Data size: gather4 always returns 4 channels, otherwise count dmask bits
  // (minimum 1), halved for packed D16 and +1 for a TFE status dword.
  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
    DstSize += 1;

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return;

  int NewOpcode =
      AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
  if (NewOpcode == -1)
    return;

  // Widen the register to the correct number of enabled channels.
  MCRegister NewVdata;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->getOpRegClassID(
        MCII->get(NewOpcode).operands()[VDataIdx], HwModeRegClass);

    // Get first subregister of VData
    MCRegister Vdata0 = MI.getOperand(VDataIdx).getReg();
    MCRegister VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;

    const MCRegisterClass &NewRC = MRI.getRegClass(DataRCID);
    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, &NewRC);
    NewVdata = CheckVGPROverflow(NewVdata, NewRC, MRI);
    if (!NewVdata) {
      // It's possible to encode this such that the low register + enabled
      // components exceeds the register count.
      return;
    }
  }

  // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
  // If using partial NSA on GFX11+ widen last address register.
  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  MCRegister NewVAddrSA;
  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
    MCRegister VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->getOpRegClassID(
        MCII->get(NewOpcode).operands()[VAddrSAIdx], HwModeRegClass);

    const MCRegisterClass &NewRC = MRI.getRegClass(AddrRCID);
    NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0, &NewRC);
    NewVAddrSA = CheckVGPROverflow(NewVAddrSA, NewRC, MRI);
    if (!NewVAddrSA)
      return;
  }

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

    if (IsAtomic) {
      // Atomic operations have an additional operand (a copy of data)
      MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
    }
  }

  if (NewVAddrSA) {
    MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
  } else if (IsNSA) {
    // Full NSA: drop the surplus per-dword address operands instead.
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  }
}
1431
// Opsel and neg bits are used in src_modifiers and standalone operands. Autogen
// decoder only adds to src_modifiers, so manually add the bits to the other
// operands.
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  // Gather op_sel/op_sel_hi/neg_lo/neg_hi from the VOP3P modifier words.
  auto Mods = collectVOPModifiers(MI, true);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
        AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
        AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
        AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
        AMDGPU::OpName::neg_hi);
}
1461
// Create dummy old operand and insert optional operands
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
    insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
        AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
        AMDGPU::OpName::src1_modifiers);
}
1481
  // 64-bit-comparison VOPC DPP: reconstruct the standalone op_sel operand
  // when the descriptor expects it and the decoder did not produce it.
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();


  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
        AMDGPU::OpName::op_sel);
  }
}
1495
  // Re-attach the literal already consumed from the byte stream as the
  // named immX operand.
  assert(HasLiteral && "Should have decoded a literal");
  insertNamedMCOperand(MI, MCOperand::createImm(Literal), AMDGPU::OpName::immX);
}
1500
1501const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
1502 return getContext().getRegisterInfo()->
1503 getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
1504}
1505
// Report a decode error on the disassembly comment stream and return an
// empty (invalid) operand as a placeholder.
inline
                                       const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}
1515
1519
// Create a register operand for index Val in register class RegClassID,
// reporting an error operand for out-of-range indices instead of asserting.
inline
                                                unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                               ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}
1529
// Create a scalar-register operand. The raw encoding counts 32-bit units,
// so the value is shifted down by the tuple alignment of the class
// (1 for 64-bit pairs, 2 for 128-bit-and-wider quad-aligned tuples).
inline
                                                 unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accepting as much as we can, let assembler sort it out
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
    // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
    // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
    // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
    // this bundle?
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  // Misaligned encodings are accepted but flagged for the reader.
  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}
1579
// VGPR_16 registers interleave low/high halves: 2*RegIdx addresses the low
// half of VGPR RegIdx, and +1 selects the high half.
                                                   bool IsHi) const {
  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
  return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
}
1585
// Decode Literals for insts which always have a literal in the encoding
  // All literal uses within one instruction must agree on the value; a
  // second, different literal is an encoding error.
  if (HasLiteral) {
    assert(
        "Should only decode multiple kimm with VOPD, check VSrc operand types");
    if (Literal != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Val;
  return MCOperand::createImm(Literal);
}
1600
  // 64-bit variant of the mandatory-literal decode: the single recorded
  // literal must match any previously seen one.
  if (HasLiteral) {
    if (Literal != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Val;

  // When the high half is zero, the value is rendered with the lit64
  // modifier (continuation of the return expression below).
  bool UseLit64 = Hi_32(Literal) == 0;
                      LitModifier::Lit64, Literal, getContext()))
                : MCOperand::createImm(Literal);
}
1615
// Read (once) the trailing 32-bit literal from the instruction bytes and
// wrap it as an operand, deciding per operand type whether the value is an
// inlinable constant (UseLit) and widening to the high dword for the
// f64-style operand case.
    const MCOperandInfo &OpDesc) const {
  // For now all literal constants are supposed to be unsigned integer
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                               Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = eatBytes<uint32_t>(Bytes);
  }

  // For disassembling always assume all inline constants are available.
  bool HasInv2Pi = true;

  // Invalid instruction codes may contain literals for inline-only
  // operands, so we support them here as well.
  int64_t Val = Literal;
  bool UseLit = false;
  switch (OpDesc.OperandType) {
  default:
    llvm_unreachable("Unexpected operand type!");
    UseLit = AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
    break;
    break;
    UseLit = AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
    break;
    UseLit = AMDGPU::isInlinableLiteralV2F16(Val);
    break;
    break;
    break;
    UseLit = AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi);
    break;
    UseLit = AMDGPU::isInlinableLiteralV2I16(Val);
    break;
    UseLit = AMDGPU::isInlinableLiteral32(Val, HasInv2Pi);
    break;
    UseLit = AMDGPU::isInlinableLiteral64(Val << 32, HasInv2Pi);
    if (!UseLit)
      Val <<= 32;
    break;
    UseLit = AMDGPU::isInlinableLiteral64(Val, HasInv2Pi);
    break;
  // TODO: Disassembling V_DUAL_FMAMK_F32_X_FMAMK_F32_gfx11 hits
  // decoding a literal in a position of a register operand. Give
  // it special handling in the caller, decodeImmOperands(), instead
  // of quietly allowing it here.
    break;
  }

      : MCOperand::createImm(Val);
}
1704
  // Read (once) the trailing 64-bit literal; only legal on subtargets with
  // the 64-bit-literals feature.
  assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));

  if (!HasLiteral) {
    if (Bytes.size() < 8) {
      return errOperand(0, "cannot read literal64, inst bytes left " +
                               Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = eatBytes<uint64_t>(Bytes);
  }

  // Use the lit64 rendering when the high half is zero or the value is an
  // inlinable 64-bit constant (continuation of the return expression below).
  bool UseLit64 = Hi_32(Literal) == 0;

  UseLit64 |= AMDGPU::isInlinableLiteral64(
      Literal, STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm));

      LitModifier::Lit64, Literal, getContext()))
                : MCOperand::createImm(Literal);
}
1726
  using namespace AMDGPU::EncValues;

  // Encodings up to the positive maximum map to non-negative immediates
  // counted from INLINE_INTEGER_C_MIN; the remainder map to negative
  // immediates counted down from the positive maximum.
  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
  // Cast prevents negative overflow.
}
1736
1737static int64_t getInlineImmVal32(unsigned Imm) {
1738 switch (Imm) {
1739 case 240:
1740 return llvm::bit_cast<uint32_t>(0.5f);
1741 case 241:
1742 return llvm::bit_cast<uint32_t>(-0.5f);
1743 case 242:
1744 return llvm::bit_cast<uint32_t>(1.0f);
1745 case 243:
1746 return llvm::bit_cast<uint32_t>(-1.0f);
1747 case 244:
1748 return llvm::bit_cast<uint32_t>(2.0f);
1749 case 245:
1750 return llvm::bit_cast<uint32_t>(-2.0f);
1751 case 246:
1752 return llvm::bit_cast<uint32_t>(4.0f);
1753 case 247:
1754 return llvm::bit_cast<uint32_t>(-4.0f);
1755 case 248: // 1 / (2 * PI)
1756 return 0x3e22f983;
1757 default:
1758 llvm_unreachable("invalid fp inline imm");
1759 }
1760}
1761
1762static int64_t getInlineImmVal64(unsigned Imm) {
1763 switch (Imm) {
1764 case 240:
1765 return llvm::bit_cast<uint64_t>(0.5);
1766 case 241:
1767 return llvm::bit_cast<uint64_t>(-0.5);
1768 case 242:
1769 return llvm::bit_cast<uint64_t>(1.0);
1770 case 243:
1771 return llvm::bit_cast<uint64_t>(-1.0);
1772 case 244:
1773 return llvm::bit_cast<uint64_t>(2.0);
1774 case 245:
1775 return llvm::bit_cast<uint64_t>(-2.0);
1776 case 246:
1777 return llvm::bit_cast<uint64_t>(4.0);
1778 case 247:
1779 return llvm::bit_cast<uint64_t>(-4.0);
1780 case 248: // 1 / (2 * PI)
1781 return 0x3fc45f306dc9c882;
1782 default:
1783 llvm_unreachable("invalid fp inline imm");
1784 }
1785}
1786
1787static int64_t getInlineImmValF16(unsigned Imm) {
1788 switch (Imm) {
1789 case 240:
1790 return 0x3800;
1791 case 241:
1792 return 0xB800;
1793 case 242:
1794 return 0x3C00;
1795 case 243:
1796 return 0xBC00;
1797 case 244:
1798 return 0x4000;
1799 case 245:
1800 return 0xC000;
1801 case 246:
1802 return 0x4400;
1803 case 247:
1804 return 0xC400;
1805 case 248: // 1 / (2 * PI)
1806 return 0x3118;
1807 default:
1808 llvm_unreachable("invalid fp inline imm");
1809 }
1810}
1811
1812static int64_t getInlineImmValBF16(unsigned Imm) {
1813 switch (Imm) {
1814 case 240:
1815 return 0x3F00;
1816 case 241:
1817 return 0xBF00;
1818 case 242:
1819 return 0x3F80;
1820 case 243:
1821 return 0xBF80;
1822 case 244:
1823 return 0x4000;
1824 case 245:
1825 return 0xC000;
1826 case 246:
1827 return 0x4080;
1828 case 247:
1829 return 0xC080;
1830 case 248: // 1 / (2 * PI)
1831 return 0x3E22;
1832 default:
1833 llvm_unreachable("invalid fp inline imm");
1834 }
1835}
1836
1837unsigned AMDGPUDisassembler::getVgprClassId(unsigned Width) const {
1838 using namespace AMDGPU;
1839
1840 switch (Width) {
1841 case 16:
1842 case 32:
1843 return VGPR_32RegClassID;
1844 case 64:
1845 return VReg_64RegClassID;
1846 case 96:
1847 return VReg_96RegClassID;
1848 case 128:
1849 return VReg_128RegClassID;
1850 case 160:
1851 return VReg_160RegClassID;
1852 case 192:
1853 return VReg_192RegClassID;
1854 case 256:
1855 return VReg_256RegClassID;
1856 case 288:
1857 return VReg_288RegClassID;
1858 case 320:
1859 return VReg_320RegClassID;
1860 case 352:
1861 return VReg_352RegClassID;
1862 case 384:
1863 return VReg_384RegClassID;
1864 case 512:
1865 return VReg_512RegClassID;
1866 case 1024:
1867 return VReg_1024RegClassID;
1868 }
1869 llvm_unreachable("Invalid register width!");
1870}
1871
1872unsigned AMDGPUDisassembler::getAgprClassId(unsigned Width) const {
1873 using namespace AMDGPU;
1874
1875 switch (Width) {
1876 case 16:
1877 case 32:
1878 return AGPR_32RegClassID;
1879 case 64:
1880 return AReg_64RegClassID;
1881 case 96:
1882 return AReg_96RegClassID;
1883 case 128:
1884 return AReg_128RegClassID;
1885 case 160:
1886 return AReg_160RegClassID;
1887 case 256:
1888 return AReg_256RegClassID;
1889 case 288:
1890 return AReg_288RegClassID;
1891 case 320:
1892 return AReg_320RegClassID;
1893 case 352:
1894 return AReg_352RegClassID;
1895 case 384:
1896 return AReg_384RegClassID;
1897 case 512:
1898 return AReg_512RegClassID;
1899 case 1024:
1900 return AReg_1024RegClassID;
1901 }
1902 llvm_unreachable("Invalid register width!");
1903}
1904
1905unsigned AMDGPUDisassembler::getSgprClassId(unsigned Width) const {
1906 using namespace AMDGPU;
1907
1908 switch (Width) {
1909 case 16:
1910 case 32:
1911 return SGPR_32RegClassID;
1912 case 64:
1913 return SGPR_64RegClassID;
1914 case 96:
1915 return SGPR_96RegClassID;
1916 case 128:
1917 return SGPR_128RegClassID;
1918 case 160:
1919 return SGPR_160RegClassID;
1920 case 256:
1921 return SGPR_256RegClassID;
1922 case 288:
1923 return SGPR_288RegClassID;
1924 case 320:
1925 return SGPR_320RegClassID;
1926 case 352:
1927 return SGPR_352RegClassID;
1928 case 384:
1929 return SGPR_384RegClassID;
1930 case 512:
1931 return SGPR_512RegClassID;
1932 }
1933 llvm_unreachable("Invalid register width!");
1934}
1935
1936unsigned AMDGPUDisassembler::getTtmpClassId(unsigned Width) const {
1937 using namespace AMDGPU;
1938
1939 switch (Width) {
1940 case 16:
1941 case 32:
1942 return TTMP_32RegClassID;
1943 case 64:
1944 return TTMP_64RegClassID;
1945 case 128:
1946 return TTMP_128RegClassID;
1947 case 256:
1948 return TTMP_256RegClassID;
1949 case 288:
1950 return TTMP_288RegClassID;
1951 case 320:
1952 return TTMP_320RegClassID;
1953 case 352:
1954 return TTMP_352RegClassID;
1955 case 384:
1956 return TTMP_384RegClassID;
1957 case 512:
1958 return TTMP_512RegClassID;
1959 }
1960 llvm_unreachable("Invalid register width!");
1961}
1962
1963int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1964 using namespace AMDGPU::EncValues;
1965
1966 unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1967 unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1968
1969 return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1970}
1971
                                          unsigned Val) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 1024); // enum10

  // Bit 9 selects the accumulator (AGPR) file; the low 9 bits index into it.
  bool IsAGPR = Val & 512;
  Val &= 511;

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
    return createRegOperand(IsAGPR ? getAgprClassId(Width)
                                   : getVgprClassId(Width), Val - VGPR_MIN);
  }
  // Everything else (SGPR/TTMP/inline constants/special regs) is 8-bit.
  return decodeNonVGPRSrcOp(Inst, Width, Val & 0xFF);
}
1987
                                                unsigned Width,
                                                unsigned Val) const {
  // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
  // decoded earlier.
  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
  using namespace AMDGPU::EncValues;

  if (Val <= SGPR_MAX) {
    // "SGPR_MIN <= Val" is always true and causes compilation warning.
    static_assert(SGPR_MIN == 0);
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  // Inline constants and the 32-bit literal marker pass through as raw
  // immediates for later decoding.
  if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
      (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
      Val == LITERAL_CONST)
    return MCOperand::createImm(Val);

  if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
    return decodeLiteral64Constant();
  }

  // Remaining encodings name special registers; dispatch on operand width.
  switch (Width) {
  case 32:
  case 16:
    return decodeSpecialReg32(Val);
  case 64:
    return decodeSpecialReg64(Val);
  case 96:
  case 128:
  case 256:
  case 512:
    return decodeSpecialReg96Plus(Val);
  default:
    llvm_unreachable("unexpected immediate type");
  }
}
2031
// Bit 0 of DstY isn't stored in the instruction, because it's always the
// opposite of bit 0 of DstX.
                                                unsigned Val) const {
  int VDstXInd =
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
  assert(VDstXInd != -1);
  assert(Inst.getOperand(VDstXInd).isReg());
  unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
  // Reconstruct the implicit low bit as the complement of DstX's bit 0.
  Val |= ~XDstReg & 1;
  return createRegOperand(getVgprClassId(32), Val);
}
2044
  using namespace AMDGPU;

  // Map a 32-bit special-register source encoding to its MCRegister.
  // Note encodings 124/125 swap meaning (M0 vs SGPR_NULL) at GFX11.
  switch (Val) {
  // clang-format off
  case 102: return createRegOperand(FLAT_SCR_LO);
  case 103: return createRegOperand(FLAT_SCR_HI);
  case 104: return createRegOperand(XNACK_MASK_LO);
  case 105: return createRegOperand(XNACK_MASK_HI);
  case 106: return createRegOperand(VCC_LO);
  case 107: return createRegOperand(VCC_HI);
  case 108: return createRegOperand(TBA_LO);
  case 109: return createRegOperand(TBA_HI);
  case 110: return createRegOperand(TMA_LO);
  case 111: return createRegOperand(TMA_HI);
  case 124:
    return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
  case 125:
    return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
  case 126: return createRegOperand(EXEC_LO);
  case 127: return createRegOperand(EXEC_HI);
  case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
  case 231: return createRegOperand(SRC_FLAT_SCRATCH_BASE_HI);
  case 235: return createRegOperand(SRC_SHARED_BASE_LO);
  case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
  case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(SRC_VCCZ);
  case 252: return createRegOperand(SRC_EXECZ);
  case 253: return createRegOperand(SRC_SCC);
  case 254: return createRegOperand(LDS_DIRECT);
  default: break;
  // clang-format on
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}
2082
// Decode a 64-bit "special" scalar operand encoding: the paired (full-width)
// counterparts of the 32-bit specials, e.g. VCC, EXEC, FLAT_SCR.
// NOTE(review): the signature line (original 2083) is missing from this
// extract.
2084 using namespace AMDGPU;
2085
2086 switch (Val) {
2087 case 102: return createRegOperand(FLAT_SCR);
2088 case 104: return createRegOperand(XNACK_MASK);
2089 case 106: return createRegOperand(VCC);
2090 case 108: return createRegOperand(TBA);
2091 case 110: return createRegOperand(TMA);
// NULL moved from encoding 125 to 124 on GFX11+.
2092 case 124:
2093 if (isGFX11Plus())
2094 return createRegOperand(SGPR_NULL);
2095 break;
2096 case 125:
2097 if (!isGFX11Plus())
2098 return createRegOperand(SGPR_NULL);
2099 break;
2100 case 126: return createRegOperand(EXEC);
2101 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
2102 case 235: return createRegOperand(SRC_SHARED_BASE);
2103 case 236: return createRegOperand(SRC_SHARED_LIMIT);
2104 case 237: return createRegOperand(SRC_PRIVATE_BASE);
2105 case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
2106 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
2107 case 251: return createRegOperand(SRC_VCCZ);
2108 case 252: return createRegOperand(SRC_EXECZ);
2109 case 253: return createRegOperand(SRC_SCC);
2110 default: break;
2111 }
2112 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2114
// Decode special scalar encodings for operands of 96 bits and wider; only
// SGPR_NULL is representable here (encoding 124 on GFX11+, 125 before).
// NOTE(review): the signature line (original 2115) is missing from this
// extract.
2116 using namespace AMDGPU;
2117
2118 switch (Val) {
2119 case 124:
2120 if (isGFX11Plus())
2121 return createRegOperand(SGPR_NULL);
2122 break;
2123 case 125:
2124 if (!isGFX11Plus())
2125 return createRegOperand(SGPR_NULL);
2126 break;
2127 default:
2128 break;
2129 }
2130 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2132
// Decode an SDWA source operand. On GFX9/GFX10 the 9-bit SDWA src encoding
// partitions into VGPR, SGPR, TTMP, inline-constant and special-register
// ranges; on VI the value is a plain VGPR number.
// NOTE(review): the first signature line (original 2133) is missing from this
// extract.
2134 const unsigned Val) const {
2135 using namespace AMDGPU::SDWA;
2136 using namespace AMDGPU::EncValues;
2137
2138 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
2139 STI.hasFeature(AMDGPU::FeatureGFX10)) {
2140 // XXX: cast to int is needed to avoid stupid warning:
2141 // compare with unsigned is always true
2142 if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
2143 Val <= SDWA9EncValues::SRC_VGPR_MAX) {
2144 return createRegOperand(getVgprClassId(Width),
2145 Val - SDWA9EncValues::SRC_VGPR_MIN);
2146 }
2147 if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
2148 Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
2149 : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
2150 return createSRegOperand(getSgprClassId(Width),
2151 Val - SDWA9EncValues::SRC_SGPR_MIN);
2152 }
2153 if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
2154 Val <= SDWA9EncValues::SRC_TTMP_MAX) {
2155 return createSRegOperand(getTtmpClassId(Width),
2156 Val - SDWA9EncValues::SRC_TTMP_MIN);
2157 }
2158
// Remaining encodings are biased by the SGPR range start; rebase before
// interpreting as inline constant or special register.
2159 const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
2160
2161 if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
2162 (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
2163 return MCOperand::createImm(SVal);
2164
2165 return decodeSpecialReg32(SVal);
2166 }
2167 if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
2168 return createRegOperand(getVgprClassId(Width), Val);
2169 llvm_unreachable("unsupported target");
2171
// 16-bit SDWA source: forwards to the width-parameterized decoder.
// NOTE(review): signature line (original 2172) missing from this extract.
2173 return decodeSDWASrc(16, Val);
2174}
2175
// 32-bit SDWA source: forwards to the width-parameterized decoder.
// NOTE(review): signature line (original 2176) missing from this extract.
2177 return decodeSDWASrc(32, Val);
2178}
2179
// Decode the SDWA VOPC destination: either VCC (when the VCC bit is clear) or
// an explicit SGPR/TTMP/special destination, sized by the wavefront width.
// NOTE(review): the signature line (original 2180) is missing from this
// extract.
2181 using namespace AMDGPU::SDWA;
2182
2183 assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
2184 STI.hasFeature(AMDGPU::FeatureGFX10)) &&
2185 "SDWAVopcDst should be present only on GFX9+");
2186
2187 bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);
2188
2189 if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
// Strip the VCC flag bit, leaving the SGPR-space destination encoding.
2190 Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
2191
2192 int TTmpIdx = getTTmpIdx(Val);
2193 if (TTmpIdx >= 0) {
2194 auto TTmpClsId = getTtmpClassId(IsWave32 ? 32 : 64);
2195 return createSRegOperand(TTmpClsId, TTmpIdx);
2196 }
2197 if (Val > SGPR_MAX) {
2198 return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
2199 }
2200 return createSRegOperand(getSgprClassId(IsWave32 ? 32 : 64), Val);
2201 }
2202 return createRegOperand(IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
2203}
2204
// Decode a boolean (lane-mask) register operand: 32-bit wide in wave32 mode,
// 64-bit wide in wave64 mode.
// NOTE(review): first signature line (original 2205) missing from this
// extract.
2206 unsigned Val) const {
2207 return STI.hasFeature(AMDGPU::FeatureWavefrontSize32)
2208 ? decodeSrcOp(Inst, 32, Val)
2209 : decodeSrcOp(Inst, 64, Val);
2210}
2211
// Decode a 32-bit source operand (split-barrier style operand per upstream —
// TODO confirm against the full signature, which is missing from this
// extract at original line 2212).
2213 unsigned Val) const {
2214 return decodeSrcOp(Inst, 32, Val);
2215}
2216
// NOTE(review): the signature and the guarding `if` condition (original lines
// 2217-2218) are missing from this extract; as shown, an invalid value yields
// an empty (invalid) MCOperand, otherwise the raw immediate is returned.
2219 return MCOperand();
2220 return MCOperand::createImm(Val);
2221}
2222
// Decode a microcode-version immediate into a symbolic expression
// (version symbol OR'd with the W64/W32/MDP flag expressions), falling back
// to a plain immediate if reserved bits are set.
// NOTE(review): the signature (original 2223), the `using Encoding = ...`
// alias (2228) and the invalid-version assignment to E (2244) are missing
// from this extract.
2224 using VersionField = AMDGPU::EncodingField<7, 0>;
2225 using W64Bit = AMDGPU::EncodingBit<13>;
2226 using W32Bit = AMDGPU::EncodingBit<14>;
2227 using MDPBit = AMDGPU::EncodingBit<15>;
2229
2230 auto [Version, W64, W32, MDP] = Encoding::decode(Imm);
2231
2232 // Decode into a plain immediate if any unused bits are raised.
2233 if (Encoding::encode(Version, W64, W32, MDP) != Imm)
2234 return MCOperand::createImm(Imm);
2235
2236 const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
2237 const auto *I = find_if(
2238 Versions, [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
2239 return V.Code == Version;
2240 });
2241 MCContext &Ctx = getContext();
2242 const MCExpr *E;
2243 if (I == Versions.end())
2245 else
2246 E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx);
2247
2248 if (W64)
2249 E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx);
2250 if (W32)
2251 E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx);
2252 if (MDP)
2253 E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx);
2254
2255 return MCOperand::createExpr(E);
2256}
2257
// Subtarget predicate helpers (isVI/isGFX9/.../hasArchitectedFlatScratch).
// NOTE(review): extraction collapsed most of these one-line definitions —
// many signature lines and bodies (e.g. originals 2258, 2262-2264, 2268-2275,
// 2280-2284, 2292-2308, 2312-2315) are missing; only fragments remain below.
// Restore from upstream before compiling.
2259 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2260}
2261
2263
2265 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2266}
2267
2269
2271
2275
2277 return STI.hasFeature(AMDGPU::FeatureGFX11);
2278}
2279
2283
2285 return STI.hasFeature(AMDGPU::FeatureGFX11_7Insts);
2286}
2287
2289 return STI.hasFeature(AMDGPU::FeatureGFX12);
2290}
2291
2295
2297
2301
2303
2307
2309 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2310}
2311
2315
2316//===----------------------------------------------------------------------===//
2317// AMDGPU specific symbol handling
2318//===----------------------------------------------------------------------===//
2319
2320/// Print a string describing the reserved bit range specified by Mask with
2321/// offset BaseBytes for use in error comments. Mask is a single continuous
2322/// range of 1s surrounded by zeros. The format here is meant to align with the
2323/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
2324static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
2325 SmallString<32> Result;
2326 raw_svector_ostream S(Result);
2327
2328 int TrailingZeros = llvm::countr_zero(Mask);
2329 int PopCount = llvm::popcount(Mask);
2330
2331 if (PopCount == 1) {
2332 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2333 } else {
2334 S << "bits in range ("
2335 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
2336 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2337 }
2338
2339 return Result;
2340}
2341
// Helper macros for the COMPUTE_PGM_RSRC* decoders below. They assume local
// variables `FourByteBuffer`, `KdStream` and `Indent` are in scope at the
// expansion site.
2342#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
// Emit ".amdhsa_* <value>" for the field selected by MASK.
2343#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2344 do { \
2345 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
2346 } while (0)
// Same, but as an assembler comment for fields with no real directive.
2347#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
2348 do { \
2349 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
2350 << GET_FIELD(MASK) << '\n'; \
2351 } while (0)
2352
// Fail decoding with a descriptive error if any reserved bit in MASK is set.
2353#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
2354 do { \
2355 if (FourByteBuffer & (MASK)) { \
2356 return createStringError(std::errc::invalid_argument, \
2357 "kernel descriptor " DESC \
2358 " reserved %s set" MSG, \
2359 getBitRangeFromMask((MASK), 0).c_str()); \
2360 } \
2361 } while (0)
2362
// Convenience wrappers varying the description and trailing message.
2363#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
2364#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
2365 CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
2366#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
2367 CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
2368#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
2369 CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
2370
// Decode the COMPUTE_PGM_RSRC1 word of a kernel descriptor into .amdhsa_*
// directives written to KdStream, rejecting set reserved bits.
// NOTE(review): the signature (original 2372), the SGPR granule multiplier
// (2418) and the flat-scratch guard (2421) are missing from this extract.
2371// NOLINTNEXTLINE(readability-identifier-naming)
2373 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2374 using namespace amdhsa;
2375 StringRef Indent = "\t";
2376
2377 // We cannot accurately backward compute #VGPRs used from
2378 // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
2379 // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
2380 // simply calculate the inverse of what the assembler does.
2381
2382 uint32_t GranulatedWorkitemVGPRCount =
2383 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
2384
2385 uint32_t NextFreeVGPR =
2386 (GranulatedWorkitemVGPRCount + 1) *
2387 AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
2388
2389 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
2390
2391 // We cannot backward compute values used to calculate
2392 // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following
2393 // directives can't be computed:
2394 // .amdhsa_reserve_vcc
2395 // .amdhsa_reserve_flat_scratch
2396 // .amdhsa_reserve_xnack_mask
2397 // They take their respective default values if not specified in the assembly.
2398 //
2399 // GRANULATED_WAVEFRONT_SGPR_COUNT
2400 // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
2401 //
2402 // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
2403 // are set to 0. So while disassembling we consider that:
2404 //
2405 // GRANULATED_WAVEFRONT_SGPR_COUNT
2406 // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
2407 //
2408 // The disassembler cannot recover the original values of those 3 directives.
2409
2410 uint32_t GranulatedWavefrontSGPRCount =
2411 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
2412
2413 if (isGFX10Plus())
2414 CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
2415 "must be zero on gfx10+");
2416
2417 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
2419
2420 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
2422 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
2423 bool ReservedXnackMask = STI.hasFeature(AMDGPU::FeatureXNACK);
2424 assert(!ReservedXnackMask || STI.hasFeature(AMDGPU::FeatureSupportsXNACK));
2425 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << ReservedXnackMask
2426 << '\n';
2427 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
2428
2429 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
2430
2431 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
2432 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
2433 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
2434 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
2435 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
2436 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
2437 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
2438 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
2439
2440 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
2441
2442 if (STI.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
2443 PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
2444 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
2445
2446 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
2447
2448 if (STI.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
2449 PRINT_DIRECTIVE(".amdhsa_ieee_mode",
2450 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
2451
2452 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
2453 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
2454
2455 // Bits [26].
2456 if (isGFX9Plus()) {
2457 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
2458 } else {
2459 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
2460 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
2461 }
2462
2463 // Bits [27].
2464 if (isGFX1250Plus()) {
2465 PRINT_PSEUDO_DIRECTIVE_COMMENT("FLAT_SCRATCH_IS_NV",
2466 COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);
2467 } else {
2468 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX120_RESERVED1,
2469 "COMPUTE_PGM_RSRC1");
2470 }
2471
2472 // Bits [28].
2473 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED2, "COMPUTE_PGM_RSRC1");
2474
2475 // Bits [29-31].
2476 if (isGFX10Plus()) {
2477 // WGP_MODE is not available on GFX1250.
2478 if (!isGFX1250Plus()) {
2479 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
2480 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
2481 }
2482 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
2483 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
2484 } else {
2485 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED3,
2486 "COMPUTE_PGM_RSRC1");
2487 }
2488
2489 if (isGFX12Plus())
2490 PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
2491 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
2492
2493 return true;
2494}
2495
// Decode the COMPUTE_PGM_RSRC2 word into .amdhsa_* directives.
// NOTE(review): the signature (original 2497), the architected-flat-scratch
// guard (2501) and two PRINT_DIRECTIVE heads (2522, 2527) are missing from
// this extract.
2496// NOLINTNEXTLINE(readability-identifier-naming)
2498 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2499 using namespace amdhsa;
2500 StringRef Indent = "\t";
2502 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
2503 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2504 else
2505 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
2506 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2507 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
2508 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2509 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
2510 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2511 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
2512 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2513 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
2514 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2515 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
2516 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2517
2518 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
2519 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
2520 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
2521
2523 ".amdhsa_exception_fp_ieee_invalid_op",
2524 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2525 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2526 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2528 ".amdhsa_exception_fp_ieee_div_zero",
2529 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2530 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2531 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2532 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2533 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2534 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2535 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2536 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2537 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2538
2539 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
2540
2541 return true;
2542}
2543
// Decode the COMPUTE_PGM_RSRC3 word; its layout differs between GFX90A,
// GFX10/11 and GFX12+, and must be all-zero before GFX9.
// NOTE(review): the signature (original 2545) and several
// PRINT_PSEUDO_DIRECTIVE_COMMENT heads (2567, 2600, 2612-2616, 2631) are
// missing from this extract.
2544// NOLINTNEXTLINE(readability-identifier-naming)
2546 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2547 using namespace amdhsa;
2548 StringRef Indent = "\t";
2549 if (isGFX90A()) {
2550 KdStream << Indent << ".amdhsa_accum_offset "
2551 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2552 << '\n';
2553
2554 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2555
2556 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2557 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2558 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2559 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2560 } else if (isGFX10Plus()) {
2561 // Bits [0-3].
2562 if (!isGFX12Plus()) {
2563 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2564 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2565 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2566 } else {
2568 "SHARED_VGPR_COUNT",
2569 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2570 }
2571 } else {
2572 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2573 "COMPUTE_PGM_RSRC3",
2574 "must be zero on gfx12+");
2575 }
2576
2577 // Bits [4-11].
2578 if (isGFX11()) {
2579 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2580 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2581 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2582 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2583 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2584 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2585 } else if (isGFX12Plus()) {
2586 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2587 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2588 } else {
2589 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2590 "COMPUTE_PGM_RSRC3",
2591 "must be zero on gfx10");
2592 }
2593
2594 // Bits [12].
2595 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2596 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2597
2598 // Bits [13].
2599 if (isGFX12Plus()) {
2601 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2602 } else {
2603 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2604 "COMPUTE_PGM_RSRC3",
2605 "must be zero on gfx10 or gfx11");
2606 }
2607
2608 // Bits [14-21].
2609 if (isGFX1250Plus()) {
2610 PRINT_DIRECTIVE(".amdhsa_named_barrier_count",
2611 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
2613 "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
2615 COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
2617 "ENABLE_DIDT_THROTTLE",
2618 COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);
2619 } else {
2620 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX120_RESERVED4,
2621 "COMPUTE_PGM_RSRC3",
2622 "must be zero on gfx10+");
2623 }
2624
2625 // Bits [22-30].
2626 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED5,
2627 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2628
2629 // Bits [31].
2630 if (isGFX11Plus()) {
2632 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2633 } else {
2634 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED6,
2635 "COMPUTE_PGM_RSRC3",
2636 "must be zero on gfx10");
2637 }
2638 } else if (FourByteBuffer) {
2639 return createStringError(
2640 std::errc::invalid_argument,
2641 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2642 }
2643 return true;
2644}
2645#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2646#undef PRINT_DIRECTIVE
2647#undef GET_FIELD
2648#undef CHECK_RESERVED_BITS_IMPL
2649#undef CHECK_RESERVED_BITS
2650#undef CHECK_RESERVED_BITS_MSG
2651#undef CHECK_RESERVED_BITS_DESC
2652#undef CHECK_RESERVED_BITS_DESC_MSG
2653
2654/// Create an error object to return from onSymbolStart for reserved kernel
2655/// descriptor bits being set.
2656static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2657 const char *Msg = "") {
2658 return createStringError(
2659 std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2660 getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
2661}
2662
2663/// Create an error object to return from onSymbolStart for reserved kernel
2664/// descriptor bytes being set.
2665static Error createReservedKDBytesError(unsigned BaseInBytes,
2666 unsigned WidthInBytes) {
2667 // Create an error comment in the same format as the "Kernel Descriptor"
2668 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2669 return createStringError(
2670 std::errc::invalid_argument,
2671 "kernel descriptor reserved bits in range (%u:%u) set",
2672 (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2673}
2674
// Decode one field of the 64-byte kernel descriptor, selected by the cursor
// position, emitting the corresponding .amdhsa_* directives to KdStream.
// NOTE(review): the signature (originals 2675-2676), all `case ..._OFFSET:`
// labels (2694, 2700, 2706, 2712, 2721, 2728, 2737, 2741, 2745, 2749, 2753,
// 2797, 2811) and several error-return lines (2717, 2733, 2764, 2772, 2777,
// 2792, 2816) are missing from this extract; restore from upstream.
2677 raw_string_ostream &KdStream) const {
2678#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2679 do { \
2680 KdStream << Indent << DIRECTIVE " " \
2681 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2682 } while (0)
2683
2684 uint16_t TwoByteBuffer = 0;
2685 uint32_t FourByteBuffer = 0;
2686
2687 StringRef ReservedBytes;
2688 StringRef Indent = "\t";
2689
2690 assert(Bytes.size() == 64);
2691 DataExtractor DE(Bytes, /*IsLittleEndian=*/true);
2692
2693 switch (Cursor.tell()) {
2695 FourByteBuffer = DE.getU32(Cursor);
2696 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2697 << '\n';
2698 return true;
2699
2701 FourByteBuffer = DE.getU32(Cursor);
2702 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2703 << FourByteBuffer << '\n';
2704 return true;
2705
2707 FourByteBuffer = DE.getU32(Cursor);
2708 KdStream << Indent << ".amdhsa_kernarg_size "
2709 << FourByteBuffer << '\n';
2710 return true;
2711
2713 // 4 reserved bytes, must be 0.
2714 ReservedBytes = DE.getBytes(Cursor, 4);
2715 for (char B : ReservedBytes) {
2716 if (B != 0)
2718 }
2719 return true;
2720
2722 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2723 // So far no directive controls this for Code Object V3, so simply skip for
2724 // disassembly.
2725 DE.skip(Cursor, 8);
2726 return true;
2727
2729 // 20 reserved bytes, must be 0.
2730 ReservedBytes = DE.getBytes(Cursor, 20);
2731 for (char B : ReservedBytes) {
2732 if (B != 0)
2734 }
2735 return true;
2736
2738 FourByteBuffer = DE.getU32(Cursor);
2739 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2740
2742 FourByteBuffer = DE.getU32(Cursor);
2743 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2744
2746 FourByteBuffer = DE.getU32(Cursor);
2747 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2748
2750 using namespace amdhsa;
2751 TwoByteBuffer = DE.getU16(Cursor);
2752
2754 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2755 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2756 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2757 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2758 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2759 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2760 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2761 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2762 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2763 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2765 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2766 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2767 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2768 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2769
2770 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2771 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
2773
2774 // Reserved for GFX9
2775 if (isGFX9() &&
2776 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2778 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2779 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
2780 }
2781 if (isGFX10Plus()) {
2782 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2783 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2784 }
2785
2786 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2787 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2788 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2789
2790 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2791 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
2793 }
2794
2795 return true;
2796
2798 using namespace amdhsa;
2799 TwoByteBuffer = DE.getU16(Cursor);
2800 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2801 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2802 KERNARG_PRELOAD_SPEC_LENGTH);
2803 }
2804
2805 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2806 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2807 KERNARG_PRELOAD_SPEC_OFFSET);
2808 }
2809 return true;
2810
2812 // 4 bytes from here are reserved, must be 0.
2813 ReservedBytes = DE.getBytes(Cursor, 4);
2814 for (char B : ReservedBytes) {
2815 if (B != 0)
2817 }
2818 return true;
2819
2820 default:
2821 llvm_unreachable("Unhandled index. Case statements cover everything.");
2822 return true;
2823 }
2824#undef PRINT_DIRECTIVE
2825}
2826
// Decode a full 64-byte kernel descriptor into an .amdhsa_kernel /
// .end_amdhsa_kernel block printed to stdout, walking field by field.
// NOTE(review): the signature line (original 2827), the kernel-code-properties
// peek (2842-2843) and the Cursor declaration (2853) are missing from this
// extract.
2828 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2829
2830 // CP microcode requires the kernel descriptor to be 64 aligned.
2831 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2832 return createStringError(std::errc::invalid_argument,
2833 "kernel descriptor must be 64-byte aligned");
2834
2835 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2836 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2837 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2838 // order. Workaround this by first looking up .amdhsa_wavefront_size32 here
2839 // when required.
2840 if (isGFX10Plus()) {
2841 uint16_t KernelCodeProperties =
2844 EnableWavefrontSize32 =
2845 AMDHSA_BITS_GET(KernelCodeProperties,
2846 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2847 }
2848
2849 std::string Kd;
2850 raw_string_ostream KdStream(Kd);
2851 KdStream << ".amdhsa_kernel " << KdName << '\n';
2852
2854 while (C && C.tell() < Bytes.size()) {
2855 Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
2856
2857 cantFail(C.takeError());
2858
2859 if (!Res)
2860 return Res;
2861 }
2862 KdStream << ".end_amdhsa_kernel\n";
2863 outs() << KdStream.str();
2864 return true;
2865}
2866
// Target hook invoked at each symbol: handles AMDGPU kernel-descriptor
// symbols (Code Object V3 "<name>.kd" objects) and rejects V2 kernels.
// NOTE(review): the first signature line (original 2867) is missing from this
// extract.
2868 uint64_t &Size,
2869 ArrayRef<uint8_t> Bytes,
2870 uint64_t Address) const {
2871 // Right now only kernel descriptor needs to be handled.
2872 // We ignore all other symbols for target specific handling.
2873 // TODO:
2874 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2875 // Object V2 and V3 when symbols are marked protected.
2876
2877 // amd_kernel_code_t for Code Object V2.
2878 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2879 Size = 256;
2880 return createStringError(std::errc::invalid_argument,
2881 "code object v2 is not supported");
2882 }
2883
2884 // Code Object V3 kernel descriptors.
2885 StringRef Name = Symbol.Name;
2886 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2887 Size = 64; // Size = 64 regardless of success or failure.
2888 return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2889 }
2890
2891 return false;
2892}
2893
// Return a symbol-ref expression for Id, creating the symbol with value Val
// on first use; warn if an existing symbol carries a different value.
2894const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2895 int64_t Val) {
2896 MCContext &Ctx = getContext();
2897 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2898 // Note: only set value to Val on a new symbol in case an dissassembler
2899 // has already been initialized in this context.
2900 if (!Sym->isVariable()) {
// NOTE(review): the then-branch (original line 2901, which assigns the
// constant value to the new symbol) is missing from this extract.
2902 } else {
// Symbol already exists: verify it evaluates to the same constant.
2903 int64_t Res = ~Val;
2904 bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2905 if (!Valid || Res != Val)
2906 Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
2907 }
2908 return MCSymbolRefExpr::create(Sym, Ctx);
2909}
2910
// Predicate: does MI access buffer memory (MUBUF/MTBUF, or an S_BUFFER_*
// SMEM instruction)?
// NOTE(review): the signature line (original 2911) is missing from this
// extract.
2912 const uint64_t TSFlags = MCII->get(MI.getOpcode()).TSFlags;
2913
2914 // Check for MUBUF and MTBUF instructions
2915 if (TSFlags & (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))
2916 return true;
2917
2918 // Check for SMEM buffer instructions (S_BUFFER_* instructions)
2919 if ((TSFlags & SIInstrFlags::SMRD) && AMDGPU::getSMEMIsBuffer(MI.getOpcode()))
2920 return true;
2921
2922 return false;
2923}
2924
2925//===----------------------------------------------------------------------===//
2926// AMDGPUSymbolizer
2927//===----------------------------------------------------------------------===//
2928
2929// Try to find symbol name for specified label
// Symbolizer hook: for branch targets, look up a matching STT_NOTYPE symbol
// at the target address and attach it as an expression operand; otherwise
// record the address so the caller can synthesize a label.
// NOTE(review): the signature (original 2930) and the addOperand call (2950)
// are missing from this extract.
2931 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2932 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2933 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2934
2935 if (!IsBranch) {
2936 return false;
2937 }
2938
2939 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2940 if (!Symbols)
2941 return false;
2942
2943 auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2944 return Val.Addr == static_cast<uint64_t>(Value) &&
2945 Val.Type == ELF::STT_NOTYPE;
2946 });
2947 if (Result != Symbols->end()) {
2948 auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2949 const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2951 return true;
2952 }
2953 // Add to list of referenced addresses, so caller can synthesize a label.
2954 ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2955 return false;
2956}
2957
// PC-relative load reference comments are not supported for AMDGPU.
// NOTE(review): the first signature line (original 2958) is missing from this
// extract.
2959 int64_t Value,
2960 uint64_t Address) {
2961 llvm_unreachable("unimplemented");
2962}
2963
2964//===----------------------------------------------------------------------===//
2965// Initialization
2966//===----------------------------------------------------------------------===//
2967
// Factory for the AMDGPU symbolizer, registered with the target registry.
// NOTE(review): the first signature line (original 2968) is missing from this
// extract.
2969 LLVMOpInfoCallback /*GetOpInfo*/,
2970 LLVMSymbolLookupCallback /*SymbolLookUp*/,
2971 void *DisInfo,
2972 MCContext *Ctx,
2973 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2974 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2975}
2976
// Factory for the AMDGPU disassembler, registered with the target registry.
// NOTE(review): the first signature line (original 2977) is missing from this
// extract.
2978 const MCSubtargetInfo &STI,
2979 MCContext &Ctx) {
2980 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2981}
2982
2983extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
MCDisassembler::DecodeStatus DecodeStatus
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)
static VOPModifiers collectVOPModifiers(const MCInst &MI, bool IsVOP3P=false)
static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op, AMDGPU::OpName Name)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler()
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_KImmFP64(MCInst &Inst, uint64_t Imm, uint64_t Addr, const MCDisassembler *Decoder)
static SmallString< 32 > getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes)
Print a string describing the reserved bit range specified by Mask with offset BaseBytes for use in e...
#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)
static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static std::bitset< 128 > eat16Bytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)
static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize, unsigned OpWidth, unsigned Imm, unsigned EncImm, const MCDisassembler *Decoder)
static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static MCRegister CheckVGPROverflow(MCRegister Reg, const MCRegisterClass &RC, const MCRegisterInfo &MRI)
static int64_t getInlineImmValBF16(unsigned Imm)
#define DECODE_SDWA(DecName)
static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
#define DECODE_OPERAND_REG_8(RegClass)
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static int64_t getInlineImmVal32(unsigned Imm)
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
#define CHECK_RESERVED_BITS(MASK)
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define SGPR_MAX
static int64_t getInlineImmVal64(unsigned Imm)
static T eatBytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw, const MCDisassembler *Decoder)
static MCDisassembler * createAMDGPUDisassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)
static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val, uint64_t Addr, const void *Decoder)
static MCSymbolizer * createAMDGPUSymbolizer(const Triple &, LLVMOpInfoCallback, LLVMSymbolLookupCallback, void *DisInfo, MCContext *Ctx, std::unique_ptr< MCRelocationInfo > &&RelInfo)
static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static int64_t getInlineImmValF16(unsigned Imm)
#define GET_FIELD(MASK)
static std::bitset< 96 > eat12Bytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static Error createReservedKDBytesError(unsigned BaseInBytes, unsigned WidthInBytes)
Create an error object to return from onSymbolStart for reserved kernel descriptor bytes being set.
static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)
static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes, const char *Msg="")
Create an error object to return from onSymbolStart for reserved kernel descriptor bits being set.
static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI, MCOperand &MO, uint8_t NumRegs)
Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the appropriate subregister fo...
This file contains declaration for AMDGPU ISA disassembler.
Provides AMDGPU specific target descriptions.
AMDHSA kernel descriptor definitions.
#define AMDHSA_BITS_GET(SRC, MSK)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
#define T
MachineInstr unsigned OpIdx
Interface definition for SIRegisterInfo.
MCOperand decodeNonVGPRSrcOp(const MCInst &Inst, unsigned Width, unsigned Val) const
MCOperand decodeLiteral64Constant() const
void convertVOPC64DPPInst(MCInst &MI) const
bool isBufferInstruction(const MCInst &MI) const
Check if the instruction is a buffer operation (MUBUF, MTBUF, or S_BUFFER)
void convertEXPInst(MCInst &MI) const
MCOperand decodeSpecialReg64(unsigned Val) const
const char * getRegClassName(unsigned RegClassID) const
Expected< bool > decodeCOMPUTE_PGM_RSRC1(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC1.
MCOperand decodeSplitBarrier(const MCInst &Inst, unsigned Val) const
Expected< bool > decodeKernelDescriptorDirective(DataExtractor::Cursor &Cursor, ArrayRef< uint8_t > Bytes, raw_string_ostream &KdStream) const
void convertVOPCDPPInst(MCInst &MI) const
MCOperand decodeSpecialReg96Plus(unsigned Val) const
MCOperand decodeSDWASrc32(unsigned Val) const
void setABIVersion(unsigned Version) override
ELF-specific, set the ABI version from the object header.
Expected< bool > decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC2.
unsigned getAgprClassId(unsigned Width) const
MCOperand decodeDpp8FI(unsigned Val) const
MCOperand decodeSDWASrc(unsigned Width, unsigned Val) const
void convertFMAanyK(MCInst &MI) const
DecodeStatus tryDecodeInst(const uint8_t *Table, MCInst &MI, InsnType Inst, uint64_t Address, raw_ostream &Comments) const
void convertMacDPPInst(MCInst &MI) const
MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const
void convertDPP8Inst(MCInst &MI) const
MCOperand createVGPR16Operand(unsigned RegIdx, bool IsHi) const
MCOperand errOperand(unsigned V, const Twine &ErrMsg) const
MCOperand decodeVersionImm(unsigned Imm) const
Expected< bool > decodeKernelDescriptor(StringRef KdName, ArrayRef< uint8_t > Bytes, uint64_t KdAddress) const
void convertVOP3DPPInst(MCInst &MI) const
void convertTrue16OpSel(MCInst &MI) const
MCOperand decodeSrcOp(const MCInst &Inst, unsigned Width, unsigned Val) const
MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const
MCOperand decodeLiteralConstant(const MCInstrDesc &Desc, const MCOperandInfo &OpDesc) const
Expected< bool > decodeCOMPUTE_PGM_RSRC3(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC3.
AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, MCInstrInfo const *MCII)
MCOperand decodeSpecialReg32(unsigned Val) const
MCOperand createRegOperand(MCRegister Reg) const
MCOperand decodeSDWAVopcDst(unsigned Val) const
void convertVINTERPInst(MCInst &MI) const
void convertSDWAInst(MCInst &MI) const
unsigned getSgprClassId(unsigned Width) const
static MCOperand decodeIntImmed(unsigned Imm)
void convertWMMAInst(MCInst &MI) const
MCOperand decodeBoolReg(const MCInst &Inst, unsigned Val) const
unsigned getVgprClassId(unsigned Width) const
void convertMAIInst(MCInst &MI) const
f8f6f4 instructions have different pseudos depending on the used formats.
unsigned getTtmpClassId(unsigned Width) const
DecodeStatus getInstruction(MCInst &MI, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address, raw_ostream &CS) const override
Returns the disassembly of a single instruction.
MCOperand decodeMandatoryLiteral64Constant(uint64_t Imm) const
void convertMIMGInst(MCInst &MI) const
bool isMacDPP(MCInst &MI) const
int getTTmpIdx(unsigned Val) const
void convertVOP3PDPPInst(MCInst &MI) const
MCOperand createSRegOperand(unsigned SRegClassID, unsigned Val) const
MCOperand decodeSDWASrc16(unsigned Val) const
Expected< bool > onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address) const override
Used to perform separate target specific disassembly for a particular symbol.
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) override
Try to add a symbolic operand instead of Value to the MCInst.
void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, uint64_t Address) override
Try to add a comment on the PC-relative load.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
const T * data() const
Definition ArrayRef.h:138
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:185
A class representing a position in a DataExtractor, as well as any error encountered during extractio...
LLVM_ABI uint32_t getU32(uint64_t *offset_ptr, Error *Err=nullptr) const
Extract a uint32_t value from *offset_ptr.
LLVM_ABI uint16_t getU16(uint64_t *offset_ptr, Error *Err=nullptr) const
Extract a uint16_t value from *offset_ptr.
LLVM_ABI void skip(Cursor &C, uint64_t Length) const
Advance the Cursor position by the given number of bytes.
LLVM_ABI StringRef getBytes(uint64_t *OffsetPtr, uint64_t Length, Error *Err=nullptr) const
Extract a fixed number of bytes from the specified offset.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
Tagged union holding either a T or a Error.
Definition Error.h:485
static const MCBinaryExpr * createOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:408
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
const MCRegisterInfo * getRegisterInfo() const
Definition MCContext.h:411
Superclass for all disassemblers.
MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
MCContext & getContext() const
const MCSubtargetInfo & STI
raw_ostream * CommentStream
DecodeStatus
Ternary decode status.
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getOpcode() const
Definition MCInst.h:202
void addOperand(const MCOperand Op)
Definition MCInst.h:215
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
Interface to description of machine instruction set.
Definition MCInstrInfo.h:27
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition MCInstrDesc.h:86
uint8_t OperandType
Information about the type of the operand.
Definition MCInstrDesc.h:98
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
bool isValid() const
Definition MCInst.h:64
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getSizeInBits() const
Return the size of the physical register in bits if we are able to determine it.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
MCRegister getMatchingSuperReg(MCRegister Reg, unsigned SubIdx, const MCRegisterClass *RC) const
Return a super-register of the specified register Reg so its sub-register of index SubIdx is Reg.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
Generic base class for all target subtargets.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
Symbolize and annotate disassembled instructions.
Represents a location in source code.
Definition SMLoc.h:22
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM Value Representation.
Definition Value.h:75
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
A raw_ostream that writes to an SmallVector or SmallString.
const char *(* LLVMSymbolLookupCallback)(void *DisInfo, uint64_t ReferenceValue, uint64_t *ReferenceType, uint64_t ReferencePC, const char **ReferenceName)
The type for the symbol lookup function.
int(* LLVMOpInfoCallback)(void *DisInfo, uint64_t PC, uint64_t Offset, uint64_t OpSize, uint64_t InstSize, int TagType, void *TagBuf)
The type for the operand information call back function.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
ArrayRef< GFXVersion > getGFXVersions()
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
EncodingField< Bit, Bit, D > EncodingBit
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isGFX10(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX13(const MCSubtargetInfo &STI)
bool isVOPC64DPP(unsigned Opc)
unsigned getAMDHSACodeObjectVersion(const Module &M)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const MFMA_F8F6F4_Info * getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA, unsigned FmtB, unsigned F8F8Opcode)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX10Plus(const MCSubtargetInfo &STI)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:234
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:204
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:211
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:225
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:222
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:227
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:213
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:208
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:203
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:210
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:209
@ OPERAND_REG_IMM_V2FP16_SPLAT
Definition SIDefines.h:212
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:221
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:219
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:214
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:207
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:228
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:239
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:240
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:215
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:206
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:224
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:220
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:226
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:216
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:241
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:223
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:205
bool hasGDS(const MCSubtargetInfo &STI)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool isGFX1250(const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isGFX1250Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
const MFMA_F8F6F4_Info * getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, unsigned BLGP, unsigned F8F8Opcode)
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_NOTYPE
Definition ELF.h:1419
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1433
@ STT_OBJECT
Definition ELF.h:1420
value_type read(const void *memory, endianness endian)
Read a value of a particular endianness from memory.
Definition Endian.h:60
uint16_t read16(const void *P, endianness E)
Definition Endian.h:409
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:557
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
LLVM_ABI raw_fd_ostream & outs()
This returns a reference to a raw_fd_ostream for standard output.
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1321
Op::Description Desc
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:156
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:204
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:334
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Definition Error.h:769
Target & getTheGCNTarget()
The target for GCN GPUs.
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Add
Sum of integers.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition VE.h:376
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
std::vector< SymbolInfoTy > SectionSymbolsTy
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
static void RegisterMCSymbolizer(Target &T, Target::MCSymbolizerCtorTy Fn)
RegisterMCSymbolizer - Register an MCSymbolizer implementation for the given target.
static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)
RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.