LLVM 23.0.0git
AMDGPUDisassembler.cpp
Go to the documentation of this file.
1//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//===----------------------------------------------------------------------===//
10//
11/// \file
12///
13/// This file contains the definition of the AMDGPU ISA disassembler.
14//
15//===----------------------------------------------------------------------===//
16
17// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
18
22#include "SIDefines.h"
23#include "SIRegisterInfo.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCContext.h"
31#include "llvm/MC/MCDecoder.h"
33#include "llvm/MC/MCExpr.h"
34#include "llvm/MC/MCInstrDesc.h"
40
41using namespace llvm;
42using namespace llvm::MCD;
43
44#define DEBUG_TYPE "amdgpu-disassembler"
45
46#define SGPR_MAX \
47 (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
48 : AMDGPU::EncValues::SGPR_MAX_SI)
49
51
52static int64_t getInlineImmValF16(unsigned Imm);
53static int64_t getInlineImmValBF16(unsigned Imm);
54static int64_t getInlineImmVal32(unsigned Imm);
55static int64_t getInlineImmVal64(unsigned Imm);
56
58 MCContext &Ctx, MCInstrInfo const *MCII)
59 : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
60 MAI(Ctx.getAsmInfo()),
61 HwModeRegClass(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)),
62 TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
63 CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
64 // ToDo: AMDGPUDisassembler supports only VI ISA.
65 if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
66 reportFatalUsageError("disassembly not yet supported for subtarget");
67
68 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
69 createConstantSymbolExpr(Symbol, Code);
70
71 UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
72 UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
73 UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
74}
75
79
81 unsigned EFlags) const {
82 OS << "\t.amdgcn_target \""
83 << STI.getTargetTriple().normalize(Triple::CanonicalForm::FOUR_IDENT)
84 << '-';
85
86 // Get CPU name from ELF e_flags MACH field
87 unsigned MACH = EFlags & ELF::EF_AMDGPU_MACH;
88
89#define X(NUM, ENUM, NAME) \
90 case ELF::ENUM: \
91 OS << NAME; \
92 break;
93 switch (MACH) {
95 default:
96 OS << "unknown";
97 break;
98 }
99#undef X
100
101 // Add xnack and sramecc from ELF flags (v4 format)
102 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV4) {
103 unsigned SrameccSetting = EFlags & ELF::EF_AMDGPU_FEATURE_SRAMECC_V4;
104 switch (SrameccSetting) {
107 break;
109 OS << ":sramecc-";
110 break;
112 OS << ":sramecc+";
113 break;
114 }
115
116 unsigned XnackSetting = EFlags & ELF::EF_AMDGPU_FEATURE_XNACK_V4;
117 switch (XnackSetting) {
120 break;
122 OS << ":xnack-";
123 break;
125 OS << ":xnack+";
126 break;
127 }
128 }
129
130 OS << "\"\n";
131}
132
134addOperand(MCInst &Inst, const MCOperand& Opnd) {
135 Inst.addOperand(Opnd);
136 return Opnd.isValid() ?
139}
140
142 AMDGPU::OpName Name) {
143 int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), Name);
144 if (OpIdx != -1) {
145 auto *I = MI.begin();
146 std::advance(I, OpIdx);
147 MI.insert(I, Op);
148 }
149 return OpIdx;
150}
151
152static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
153 uint64_t Addr,
154 const MCDisassembler *Decoder) {
155 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
156
157 // Our branches take a simm16.
158 int64_t Offset = SignExtend64<16>(Imm) * 4 + 4 + Addr;
159
160 if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
162 return addOperand(Inst, MCOperand::createImm(Imm));
163}
164
165static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
166 const MCDisassembler *Decoder) {
167 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
168 int64_t Offset;
169 if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
171 } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
172 Offset = Imm & 0xFFFFF;
173 } else { // GFX9+ supports 21-bit signed offsets.
175 }
177}
178
179static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
180 const MCDisassembler *Decoder) {
181 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
182 return addOperand(Inst, DAsm->decodeBoolReg(Inst, Val));
183}
184
185static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
186 uint64_t Addr,
187 const MCDisassembler *Decoder) {
188 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
189 return addOperand(Inst, DAsm->decodeSplitBarrier(Inst, Val));
190}
191
192static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
193 const MCDisassembler *Decoder) {
194 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
195 return addOperand(Inst, DAsm->decodeDpp8FI(Val));
196}
197
// Defines a static decoder callback called StaticDecoderName. The callback
// passes Imm to the AMDGPUDisassembler member function DecoderName and appends
// the operand that member returns to Inst.
#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm, \
                                        uint64_t /*Addr*/, \
                                        const MCDisassembler *Decoder) { \
    const AMDGPUDisassembler &Disasm = \
        *static_cast<const AMDGPUDisassembler *>(Decoder); \
    return addOperand(Inst, Disasm.DecoderName(Imm)); \
  }
205
// Defines Decode<RegClass>RegisterClass, a decoder that builds the register
// operand directly from the register class ID: Imm (8 bits) is the register
// number within AMDGPU::<RegClass>RegClassID. Used by VGPR-only and AGPR-only
// operands.
#define DECODE_OPERAND_REG_8(RegClass) \
  static DecodeStatus Decode##RegClass##RegisterClass( \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
      const MCDisassembler *Decoder) { \
    assert(Imm < (1 << 8) && "8-bit encoding"); \
    const AMDGPUDisassembler &Disasm = \
        *static_cast<const AMDGPUDisassembler *>(Decoder); \
    return addOperand( \
        Inst, Disasm.createRegOperand(AMDGPU::RegClass##RegClassID, Imm)); \
  }
217
// Defines a static decoder callback called Name. It asserts that Imm fits in
// EncSize bits, then appends to Inst the operand returned by
// AMDGPUDisassembler::decodeSrcOp(Inst, OpWidth, EncImm). EncImm is an
// expression evaluated inside the callback, so it may refer to Imm.
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm) \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
                           const MCDisassembler *Decoder) { \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding"); \
    const AMDGPUDisassembler &Disasm = \
        *static_cast<const AMDGPUDisassembler *>(Decoder); \
    return addOperand(Inst, Disasm.decodeSrcOp(Inst, OpWidth, EncImm)); \
  }
225
226static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
227 unsigned OpWidth, unsigned Imm, unsigned EncImm,
228 const MCDisassembler *Decoder) {
229 assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
230 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
231 return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));
232}
233
// Register decoders built on DECODE_SrcOp. Imm (7 bits) is the register
// number; decodeSrcOp determines the register class. Used by SGPR-only
// operands.
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth) \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)

// Variant of DECODE_OPERAND_SREG_7 that asserts an 8-bit encoding instead.
#define DECODE_OPERAND_SREG_8(RegClass, OpWidth) \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)
241
242// Decoder for registers. Imm(10-bit): Imm{7-0} is number of register,
243// Imm{9} is acc(agpr or vgpr) Imm{8} should be 0 (see VOP3Pe_SMFMAC).
244// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
245// Used by AV_ register classes (AGPR or VGPR only register operands).
246template <unsigned OpWidth>
247static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
248 const MCDisassembler *Decoder) {
249 return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
250 Decoder);
251}
252
253// Decoder for Src(9-bit encoding) registers only.
254template <unsigned OpWidth>
255static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
256 uint64_t /* Addr */,
257 const MCDisassembler *Decoder) {
258 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
259}
260
261// Decoder for Src(9-bit encoding) AGPR, register number encoded in 9bits, set
262// Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp, registers
263// only.
264template <unsigned OpWidth>
265static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
266 const MCDisassembler *Decoder) {
267 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
268}
269
270// Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding
271// Imm{9} is acc, registers only.
272template <unsigned OpWidth>
273static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
274 uint64_t /* Addr */,
275 const MCDisassembler *Decoder) {
276 return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, Decoder);
277}
278
279// Decoder for RegisterOperands using 9-bit Src encoding. Operand can be
280// register from RegClass or immediate. Registers that don't belong to RegClass
281// will be decoded and InstPrinter will report warning. Immediate will be
282// decoded into constant matching the OperandType (important for floating point
283// types).
284template <unsigned OpWidth>
285static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
286 uint64_t /* Addr */,
287 const MCDisassembler *Decoder) {
288 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
289}
290
291// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
292// and decode using 'enum10' from decodeSrcOp.
293template <unsigned OpWidth>
294static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
295 uint64_t /* Addr */,
296 const MCDisassembler *Decoder) {
297 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
298}
299
300// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
301// when RegisterClass is used as an operand. Most often used for destination
302// operands.
303
305DECODE_OPERAND_REG_8(VGPR_32_Lo128)
308DECODE_OPERAND_REG_8(VReg_128)
309DECODE_OPERAND_REG_8(VReg_192)
310DECODE_OPERAND_REG_8(VReg_256)
311DECODE_OPERAND_REG_8(VReg_288)
312DECODE_OPERAND_REG_8(VReg_320)
313DECODE_OPERAND_REG_8(VReg_352)
314DECODE_OPERAND_REG_8(VReg_384)
315DECODE_OPERAND_REG_8(VReg_512)
316DECODE_OPERAND_REG_8(VReg_1024)
317
318DECODE_OPERAND_SREG_7(SReg_32, 32)
319DECODE_OPERAND_SREG_7(SReg_32_XM0, 32)
320DECODE_OPERAND_SREG_7(SReg_32_XEXEC, 32)
321DECODE_OPERAND_SREG_7(SReg_32_XM0_XEXEC, 32)
322DECODE_OPERAND_SREG_7(SReg_32_XEXEC_HI, 32)
323DECODE_OPERAND_SREG_7(SReg_64_XEXEC, 64)
324DECODE_OPERAND_SREG_7(SReg_64_XEXEC_XNULL, 64)
325DECODE_OPERAND_SREG_7(SReg_96, 96)
326DECODE_OPERAND_SREG_7(SReg_128, 128)
327DECODE_OPERAND_SREG_7(SReg_128_XNULL, 128)
328DECODE_OPERAND_SREG_7(SReg_256, 256)
329DECODE_OPERAND_SREG_7(SReg_256_XNULL, 256)
330DECODE_OPERAND_SREG_7(SReg_512, 512)
331
332DECODE_OPERAND_SREG_8(SReg_64, 64)
333
336DECODE_OPERAND_REG_8(AReg_128)
337DECODE_OPERAND_REG_8(AReg_256)
338DECODE_OPERAND_REG_8(AReg_512)
339DECODE_OPERAND_REG_8(AReg_1024)
340
342 uint64_t /*Addr*/,
343 const MCDisassembler *Decoder) {
344 assert(isUInt<10>(Imm) && "10-bit encoding expected");
345 assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
346
347 bool IsHi = Imm & (1 << 9);
348 unsigned RegIdx = Imm & 0xff;
349 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
350 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
351}
352
353static DecodeStatus
355 const MCDisassembler *Decoder) {
356 assert(isUInt<8>(Imm) && "8-bit encoding expected");
357
358 bool IsHi = Imm & (1 << 7);
359 unsigned RegIdx = Imm & 0x7f;
360 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
361 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
362}
363
364template <unsigned OpWidth>
366 uint64_t /*Addr*/,
367 const MCDisassembler *Decoder) {
368 assert(isUInt<9>(Imm) && "9-bit encoding expected");
369
370 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
371 if (Imm & AMDGPU::EncValues::IS_VGPR) {
372 bool IsHi = Imm & (1 << 7);
373 unsigned RegIdx = Imm & 0x7f;
374 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
375 }
376 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
377}
378
379template <unsigned OpWidth>
380static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
381 uint64_t /*Addr*/,
382 const MCDisassembler *Decoder) {
383 assert(isUInt<10>(Imm) && "10-bit encoding expected");
384
385 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
386 if (Imm & AMDGPU::EncValues::IS_VGPR) {
387 bool IsHi = Imm & (1 << 9);
388 unsigned RegIdx = Imm & 0xff;
389 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
390 }
391 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
392}
393
394static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
395 uint64_t /*Addr*/,
396 const MCDisassembler *Decoder) {
397 assert(isUInt<10>(Imm) && "10-bit encoding expected");
398 assert(Imm & AMDGPU::EncValues::IS_VGPR && "VGPR expected");
399
400 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
401
402 bool IsHi = Imm & (1 << 9);
403 unsigned RegIdx = Imm & 0xff;
404 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
405}
406
407static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
408 uint64_t Addr,
409 const MCDisassembler *Decoder) {
410 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
411 return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
412}
413
415 uint64_t Addr,
416 const MCDisassembler *Decoder) {
417 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
418 return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm));
419}
420
421static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
422 uint64_t Addr, const void *Decoder) {
423 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
424 return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
425}
426
427static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw,
428 const MCDisassembler *Decoder) {
429 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
430 return addOperand(Inst, DAsm->decodeSrcOp(Inst, Opw, Imm | 256));
431}
432
433template <unsigned Opw>
434static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
435 uint64_t /* Addr */,
436 const MCDisassembler *Decoder) {
437 return decodeAVLdSt(Inst, Imm, Opw, Decoder);
438}
439
440static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
441 uint64_t Addr,
442 const MCDisassembler *Decoder) {
443 assert(Imm < (1 << 9) && "9-bit encoding");
444 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
445 return addOperand(Inst, DAsm->decodeSrcOp(Inst, 64, Imm));
446}
447
448#define DECODE_SDWA(DecName) \
449DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
450
451DECODE_SDWA(Src32)
452DECODE_SDWA(Src16)
453DECODE_SDWA(VopcDst)
454
455static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
456 uint64_t /* Addr */,
457 const MCDisassembler *Decoder) {
458 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
459 return addOperand(Inst, DAsm->decodeVersionImm(Imm));
460}
461
462#include "AMDGPUGenDisassemblerTables.inc"
463
464namespace {
465// Define bitwidths for various types used to instantiate the decoder.
466template <> constexpr uint32_t InsnBitWidth<uint32_t> = 32;
467template <> constexpr uint32_t InsnBitWidth<uint64_t> = 64;
468template <> constexpr uint32_t InsnBitWidth<std::bitset<96>> = 96;
469template <> constexpr uint32_t InsnBitWidth<std::bitset<128>> = 128;
470} // namespace
471
472//===----------------------------------------------------------------------===//
473//
474//===----------------------------------------------------------------------===//
475
476template <typename InsnType>
478 InsnType Inst, uint64_t Address,
479 raw_ostream &Comments) const {
480 assert(MI.getOpcode() == 0);
481 assert(MI.getNumOperands() == 0);
482 MCInst TmpInst;
483 HasLiteral = false;
484 const auto SavedBytes = Bytes;
485
486 SmallString<64> LocalComments;
487 raw_svector_ostream LocalCommentStream(LocalComments);
488 CommentStream = &LocalCommentStream;
489
490 DecodeStatus Res =
491 decodeInstruction(Table, TmpInst, Inst, Address, this, STI);
492
493 CommentStream = nullptr;
494
495 if (Res != MCDisassembler::Fail) {
496 MI = TmpInst;
497 Comments << LocalComments;
499 }
500 Bytes = SavedBytes;
502}
503
504template <typename InsnType>
507 MCInst &MI, InsnType Inst, uint64_t Address,
508 raw_ostream &Comments) const {
509 for (const uint8_t *T : {Table1, Table2}) {
510 if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
511 return Res;
512 }
514}
515
516template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
517 assert(Bytes.size() >= sizeof(T));
518 const auto Res =
520 Bytes = Bytes.slice(sizeof(T));
521 return Res;
522}
523
524static inline std::bitset<96> eat12Bytes(ArrayRef<uint8_t> &Bytes) {
525 using namespace llvm::support::endian;
526 assert(Bytes.size() >= 12);
527 std::bitset<96> Lo(read<uint64_t, endianness::little>(Bytes.data()));
528 Bytes = Bytes.slice(8);
529 std::bitset<96> Hi(read<uint32_t, endianness::little>(Bytes.data()));
530 Bytes = Bytes.slice(4);
531 return (Hi << 64) | Lo;
532}
533
534static inline std::bitset<128> eat16Bytes(ArrayRef<uint8_t> &Bytes) {
535 using namespace llvm::support::endian;
536 assert(Bytes.size() >= 16);
537 std::bitset<128> Lo(read<uint64_t, endianness::little>(Bytes.data()));
538 Bytes = Bytes.slice(8);
539 std::bitset<128> Hi(read<uint64_t, endianness::little>(Bytes.data()));
540 Bytes = Bytes.slice(8);
541 return (Hi << 64) | Lo;
542}
543
544void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
545 const MCInstrInfo &MCII) const {
546 const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
547 for (auto [OpNo, OpDesc] : enumerate(Desc.operands())) {
548 if (OpNo >= MI.getNumOperands())
549 continue;
550
551 // TODO: Fix V_DUAL_FMAMK_F32_X_FMAAK_F32_gfx12 vsrc operands,
552 // defined to take VGPR_32, but in reality allowing inline constants.
553 bool IsSrc = AMDGPU::OPERAND_SRC_FIRST <= OpDesc.OperandType &&
554 OpDesc.OperandType <= AMDGPU::OPERAND_SRC_LAST;
555 if (!IsSrc && OpDesc.OperandType != MCOI::OPERAND_REGISTER)
556 continue;
557
558 MCOperand &Op = MI.getOperand(OpNo);
559 if (!Op.isImm())
560 continue;
561 int64_t Imm = Op.getImm();
564 Op = decodeIntImmed(Imm);
565 continue;
566 }
567
569 Op = decodeLiteralConstant(Desc, OpDesc);
570 continue;
571 }
572
575 switch (OpDesc.OperandType) {
581 break;
584 Imm = getInlineImmValF16(Imm);
585 break;
588 Imm = getInlineImmValF16(Imm);
589 break;
591 // V_PK_FMAC_F16 on GFX11+ duplicates the f16 inline constant to both
592 // halves, so we need to produce the duplicated value for correct
593 // round-trip.
594 if (isGFX11Plus()) {
595 int64_t F16Val = getInlineImmValF16(Imm);
596 Imm = (F16Val << 16) | (F16Val & 0xFFFF);
597 } else {
598 Imm = getInlineImmValF16(Imm);
599 }
600 break;
601 }
609 Imm = getInlineImmVal64(Imm);
610 break;
611 default:
612 Imm = getInlineImmVal32(Imm);
613 }
614 Op.setImm(Imm);
615 }
616 }
617}
618
620 ArrayRef<uint8_t> Bytes_,
622 raw_ostream &CS) const {
623 unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
624 Bytes = Bytes_.slice(0, MaxInstBytesNum);
625
626 // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
627 // there are fewer bytes left). This will be overridden on success.
628 Size = std::min((size_t)4, Bytes_.size());
629
630 do {
631 // ToDo: better to switch encoding length using some bit predicate
632 // but it is unknown yet, so try all we can
633
634 // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
635 // encodings
636 if (isGFX1250Plus() && Bytes.size() >= 16) {
637 std::bitset<128> DecW = eat16Bytes(Bytes);
638 if (tryDecodeInst(DecoderTableGFX1250128, MI, DecW, Address, CS))
639 break;
640 Bytes = Bytes_.slice(0, MaxInstBytesNum);
641 }
642
643 if (isGFX11Plus() && Bytes.size() >= 12) {
644 std::bitset<96> DecW = eat12Bytes(Bytes);
645
646 if (isGFX1170() &&
647 tryDecodeInst(DecoderTableGFX117096, DecoderTableGFX1170_FAKE1696, MI,
648 DecW, Address, CS))
649 break;
650
651 if (isGFX11() &&
652 tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
653 DecW, Address, CS))
654 break;
655
656 if (isGFX1250() &&
657 tryDecodeInst(DecoderTableGFX125096, DecoderTableGFX1250_FAKE1696, MI,
658 DecW, Address, CS))
659 break;
660
661 if (isGFX12() &&
662 tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
663 DecW, Address, CS))
664 break;
665
666 if (isGFX12() &&
667 tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
668 break;
669
670 if (isGFX13() &&
671 tryDecodeInst(DecoderTableGFX1396, DecoderTableGFX13_FAKE1696, MI,
672 DecW, Address, CS))
673 break;
674
675 if (STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
676 // Return 8 bytes for a potential literal.
677 Bytes = Bytes_.slice(4, MaxInstBytesNum - 4);
678
679 if (isGFX1250() &&
680 tryDecodeInst(DecoderTableGFX125096, MI, DecW, Address, CS))
681 break;
682 }
683
684 // Reinitialize Bytes
685 Bytes = Bytes_.slice(0, MaxInstBytesNum);
686
687 } else if (Bytes.size() >= 16 &&
688 STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
689 std::bitset<128> DecW = eat16Bytes(Bytes);
690 if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS))
691 break;
692
693 // Reinitialize Bytes
694 Bytes = Bytes_.slice(0, MaxInstBytesNum);
695 }
696
697 if (Bytes.size() >= 8) {
698 const uint64_t QW = eatBytes<uint64_t>(Bytes);
699
700 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
701 tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
702 break;
703
704 if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
705 tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
706 break;
707
708 if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
709 tryDecodeInst(DecoderTableGFX95064, MI, QW, Address, CS))
710 break;
711
712 // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
713 // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
714 // table first so we print the correct name.
715 if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
716 tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
717 break;
718
719 if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
720 tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
721 break;
722
723 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
724 tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
725 break;
726
727 if ((isVI() || isGFX9()) &&
728 tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
729 break;
730
731 if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
732 break;
733
734 if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
735 break;
736
737 if (isGFX1250() &&
738 tryDecodeInst(DecoderTableGFX125064, DecoderTableGFX1250_FAKE1664, MI,
739 QW, Address, CS))
740 break;
741
742 if (isGFX12() &&
743 tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
744 Address, CS))
745 break;
746
747 if (isGFX1170() &&
748 tryDecodeInst(DecoderTableGFX117064, DecoderTableGFX1170_FAKE1664, MI,
749 QW, Address, CS))
750 break;
751
752 if (isGFX11() &&
753 tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
754 Address, CS))
755 break;
756
757 if (isGFX1170() &&
758 tryDecodeInst(DecoderTableGFX1170W6464, MI, QW, Address, CS))
759 break;
760
761 if (isGFX11() &&
762 tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
763 break;
764
765 if (isGFX12() &&
766 tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
767 break;
768
769 if (isGFX13() &&
770 tryDecodeInst(DecoderTableGFX1364, DecoderTableGFX13_FAKE1664, MI, QW,
771 Address, CS))
772 break;
773
774 // Reinitialize Bytes
775 Bytes = Bytes_.slice(0, MaxInstBytesNum);
776 }
777
778 // Try decode 32-bit instruction
779 if (Bytes.size() >= 4) {
780 const uint32_t DW = eatBytes<uint32_t>(Bytes);
781
782 if ((isVI() || isGFX9()) &&
783 tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
784 break;
785
786 if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
787 break;
788
789 if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
790 break;
791
792 if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
793 tryDecodeInst(DecoderTableGFX95032, MI, DW, Address, CS))
794 break;
795
796 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
797 tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
798 break;
799
800 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
801 tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
802 break;
803
804 if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
805 break;
806
807 if (isGFX1170() &&
808 tryDecodeInst(DecoderTableGFX117032, DecoderTableGFX1170_FAKE1632, MI,
809 DW, Address, CS))
810 break;
811
812 if (isGFX11() &&
813 tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
814 Address, CS))
815 break;
816
817 if (isGFX1250() &&
818 tryDecodeInst(DecoderTableGFX125032, DecoderTableGFX1250_FAKE1632, MI,
819 DW, Address, CS))
820 break;
821
822 if (isGFX12() &&
823 tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
824 Address, CS))
825 break;
826
827 if (isGFX13() &&
828 tryDecodeInst(DecoderTableGFX1332, DecoderTableGFX13_FAKE1632, MI, DW,
829 Address, CS))
830 break;
831 }
832
834 } while (false);
835
837
838 decodeImmOperands(MI, *MCII);
839
840 if (SIInstrFlags::isDPP(*MCII, MI)) {
841 if (isMacDPP(MI))
843
844 if (SIInstrFlags::isVOP3P(*MCII, MI))
846 else if (SIInstrFlags::isVOPC(*MCII, MI))
847 convertVOPCDPPInst(MI); // Special VOP3 case
848 else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
849 convertVOPC64DPPInst(MI); // Special VOP3 case
850 else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
851 -1)
853 else if (SIInstrFlags::isVOP3(*MCII, MI))
854 convertVOP3DPPInst(MI); // Regular VOP3 case
855 }
856
858
859 if (AMDGPU::isMAC(MI.getOpcode())) {
860 // Insert dummy unused src2_modifiers.
862 AMDGPU::OpName::src2_modifiers);
863 }
864
865 if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
866 MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
867 // Insert dummy unused src2_modifiers.
869 AMDGPU::OpName::src2_modifiers);
870 }
871
872 if (SIInstrFlags::isDS(*MCII, MI) && !AMDGPU::hasGDS(STI)) {
873 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
874 }
875
876 if (SIInstrFlags::isMUBUF(*MCII, MI) || SIInstrFlags::isFLAT(*MCII, MI) ||
877 SIInstrFlags::isSMRD(*MCII, MI)) {
878 int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
879 AMDGPU::OpName::cpol);
880 if (CPolPos != -1) {
881 unsigned CPol =
883 if (MI.getNumOperands() <= (unsigned)CPolPos) {
885 AMDGPU::OpName::cpol);
886 } else if (CPol) {
887 MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
888 }
889 }
890 }
891
892 if (SIInstrFlags::isBuffer(*MCII, MI) &&
893 (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
894 // GFX90A lost TFE, its place is occupied by ACC.
895 int TFEOpIdx =
896 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
897 if (TFEOpIdx != -1) {
898 auto *TFEIter = MI.begin();
899 std::advance(TFEIter, TFEOpIdx);
900 MI.insert(TFEIter, MCOperand::createImm(0));
901 }
902 }
903
904 // Validate buffer instruction offsets for GFX12+ - must not be negative.
906 int OffsetIdx =
907 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::offset);
908 if (OffsetIdx != -1) {
909 uint32_t Imm = MI.getOperand(OffsetIdx).getImm();
910 int64_t SignedOffset = SignExtend64<24>(Imm);
911 if (SignedOffset < 0)
913 }
914 }
915
916 if (SIInstrFlags::isBuffer(*MCII, MI)) {
917 int SWZOpIdx =
918 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
919 if (SWZOpIdx != -1) {
920 auto *SWZIter = MI.begin();
921 std::advance(SWZIter, SWZOpIdx);
922 MI.insert(SWZIter, MCOperand::createImm(0));
923 }
924 }
925
926 const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
928 int VAddr0Idx =
929 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
930 int RsrcIdx =
931 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
932 unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
933 if (VAddr0Idx >= 0 && NSAArgs > 0) {
934 unsigned NSAWords = (NSAArgs + 3) / 4;
935 if (Bytes.size() < 4 * NSAWords)
937 for (unsigned i = 0; i < NSAArgs; ++i) {
938 const unsigned VAddrIdx = VAddr0Idx + 1 + i;
939 auto VAddrRCID =
940 MCII->getOpRegClassID(Desc.operands()[VAddrIdx], HwModeRegClass);
941 MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
942 }
943 Bytes = Bytes.slice(4 * NSAWords);
944 }
945
947 }
948
951
952 if (SIInstrFlags::isEXP(*MCII, MI))
954
955 if (SIInstrFlags::isVINTERP(*MCII, MI))
957
958 if (SIInstrFlags::isSDWA(*MCII, MI))
960
961 if (SIInstrFlags::isMAI(*MCII, MI))
963
964 if (SIInstrFlags::isWMMA(*MCII, MI))
966
967 int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
968 AMDGPU::OpName::vdst_in);
969 if (VDstIn_Idx != -1) {
970 int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
972 if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
973 !MI.getOperand(VDstIn_Idx).isReg() ||
974 MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
975 if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
976 MI.erase(&MI.getOperand(VDstIn_Idx));
978 MCOperand::createReg(MI.getOperand(Tied).getReg()),
979 AMDGPU::OpName::vdst_in);
980 }
981 }
982
983 bool IsSOPK = SIInstrFlags::isSOPK(*MCII, MI);
984 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::imm) && !IsSOPK)
986
987 // Some VOPC instructions, e.g., v_cmpx_f_f64, use VOP3 encoding and
988 // have EXEC as implicit destination. Issue a warning if encoding for
989 // vdst is not EXEC.
990 if (SIInstrFlags::isVOP3(*MCII, MI) &&
991 MCII->get(MI.getOpcode()).getNumDefs() == 0 &&
992 MCII->get(MI.getOpcode()).hasImplicitDefOfPhysReg(AMDGPU::EXEC)) {
993 auto ExecEncoding = MRI.getEncodingValue(AMDGPU::EXEC_LO);
994 if (Bytes_[0] != ExecEncoding)
996 }
997
998 Size = MaxInstBytesNum - Bytes.size();
999 return Status;
1000}
1001
1003 if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
1004 // The MCInst still has these fields even though they are no longer encoded
1005 // in the GFX11 instruction.
1006 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
1007 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
1008 }
1009}
1010
1013 if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
1014 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
1015 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
1016 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
1017 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx13 ||
1018 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx13 ||
1019 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
1020 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
1021 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
1022 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
1023 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx13 ||
1024 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx13 ||
1025 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
1026 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
1027 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
1028 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
1029 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx13 ||
1030 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx13 ||
1031 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
1032 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
1033 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
1034 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12 ||
1035 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx13 ||
1036 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx13) {
1037 // The MCInst has this field that is not directly encoded in the
1038 // instruction.
1039 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
1040 }
1041}
1042
1044 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
1045 STI.hasFeature(AMDGPU::FeatureGFX10)) {
1046 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
1047 // VOPC - insert clamp
1048 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
1049 } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
1050 int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
1051 if (SDst != -1) {
1052 // VOPC - insert VCC register as sdst
1054 AMDGPU::OpName::sdst);
1055 } else {
1056 // VOP1/2 - insert omod if present in instruction
1057 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
1058 }
1059 }
1060}
1061
1062/// Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the
1063/// appropriate subregister for the used format width.
1065 MCOperand &MO, uint8_t NumRegs) {
1066 switch (NumRegs) {
1067 case 4:
1068 return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));
1069 case 6:
1070 return MO.setReg(
1071 MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
1072 case 8:
1073 if (MCRegister NewReg = MRI.getSubReg(
1074 MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7)) {
1075 MO.setReg(NewReg);
1076 }
1077 return;
1078 case 12: {
1079 // There is no 384-bit subreg index defined.
1080 MCRegister BaseReg = MRI.getSubReg(MO.getReg(), AMDGPU::sub0);
1081 MCRegister NewReg = MRI.getMatchingSuperReg(
1082 BaseReg, AMDGPU::sub0, &MRI.getRegClass(AMDGPU::VReg_384RegClassID));
1083 return MO.setReg(NewReg);
1084 }
1085 case 16:
1086 // No-op in cases where one operand is still f8/bf8.
1087 return;
1088 default:
1089 llvm_unreachable("Unexpected size for mfma/wmma f8f6f4 operand");
1090 }
1091}
1092
1093/// f8f6f4 instructions have different pseudos depending on the used formats. In
1094/// the disassembler table, we only have the variants with the largest register
1095/// classes which assume using an fp8/bf8 format for both operands. The actual
1096/// register class depends on the format in blgp and cbsz operands. Adjust the
1097/// register classes depending on the used format.
1099 int BlgpIdx =
1100 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::blgp);
1101 if (BlgpIdx == -1)
1102 return;
1103
1104 int CbszIdx =
1105 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cbsz);
1106
1107 unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
1108 unsigned BLGP = MI.getOperand(BlgpIdx).getImm();
1109
1110 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
1111 AMDGPU::getMFMA_F8F6F4_WithFormatArgs(CBSZ, BLGP, MI.getOpcode());
1112 if (!AdjustedRegClassOpcode ||
1113 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
1114 return;
1115
1116 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
1117 int Src0Idx =
1118 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
1119 int Src1Idx =
1120 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
1121 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
1122 AdjustedRegClassOpcode->NumRegsSrcA);
1123 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
1124 AdjustedRegClassOpcode->NumRegsSrcB);
1125}
1126
1128 int FmtAIdx =
1129 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_a_fmt);
1130 if (FmtAIdx == -1)
1131 return;
1132
1133 int FmtBIdx =
1134 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_b_fmt);
1135
1136 unsigned FmtA = MI.getOperand(FmtAIdx).getImm();
1137 unsigned FmtB = MI.getOperand(FmtBIdx).getImm();
1138
1139 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
1140 AMDGPU::getWMMA_F8F6F4_WithFormatArgs(FmtA, FmtB, MI.getOpcode());
1141 if (!AdjustedRegClassOpcode ||
1142 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
1143 return;
1144
1145 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
1146 int Src0Idx =
1147 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
1148 int Src1Idx =
1149 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
1150 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
1151 AdjustedRegClassOpcode->NumRegsSrcA);
1152 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
1153 AdjustedRegClassOpcode->NumRegsSrcB);
1154}
1155
1157 unsigned OpSel = 0;
1158 unsigned OpSelHi = 0;
1159 unsigned NegLo = 0;
1160 unsigned NegHi = 0;
1161};
1162
1163// Reconstruct values of VOP3/VOP3P operands such as op_sel.
1164// Note that these values do not affect disassembler output,
1165// so this is only necessary for consistency with src_modifiers.
1167 bool IsVOP3P = false) {
1168 VOPModifiers Modifiers;
1169 unsigned Opc = MI.getOpcode();
1170 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
1171 AMDGPU::OpName::src1_modifiers,
1172 AMDGPU::OpName::src2_modifiers};
1173 for (int J = 0; J < 3; ++J) {
1174 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
1175 if (OpIdx == -1)
1176 continue;
1177
1178 unsigned Val = MI.getOperand(OpIdx).getImm();
1179
1180 Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
1181 if (IsVOP3P) {
1182 Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
1183 Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
1184 Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
1185 } else if (J == 0) {
1186 Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
1187 }
1188 }
1189
1190 return Modifiers;
1191}
1192
1193// Instructions decode the op_sel/suffix bits into the src_modifier
1194// operands. Copy those bits into the src operands for true16 VGPRs.
1196 const unsigned Opc = MI.getOpcode();
1197 const MCRegisterClass &ConversionRC =
1198 MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
1199 constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
1200 OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
1202 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
1204 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
1206 {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
1208 for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
1209 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
1210 int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
1211 if (OpIdx == -1 || OpModsIdx == -1)
1212 continue;
1213 MCOperand &Op = MI.getOperand(OpIdx);
1214 if (!Op.isReg())
1215 continue;
1216 if (!ConversionRC.contains(Op.getReg()))
1217 continue;
1218 unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
1219 const MCOperand &OpMods = MI.getOperand(OpModsIdx);
1220 unsigned ModVal = OpMods.getImm();
1221 if (ModVal & OpSelMask) { // isHi
1222 unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
1223 Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
1224 }
1225 }
1226}
1227
1228// MAC opcodes have special old and src2 operands.
1229// src2 is tied to dst, while old is not tied (but assumed to be).
1231 constexpr int DST_IDX = 0;
1232 auto Opcode = MI.getOpcode();
1233 const auto &Desc = MCII->get(Opcode);
1234 auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);
1235
1236 if (OldIdx != -1 && Desc.getOperandConstraint(
1237 OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
1238 assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
1239 assert(Desc.getOperandConstraint(
1240 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
1242 (void)DST_IDX;
1243 return true;
1244 }
1245
1246 return false;
1247}
1248
1249// Create dummy old operand and insert dummy unused src2_modifiers
1251 assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
1252 insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
1254 AMDGPU::OpName::src2_modifiers);
1255}
1256
1258 unsigned Opc = MI.getOpcode();
1259
1260 int VDstInIdx =
1261 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
1262 if (VDstInIdx != -1)
1263 insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
1264
1265 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1266 if (MI.getNumOperands() < DescNumOps &&
1267 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1269 auto Mods = collectVOPModifiers(MI);
1271 AMDGPU::OpName::op_sel);
1272 } else {
1273 // Insert dummy unused src modifiers.
1274 if (MI.getNumOperands() < DescNumOps &&
1275 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1277 AMDGPU::OpName::src0_modifiers);
1278
1279 if (MI.getNumOperands() < DescNumOps &&
1280 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1282 AMDGPU::OpName::src1_modifiers);
1283 }
1284}
1285
1288
1289 int VDstInIdx =
1290 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
1291 if (VDstInIdx != -1)
1292 insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
1293
1294 unsigned Opc = MI.getOpcode();
1295 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1296 if (MI.getNumOperands() < DescNumOps &&
1297 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1298 auto Mods = collectVOPModifiers(MI);
1300 AMDGPU::OpName::op_sel);
1301 }
1302}
1303
1304// Given a wide tuple \p Reg check if it will overflow 256 registers.
1305// \returns \p Reg on success or NoRegister otherwise.
1307 const MCRegisterInfo &MRI) {
1308 unsigned NumRegs = RC.getSizeInBits() / 32;
1309 MCRegister Sub0 = MRI.getSubReg(Reg, AMDGPU::sub0);
1310 if (!Sub0)
1311 return Reg;
1312
1313 MCRegister BaseReg;
1314 if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(Sub0))
1315 BaseReg = AMDGPU::VGPR0;
1316 else if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Sub0))
1317 BaseReg = AMDGPU::AGPR0;
1318
1319 assert(BaseReg && "Only vector registers expected");
1320
1321 return (Sub0 - BaseReg + NumRegs <= 256) ? Reg : MCRegister();
1322}
1323
1324// Note that before gfx10, the MIMG encoding provided no information about
1325// VADDR size. Consequently, decoded instructions always show address as if it
1326// has 1 dword, which could be not really so.
1328 int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1329 AMDGPU::OpName::vdst);
1330
1331 int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1332 AMDGPU::OpName::vdata);
1333 int VAddr0Idx =
1334 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
1335 AMDGPU::OpName RsrcOpName = SIInstrFlags::isMIMG(*MCII, MI)
1336 ? AMDGPU::OpName::srsrc
1337 : AMDGPU::OpName::rsrc;
1338 int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
1339 int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1340 AMDGPU::OpName::dmask);
1341
1342 int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1343 AMDGPU::OpName::tfe);
1344 int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1345 AMDGPU::OpName::d16);
1346
1347 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
1348 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
1349 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
1350
1351 assert(VDataIdx != -1);
1352 if (BaseOpcode->BVH) {
1353 // Add A16 operand for intersect_ray instructions
1354 addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
1355 return;
1356 }
1357
1358 bool IsAtomic = (VDstIdx != -1);
1359 bool IsGather4 = SIInstrFlags::isGather4(*MCII, MI);
1360 bool IsVSample = SIInstrFlags::isVSAMPLE(*MCII, MI);
1361 bool IsNSA = false;
1362 bool IsPartialNSA = false;
1363 unsigned AddrSize = Info->VAddrDwords;
1364
1365 if (isGFX10Plus()) {
1366 unsigned DimIdx =
1367 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
1368 int A16Idx =
1369 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
1370 const AMDGPU::MIMGDimInfo *Dim =
1371 AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
1372 const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());
1373
1374 AddrSize =
1375 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));
1376
1377 // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
1378 // VIMAGE insts other than BVH never use vaddr4.
1379 IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
1380 Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
1381 Info->MIMGEncoding == AMDGPU::MIMGEncGfx12 ||
1382 Info->MIMGEncoding == AMDGPU::MIMGEncGfx13;
1383 if (!IsNSA) {
1384 if (!IsVSample && AddrSize > 12)
1385 AddrSize = 16;
1386 } else {
1387 if (AddrSize > Info->VAddrDwords) {
1388 if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
1389 // The NSA encoding does not contain enough operands for the
1390 // combination of base opcode / dimension. Should this be an error?
1391 return;
1392 }
1393 IsPartialNSA = true;
1394 }
1395 }
1396 }
1397
1398 unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
1399 unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);
1400
1401 bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
1402 if (D16 && AMDGPU::hasPackedD16(STI)) {
1403 DstSize = (DstSize + 1) / 2;
1404 }
1405
1406 if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
1407 DstSize += 1;
1408
1409 if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
1410 return;
1411
1412 int NewOpcode =
1413 AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
1414 if (NewOpcode == -1)
1415 return;
1416
1417 // Widen the register to the correct number of enabled channels.
1418 MCRegister NewVdata;
1419 if (DstSize != Info->VDataDwords) {
1420 auto DataRCID = MCII->getOpRegClassID(
1421 MCII->get(NewOpcode).operands()[VDataIdx], HwModeRegClass);
1422
1423 // Get first subregister of VData
1424 MCRegister Vdata0 = MI.getOperand(VDataIdx).getReg();
1425 MCRegister VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
1426 Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
1427
1428 const MCRegisterClass &NewRC = MRI.getRegClass(DataRCID);
1429 NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, &NewRC);
1430 NewVdata = CheckVGPROverflow(NewVdata, NewRC, MRI);
1431 if (!NewVdata) {
1432 // It's possible to encode this such that the low register + enabled
1433 // components exceeds the register count.
1434 return;
1435 }
1436 }
1437
1438 // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
1439 // If using partial NSA on GFX11+ widen last address register.
1440 int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
1441 MCRegister NewVAddrSA;
1442 if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
1443 AddrSize != Info->VAddrDwords) {
1444 MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
1445 MCRegister VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
1446 VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
1447
1448 auto AddrRCID = MCII->getOpRegClassID(
1449 MCII->get(NewOpcode).operands()[VAddrSAIdx], HwModeRegClass);
1450
1451 const MCRegisterClass &NewRC = MRI.getRegClass(AddrRCID);
1452 NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0, &NewRC);
1453 NewVAddrSA = CheckVGPROverflow(NewVAddrSA, NewRC, MRI);
1454 if (!NewVAddrSA)
1455 return;
1456 }
1457
1458 MI.setOpcode(NewOpcode);
1459
1460 if (NewVdata != AMDGPU::NoRegister) {
1461 MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
1462
1463 if (IsAtomic) {
1464 // Atomic operations have an additional operand (a copy of data)
1465 MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
1466 }
1467 }
1468
1469 if (NewVAddrSA) {
1470 MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
1471 } else if (IsNSA) {
1472 assert(AddrSize <= Info->VAddrDwords);
1473 MI.erase(MI.begin() + VAddr0Idx + AddrSize,
1474 MI.begin() + VAddr0Idx + Info->VAddrDwords);
1475 }
1476}
1477
1478// Opsel and neg bits are used in src_modifiers and standalone operands. Autogen
1479// decoder only adds to src_modifiers, so manually add the bits to the other
1480// operands.
1482 unsigned Opc = MI.getOpcode();
1483 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1484 auto Mods = collectVOPModifiers(MI, true);
1485
1486 if (MI.getNumOperands() < DescNumOps &&
1487 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
1488 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);
1489
1490 if (MI.getNumOperands() < DescNumOps &&
1491 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
1493 AMDGPU::OpName::op_sel);
1494 if (MI.getNumOperands() < DescNumOps &&
1495 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
1497 AMDGPU::OpName::op_sel_hi);
1498 if (MI.getNumOperands() < DescNumOps &&
1499 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
1501 AMDGPU::OpName::neg_lo);
1502 if (MI.getNumOperands() < DescNumOps &&
1503 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
1505 AMDGPU::OpName::neg_hi);
1506}
1507
1508// Create dummy old operand and insert optional operands
1510 unsigned Opc = MI.getOpcode();
1511 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1512
1513 if (MI.getNumOperands() < DescNumOps &&
1514 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
1515 insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
1516
1517 if (MI.getNumOperands() < DescNumOps &&
1518 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1520 AMDGPU::OpName::src0_modifiers);
1521
1522 if (MI.getNumOperands() < DescNumOps &&
1523 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1525 AMDGPU::OpName::src1_modifiers);
1526}
1527
1529 unsigned Opc = MI.getOpcode();
1530 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1531
1533
1534 if (MI.getNumOperands() < DescNumOps &&
1535 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1538 AMDGPU::OpName::op_sel);
1539 }
1540}
1541
1543 assert(HasLiteral && "Should have decoded a literal");
1544 insertNamedMCOperand(MI, MCOperand::createImm(Literal), AMDGPU::OpName::immX);
1545}
1546
1547const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
1549 &getAMDGPUMCRegisterClass(RegClassID));
1550}
1551
1552inline
1554 const Twine& ErrMsg) const {
1555 *CommentStream << "Error: " + ErrMsg;
1556
1557 // ToDo: add support for error operands to MCInst.h
1558 // return MCOperand::createError(V);
1559 return MCOperand();
1560}
1561
1565
1566inline
1568 unsigned Val) const {
1569 const auto &RegCl = getAMDGPUMCRegisterClass(RegClassID);
1570 if (Val >= RegCl.getNumRegs())
1571 return errOperand(Val, Twine(getRegClassName(RegClassID)) +
1572 ": unknown register " + Twine(Val));
1573 return createRegOperand(RegCl.getRegister(Val));
1574}
1575
1576inline
1578 unsigned Val) const {
1579 // ToDo: SI/CI have 104 SGPRs, VI - 102
1580 // Valery: here we accepting as much as we can, let assembler sort it out
1581 int shift = 0;
1582 switch (SRegClassID) {
1583 case AMDGPU::SGPR_32RegClassID:
1584 case AMDGPU::TTMP_32RegClassID:
1585 break;
1586 case AMDGPU::SGPR_64RegClassID:
1587 case AMDGPU::TTMP_64RegClassID:
1588 shift = 1;
1589 break;
1590 case AMDGPU::SGPR_96RegClassID:
1591 case AMDGPU::TTMP_96RegClassID:
1592 case AMDGPU::SGPR_128RegClassID:
1593 case AMDGPU::TTMP_128RegClassID:
1594 // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
1595 // this bundle?
1596 case AMDGPU::SGPR_256RegClassID:
1597 case AMDGPU::TTMP_256RegClassID:
1598 // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
1599 // this bundle?
1600 case AMDGPU::SGPR_288RegClassID:
1601 case AMDGPU::TTMP_288RegClassID:
1602 case AMDGPU::SGPR_320RegClassID:
1603 case AMDGPU::TTMP_320RegClassID:
1604 case AMDGPU::SGPR_352RegClassID:
1605 case AMDGPU::TTMP_352RegClassID:
1606 case AMDGPU::SGPR_384RegClassID:
1607 case AMDGPU::TTMP_384RegClassID:
1608 case AMDGPU::SGPR_512RegClassID:
1609 case AMDGPU::TTMP_512RegClassID:
1610 shift = 2;
1611 break;
1612 // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
1613 // this bundle?
1614 default:
1615 llvm_unreachable("unhandled register class");
1616 }
1617
1618 if (Val % (1 << shift)) {
1619 *CommentStream << "Warning: " << getRegClassName(SRegClassID)
1620 << ": scalar reg isn't aligned " << Val;
1621 }
1622
1623 return createRegOperand(SRegClassID, Val >> shift);
1624}
1625
1627 bool IsHi) const {
1628 unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
1629 return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
1630}
1631
1632// Decode Literals for insts which always have a literal in the encoding
1635 if (HasLiteral) {
1636 assert(
1638 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1639 if (Literal != Val)
1640 return errOperand(Val, "More than one unique literal is illegal");
1641 }
1642 HasLiteral = true;
1643 Literal = Val;
1644 return MCOperand::createImm(Literal);
1645}
1646
1649 if (HasLiteral) {
1650 if (Literal != Val)
1651 return errOperand(Val, "More than one unique literal is illegal");
1652 }
1653 HasLiteral = true;
1654 Literal = Val;
1655
1656 bool UseLit64 = Hi_32(Literal) == 0;
1658 LitModifier::Lit64, Literal, getContext()))
1659 : MCOperand::createImm(Literal);
1660}
1661
1664 const MCOperandInfo &OpDesc) const {
1665 // For now all literal constants are supposed to be unsigned integer
1666 // ToDo: deal with signed/unsigned 64-bit integer constants
1667 // ToDo: deal with float/double constants
1668 if (!HasLiteral) {
1669 if (Bytes.size() < 4) {
1670 return errOperand(0, "cannot read literal, inst bytes left " +
1671 Twine(Bytes.size()));
1672 }
1673 HasLiteral = true;
1674 Literal = eatBytes<uint32_t>(Bytes);
1675 }
1676
1677 // For disassembling always assume all inline constants are available.
1678 bool HasInv2Pi = true;
1679
1680 // Invalid instruction codes may contain literals for inline-only
1681 // operands, so we support them here as well.
1682 int64_t Val = Literal;
1683 bool UseLit = false;
1684 switch (OpDesc.OperandType) {
1685 default:
1686 llvm_unreachable("Unexpected operand type!");
1690 UseLit = AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
1691 break;
1694 break;
1698 UseLit = AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
1699 break;
1701 UseLit = AMDGPU::isInlinableLiteralV2F16(Val);
1702 break;
1705 break;
1707 break;
1711 UseLit = AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi);
1712 break;
1714 UseLit = AMDGPU::isInlinableLiteralV2I16(Val);
1715 break;
1725 UseLit = AMDGPU::isInlinableLiteral32(Val, HasInv2Pi);
1726 break;
1731 UseLit = AMDGPU::isInlinableLiteral64(Val << 32, HasInv2Pi);
1732 if (!UseLit)
1733 Val <<= 32;
1734 break;
1738 UseLit = AMDGPU::isInlinableLiteral64(Val, HasInv2Pi);
1739 break;
1741 // TODO: Disassembling V_DUAL_FMAMK_F32_X_FMAMK_F32_gfx11 hits
1742 // decoding a literal in a position of a register operand. Give
1743 // it special handling in the caller, decodeImmOperands(), instead
1744 // of quietly allowing it here.
1745 break;
1746 }
1747
1750 : MCOperand::createImm(Val);
1751}
1752
1754 assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));
1755
1756 if (!HasLiteral) {
1757 if (Bytes.size() < 8) {
1758 return errOperand(0, "cannot read literal64, inst bytes left " +
1759 Twine(Bytes.size()));
1760 }
1761 HasLiteral = true;
1762 Literal = eatBytes<uint64_t>(Bytes);
1763 }
1764
1765 bool UseLit64 = Hi_32(Literal) == 0;
1766
1767 UseLit64 |= AMDGPU::isInlinableLiteral64(
1768 Literal, STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm));
1769
1771 LitModifier::Lit64, Literal, getContext()))
1772 : MCOperand::createImm(Literal);
1773}
1774
1776 using namespace AMDGPU::EncValues;
1777
1778 assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
1779 return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
1780 (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
1781 (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
1782 // Cast prevents negative overflow.
1783}
1784
1785static int64_t getInlineImmVal32(unsigned Imm) {
1786 switch (Imm) {
1787 case 240:
1788 return llvm::bit_cast<uint32_t>(0.5f);
1789 case 241:
1790 return llvm::bit_cast<uint32_t>(-0.5f);
1791 case 242:
1792 return llvm::bit_cast<uint32_t>(1.0f);
1793 case 243:
1794 return llvm::bit_cast<uint32_t>(-1.0f);
1795 case 244:
1796 return llvm::bit_cast<uint32_t>(2.0f);
1797 case 245:
1798 return llvm::bit_cast<uint32_t>(-2.0f);
1799 case 246:
1800 return llvm::bit_cast<uint32_t>(4.0f);
1801 case 247:
1802 return llvm::bit_cast<uint32_t>(-4.0f);
1803 case 248: // 1 / (2 * PI)
1804 return 0x3e22f983;
1805 default:
1806 llvm_unreachable("invalid fp inline imm");
1807 }
1808}
1809
1810static int64_t getInlineImmVal64(unsigned Imm) {
1811 switch (Imm) {
1812 case 240:
1813 return llvm::bit_cast<uint64_t>(0.5);
1814 case 241:
1815 return llvm::bit_cast<uint64_t>(-0.5);
1816 case 242:
1817 return llvm::bit_cast<uint64_t>(1.0);
1818 case 243:
1819 return llvm::bit_cast<uint64_t>(-1.0);
1820 case 244:
1821 return llvm::bit_cast<uint64_t>(2.0);
1822 case 245:
1823 return llvm::bit_cast<uint64_t>(-2.0);
1824 case 246:
1825 return llvm::bit_cast<uint64_t>(4.0);
1826 case 247:
1827 return llvm::bit_cast<uint64_t>(-4.0);
1828 case 248: // 1 / (2 * PI)
1829 return 0x3fc45f306dc9c882;
1830 default:
1831 llvm_unreachable("invalid fp inline imm");
1832 }
1833}
1834
1835static int64_t getInlineImmValF16(unsigned Imm) {
1836 switch (Imm) {
1837 case 240:
1838 return 0x3800;
1839 case 241:
1840 return 0xB800;
1841 case 242:
1842 return 0x3C00;
1843 case 243:
1844 return 0xBC00;
1845 case 244:
1846 return 0x4000;
1847 case 245:
1848 return 0xC000;
1849 case 246:
1850 return 0x4400;
1851 case 247:
1852 return 0xC400;
1853 case 248: // 1 / (2 * PI)
1854 return 0x3118;
1855 default:
1856 llvm_unreachable("invalid fp inline imm");
1857 }
1858}
1859
1860static int64_t getInlineImmValBF16(unsigned Imm) {
1861 switch (Imm) {
1862 case 240:
1863 return 0x3F00;
1864 case 241:
1865 return 0xBF00;
1866 case 242:
1867 return 0x3F80;
1868 case 243:
1869 return 0xBF80;
1870 case 244:
1871 return 0x4000;
1872 case 245:
1873 return 0xC000;
1874 case 246:
1875 return 0x4080;
1876 case 247:
1877 return 0xC080;
1878 case 248: // 1 / (2 * PI)
1879 return 0x3E22;
1880 default:
1881 llvm_unreachable("invalid fp inline imm");
1882 }
1883}
1884
1885unsigned AMDGPUDisassembler::getVgprClassId(unsigned Width) const {
1886 using namespace AMDGPU;
1887
1888 switch (Width) {
1889 case 16:
1890 case 32:
1891 return VGPR_32RegClassID;
1892 case 64:
1893 return VReg_64RegClassID;
1894 case 96:
1895 return VReg_96RegClassID;
1896 case 128:
1897 return VReg_128RegClassID;
1898 case 160:
1899 return VReg_160RegClassID;
1900 case 192:
1901 return VReg_192RegClassID;
1902 case 256:
1903 return VReg_256RegClassID;
1904 case 288:
1905 return VReg_288RegClassID;
1906 case 320:
1907 return VReg_320RegClassID;
1908 case 352:
1909 return VReg_352RegClassID;
1910 case 384:
1911 return VReg_384RegClassID;
1912 case 512:
1913 return VReg_512RegClassID;
1914 case 1024:
1915 return VReg_1024RegClassID;
1916 }
1917 llvm_unreachable("Invalid register width!");
1918}
1919
1920unsigned AMDGPUDisassembler::getAgprClassId(unsigned Width) const {
1921 using namespace AMDGPU;
1922
1923 switch (Width) {
1924 case 16:
1925 case 32:
1926 return AGPR_32RegClassID;
1927 case 64:
1928 return AReg_64RegClassID;
1929 case 96:
1930 return AReg_96RegClassID;
1931 case 128:
1932 return AReg_128RegClassID;
1933 case 160:
1934 return AReg_160RegClassID;
1935 case 256:
1936 return AReg_256RegClassID;
1937 case 288:
1938 return AReg_288RegClassID;
1939 case 320:
1940 return AReg_320RegClassID;
1941 case 352:
1942 return AReg_352RegClassID;
1943 case 384:
1944 return AReg_384RegClassID;
1945 case 512:
1946 return AReg_512RegClassID;
1947 case 1024:
1948 return AReg_1024RegClassID;
1949 }
1950 llvm_unreachable("Invalid register width!");
1951}
1952
1953unsigned AMDGPUDisassembler::getSgprClassId(unsigned Width) const {
1954 using namespace AMDGPU;
1955
1956 switch (Width) {
1957 case 16:
1958 case 32:
1959 return SGPR_32RegClassID;
1960 case 64:
1961 return SGPR_64RegClassID;
1962 case 96:
1963 return SGPR_96RegClassID;
1964 case 128:
1965 return SGPR_128RegClassID;
1966 case 160:
1967 return SGPR_160RegClassID;
1968 case 256:
1969 return SGPR_256RegClassID;
1970 case 288:
1971 return SGPR_288RegClassID;
1972 case 320:
1973 return SGPR_320RegClassID;
1974 case 352:
1975 return SGPR_352RegClassID;
1976 case 384:
1977 return SGPR_384RegClassID;
1978 case 512:
1979 return SGPR_512RegClassID;
1980 }
1981 llvm_unreachable("Invalid register width!");
1982}
1983
1984unsigned AMDGPUDisassembler::getTtmpClassId(unsigned Width) const {
1985 using namespace AMDGPU;
1986
1987 switch (Width) {
1988 case 16:
1989 case 32:
1990 return TTMP_32RegClassID;
1991 case 64:
1992 return TTMP_64RegClassID;
1993 case 128:
1994 return TTMP_128RegClassID;
1995 case 256:
1996 return TTMP_256RegClassID;
1997 case 288:
1998 return TTMP_288RegClassID;
1999 case 320:
2000 return TTMP_320RegClassID;
2001 case 352:
2002 return TTMP_352RegClassID;
2003 case 384:
2004 return TTMP_384RegClassID;
2005 case 512:
2006 return TTMP_512RegClassID;
2007 }
2008 llvm_unreachable("Invalid register width!");
2009}
2010
2011int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
2012 using namespace AMDGPU::EncValues;
2013
2014 unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
2015 unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
2016
2017 return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
2018}
2019
2021 unsigned Val) const {
2022 using namespace AMDGPU::EncValues;
2023
2024 assert(Val < 1024); // enum10
2025
2026 bool IsAGPR = Val & 512;
2027 Val &= 511;
2028
2029 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
2030 return createRegOperand(IsAGPR ? getAgprClassId(Width)
2031 : getVgprClassId(Width), Val - VGPR_MIN);
2032 }
2033 return decodeNonVGPRSrcOp(Inst, Width, Val & 0xFF);
2034}
2035
2037 unsigned Width,
2038 unsigned Val) const {
2039 // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
2040 // decoded earlier.
2041 assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
2042 using namespace AMDGPU::EncValues;
2043
2044 if (Val <= SGPR_MAX) {
2045 // "SGPR_MIN <= Val" is always true and causes compilation warning.
2046 static_assert(SGPR_MIN == 0);
2047 return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
2048 }
2049
2050 int TTmpIdx = getTTmpIdx(Val);
2051 if (TTmpIdx >= 0) {
2052 return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
2053 }
2054
2055 if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
2056 (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
2057 Val == LITERAL_CONST)
2058 return MCOperand::createImm(Val);
2059
2060 if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
2061 return decodeLiteral64Constant();
2062 }
2063
2064 switch (Width) {
2065 case 32:
2066 case 16:
2067 return decodeSpecialReg32(Val);
2068 case 64:
2069 return decodeSpecialReg64(Val);
2070 case 96:
2071 case 128:
2072 case 256:
2073 case 512:
2074 return decodeSpecialReg96Plus(Val);
2075 default:
2076 llvm_unreachable("unexpected immediate type");
2077 }
2078}
2079
2080// Bit 0 of DstY isn't stored in the instruction, because it's always the
2081// opposite of bit 0 of DstX.
2083 unsigned Val) const {
2084 int VDstXInd =
2085 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
2086 assert(VDstXInd != -1);
2087 assert(Inst.getOperand(VDstXInd).isReg());
2088 unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
2089 Val |= ~XDstReg & 1;
2090 return createRegOperand(getVgprClassId(32), Val);
2091}
2092
2094 using namespace AMDGPU;
2095
2096 switch (Val) {
2097 // clang-format off
2098 case 102: return createRegOperand(FLAT_SCR_LO);
2099 case 103: return createRegOperand(FLAT_SCR_HI);
2100 case 104: return createRegOperand(XNACK_MASK_LO);
2101 case 105: return createRegOperand(XNACK_MASK_HI);
2102 case 106: return createRegOperand(VCC_LO);
2103 case 107: return createRegOperand(VCC_HI);
2104 case 108: return createRegOperand(TBA_LO);
2105 case 109: return createRegOperand(TBA_HI);
2106 case 110: return createRegOperand(TMA_LO);
2107 case 111: return createRegOperand(TMA_HI);
2108 case 124:
2109 return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
2110 case 125:
2111 return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
2112 case 126: return createRegOperand(EXEC_LO);
2113 case 127: return createRegOperand(EXEC_HI);
2114 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
2115 case 231: return createRegOperand(SRC_FLAT_SCRATCH_BASE_HI);
2116 case 235: return createRegOperand(SRC_SHARED_BASE_LO);
2117 case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
2118 case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
2119 case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
2120 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
2121 case 251: return createRegOperand(SRC_VCCZ);
2122 case 252: return createRegOperand(SRC_EXECZ);
2123 case 253: return createRegOperand(SRC_SCC);
2124 case 254: return createRegOperand(LDS_DIRECT);
2125 default: break;
2126 // clang-format on
2127 }
2128 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2129}
2130
2132 using namespace AMDGPU;
2133
2134 switch (Val) {
2135 case 102: return createRegOperand(FLAT_SCR);
2136 case 104: return createRegOperand(XNACK_MASK);
2137 case 106: return createRegOperand(VCC);
2138 case 108: return createRegOperand(TBA);
2139 case 110: return createRegOperand(TMA);
2140 case 124:
2141 if (isGFX11Plus())
2142 return createRegOperand(SGPR_NULL);
2143 break;
2144 case 125:
2145 if (!isGFX11Plus())
2146 return createRegOperand(SGPR_NULL);
2147 break;
2148 case 126: return createRegOperand(EXEC);
2149 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
2150 case 235: return createRegOperand(SRC_SHARED_BASE);
2151 case 236: return createRegOperand(SRC_SHARED_LIMIT);
2152 case 237: return createRegOperand(SRC_PRIVATE_BASE);
2153 case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
2154 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
2155 case 251: return createRegOperand(SRC_VCCZ);
2156 case 252: return createRegOperand(SRC_EXECZ);
2157 case 253: return createRegOperand(SRC_SCC);
2158 default: break;
2159 }
2160 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2161}
2162
2164 using namespace AMDGPU;
2165
2166 switch (Val) {
2167 case 124:
2168 if (isGFX11Plus())
2169 return createRegOperand(SGPR_NULL);
2170 break;
2171 case 125:
2172 if (!isGFX11Plus())
2173 return createRegOperand(SGPR_NULL);
2174 break;
2175 default:
2176 break;
2177 }
2178 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2179}
2180
2182 const unsigned Val) const {
2183 using namespace AMDGPU::SDWA;
2184 using namespace AMDGPU::EncValues;
2185
2186 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
2187 STI.hasFeature(AMDGPU::FeatureGFX10)) {
2188 // XXX: cast to int is needed to avoid stupid warning:
2189 // compare with unsigned is always true
2190 if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
2191 Val <= SDWA9EncValues::SRC_VGPR_MAX) {
2192 return createRegOperand(getVgprClassId(Width),
2193 Val - SDWA9EncValues::SRC_VGPR_MIN);
2194 }
2195 if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
2196 Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
2197 : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
2198 return createSRegOperand(getSgprClassId(Width),
2199 Val - SDWA9EncValues::SRC_SGPR_MIN);
2200 }
2201 if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
2202 Val <= SDWA9EncValues::SRC_TTMP_MAX) {
2203 return createSRegOperand(getTtmpClassId(Width),
2204 Val - SDWA9EncValues::SRC_TTMP_MIN);
2205 }
2206
2207 const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
2208
2209 if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
2210 (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
2211 return MCOperand::createImm(SVal);
2212
2213 return decodeSpecialReg32(SVal);
2214 }
2215 if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
2216 return createRegOperand(getVgprClassId(Width), Val);
2217 llvm_unreachable("unsupported target");
2218}
2219
2221 return decodeSDWASrc(16, Val);
2222}
2223
2225 return decodeSDWASrc(32, Val);
2226}
2227
2229 using namespace AMDGPU::SDWA;
2230
2231 assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
2232 STI.hasFeature(AMDGPU::FeatureGFX10)) &&
2233 "SDWAVopcDst should be present only on GFX9+");
2234
2235 bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);
2236
2237 if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
2238 Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
2239
2240 int TTmpIdx = getTTmpIdx(Val);
2241 if (TTmpIdx >= 0) {
2242 auto TTmpClsId = getTtmpClassId(IsWave32 ? 32 : 64);
2243 return createSRegOperand(TTmpClsId, TTmpIdx);
2244 }
2245 if (Val > SGPR_MAX) {
2246 return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
2247 }
2248 return createSRegOperand(getSgprClassId(IsWave32 ? 32 : 64), Val);
2249 }
2250 return createRegOperand(IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
2251}
2252
2254 unsigned Val) const {
2255 return STI.hasFeature(AMDGPU::FeatureWavefrontSize32)
2256 ? decodeSrcOp(Inst, 32, Val)
2257 : decodeSrcOp(Inst, 64, Val);
2258}
2259
2261 unsigned Val) const {
2262 return decodeSrcOp(Inst, 32, Val);
2263}
2264
2267 return MCOperand();
2268 return MCOperand::createImm(Val);
2269}
2270
2272 using VersionField = AMDGPU::EncodingField<7, 0>;
2273 using W64Bit = AMDGPU::EncodingBit<13>;
2274 using W32Bit = AMDGPU::EncodingBit<14>;
2275 using MDPBit = AMDGPU::EncodingBit<15>;
2277
2278 auto [Version, W64, W32, MDP] = Encoding::decode(Imm);
2279
2280 // Decode into a plain immediate if any unused bits are raised.
2281 if (Encoding::encode(Version, W64, W32, MDP) != Imm)
2282 return MCOperand::createImm(Imm);
2283
2284 const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
2285 const auto *I = find_if(
2286 Versions, [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
2287 return V.Code == Version;
2288 });
2289 MCContext &Ctx = getContext();
2290 const MCExpr *E;
2291 if (I == Versions.end())
2293 else
2294 E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx);
2295
2296 if (W64)
2297 E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx);
2298 if (W32)
2299 E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx);
2300 if (MDP)
2301 E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx);
2302
2303 return MCOperand::createExpr(E);
2304}
2305
2307 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2308}
2309
2311
2313 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2314}
2315
2317
2319
2323
2325 return STI.hasFeature(AMDGPU::FeatureGFX11);
2326}
2327
2331
2333 return STI.hasFeature(AMDGPU::FeatureGFX11_7Insts);
2334}
2335
2337 return STI.hasFeature(AMDGPU::FeatureGFX12);
2338}
2339
2343
2345
2349
2351
2355
2357 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2358}
2359
2363
2364//===----------------------------------------------------------------------===//
2365// AMDGPU specific symbol handling
2366//===----------------------------------------------------------------------===//
2367
2368/// Print a string describing the reserved bit range specified by Mask with
2369/// offset BaseBytes for use in error comments. Mask is a single continuous
2370/// range of 1s surrounded by zeros. The format here is meant to align with the
2371/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
2372static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
2373 SmallString<32> Result;
2374 raw_svector_ostream S(Result);
2375
2376 int TrailingZeros = llvm::countr_zero(Mask);
2377 int PopCount = llvm::popcount(Mask);
2378
2379 if (PopCount == 1) {
2380 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2381 } else {
2382 S << "bits in range ("
2383 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
2384 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2385 }
2386
2387 return Result;
2388}
2389
// Helper macros for decoding a 32-bit kernel descriptor register word. They
// are expanded inside functions that have `FourByteBuffer`, `KdStream` and
// `Indent` in scope (plus `MAI` for the comment form), and they are #undef'd
// again once those functions have been defined.

// Read the field selected by MASK out of FourByteBuffer.
#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))

// Emit "<Indent><DIRECTIVE> <field value>\n" to KdStream. DIRECTIVE must be a
// string literal because it is concatenated with the separating space.
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent;                                                        \
    KdStream << DIRECTIVE " ";                                                 \
    KdStream << GET_FIELD(MASK) << '\n';                                       \
  } while (0)

// Same as PRINT_DIRECTIVE, but the line is prefixed with the target's comment
// string, so the field is shown without being emitted as a real directive.
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)                        \
  do {                                                                         \
    KdStream << Indent << MAI.getCommentString() << ' ';                       \
    KdStream << DIRECTIVE " " << GET_FIELD(MASK) << '\n';                      \
  } while (0)

// Return an error from the enclosing function when any bit of MASK is set in
// FourByteBuffer. DESC names the offending field or register in the message
// and MSG is appended verbatim after "set"; both must be string literals.
#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG)                              \
  do {                                                                         \
    if ((FourByteBuffer & (MASK)) != 0) {                                      \
      return createStringError(                                                \
          std::errc::invalid_argument,                                         \
          "kernel descriptor " DESC " reserved %s set" MSG,                    \
          getBitRangeFromMask((MASK), 0).c_str());                             \
    }                                                                          \
  } while (0)

// Convenience wrappers. The first two describe the bits by the spelling of
// MASK itself; the *_DESC forms take an explicit description. The *_MSG forms
// append ", <MSG>" to the error text.
#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)                                     \
  CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)                                   \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)                          \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
2418
2419// NOLINTNEXTLINE(readability-identifier-naming)
2421 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2422 using namespace amdhsa;
2423 StringRef Indent = "\t";
2424
2425 // We cannot accurately backward compute #VGPRs used from
2426 // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
2427 // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
2428 // simply calculate the inverse of what the assembler does.
2429
2430 uint32_t GranulatedWorkitemVGPRCount =
2431 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
2432
2433 uint32_t NextFreeVGPR =
2434 (GranulatedWorkitemVGPRCount + 1) *
2435 AMDGPU::IsaInfo::getVGPREncodingGranule(STI, EnableWavefrontSize32);
2436
2437 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
2438
2439 // We cannot backward compute values used to calculate
2440 // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following
2441 // directives can't be computed:
2442 // .amdhsa_reserve_vcc
2443 // .amdhsa_reserve_flat_scratch
2444 // .amdhsa_reserve_xnack_mask
2445 // They take their respective default values if not specified in the assembly.
2446 //
2447 // GRANULATED_WAVEFRONT_SGPR_COUNT
2448 // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
2449 //
2450 // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
2451 // are set to 0. So while disassembling we consider that:
2452 //
2453 // GRANULATED_WAVEFRONT_SGPR_COUNT
2454 // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
2455 //
2456 // The disassembler cannot recover the original values of those 3 directives.
2457
2458 uint32_t GranulatedWavefrontSGPRCount =
2459 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
2460
2461 if (isGFX10Plus())
2462 CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
2463 "must be zero on gfx10+");
2464
2465 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
2467
2468 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
2470 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
2471 bool ReservedXnackMask = STI.hasFeature(AMDGPU::FeatureXNACK);
2472 assert(!ReservedXnackMask || STI.hasFeature(AMDGPU::FeatureSupportsXNACK));
2473 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << ReservedXnackMask
2474 << '\n';
2475 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
2476
2477 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
2478
2479 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
2480 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
2481 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
2482 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
2483 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
2484 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
2485 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
2486 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
2487
2488 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
2489
2490 if (STI.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
2491 PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
2492 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
2493
2494 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
2495
2496 if (STI.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
2497 PRINT_DIRECTIVE(".amdhsa_ieee_mode",
2498 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
2499
2500 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
2501 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
2502
2503 // Bits [26].
2504 if (isGFX9Plus()) {
2505 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
2506 } else {
2507 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
2508 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
2509 }
2510
2511 // Bits [27].
2512 if (isGFX1250Plus()) {
2513 PRINT_PSEUDO_DIRECTIVE_COMMENT("FLAT_SCRATCH_IS_NV",
2514 COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);
2515 } else {
2516 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX120_RESERVED1,
2517 "COMPUTE_PGM_RSRC1");
2518 }
2519
2520 // Bits [28].
2521 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED2, "COMPUTE_PGM_RSRC1");
2522
2523 // Bits [29-31].
2524 if (isGFX10Plus()) {
2525 // WGP_MODE is not available on GFX1250.
2526 if (!isGFX1250Plus()) {
2527 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
2528 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
2529 }
2530 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
2531 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
2532 } else {
2533 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED3,
2534 "COMPUTE_PGM_RSRC1");
2535 }
2536
2537 if (isGFX12Plus())
2538 PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
2539 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
2540
2541 return true;
2542}
2543
2544// NOLINTNEXTLINE(readability-identifier-naming)
2546 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2547 using namespace amdhsa;
2548 StringRef Indent = "\t";
2550 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
2551 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2552 else
2553 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
2554 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2555 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
2556 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2557 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
2558 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2559 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
2560 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2561 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
2562 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2563 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
2564 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2565
2566 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
2567 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
2568 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
2569
2571 ".amdhsa_exception_fp_ieee_invalid_op",
2572 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2573 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2574 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2576 ".amdhsa_exception_fp_ieee_div_zero",
2577 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2578 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2579 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2580 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2581 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2582 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2583 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2584 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2585 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2586
2587 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
2588
2589 return true;
2590}
2591
2592// NOLINTNEXTLINE(readability-identifier-naming)
2594 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2595 using namespace amdhsa;
2596 StringRef Indent = "\t";
2597 if (isGFX90A()) {
2598 KdStream << Indent << ".amdhsa_accum_offset "
2599 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2600 << '\n';
2601
2602 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2603
2604 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2605 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2606 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2607 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2608 } else if (isGFX10Plus()) {
2609 // Bits [0-3].
2610 if (!isGFX12Plus()) {
2611 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2612 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2613 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2614 } else {
2616 "SHARED_VGPR_COUNT",
2617 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2618 }
2619 } else {
2620 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2621 "COMPUTE_PGM_RSRC3",
2622 "must be zero on gfx12+");
2623 }
2624
2625 // Bits [4-11].
2626 if (isGFX11()) {
2627 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2628 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2629 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2630 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2631 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2632 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2633 } else if (isGFX12Plus()) {
2634 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2635 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2636 } else {
2637 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2638 "COMPUTE_PGM_RSRC3",
2639 "must be zero on gfx10");
2640 }
2641
2642 // Bits [12].
2643 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2644 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2645
2646 // Bits [13].
2647 if (isGFX12Plus()) {
2649 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2650 } else {
2651 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2652 "COMPUTE_PGM_RSRC3",
2653 "must be zero on gfx10 or gfx11");
2654 }
2655
2656 // Bits [14-21].
2657 if (isGFX1250Plus()) {
2658 PRINT_DIRECTIVE(".amdhsa_named_barrier_count",
2659 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
2661 "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
2663 COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
2665 "ENABLE_DIDT_THROTTLE",
2666 COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);
2667 } else {
2668 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX120_RESERVED4,
2669 "COMPUTE_PGM_RSRC3",
2670 "must be zero on gfx10+");
2671 }
2672
2673 // Bits [22-30].
2674 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED5,
2675 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2676
2677 // Bits [31].
2678 if (isGFX11Plus()) {
2680 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2681 } else {
2682 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED6,
2683 "COMPUTE_PGM_RSRC3",
2684 "must be zero on gfx10");
2685 }
2686 } else if (FourByteBuffer) {
2687 return createStringError(
2688 std::errc::invalid_argument,
2689 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2690 }
2691 return true;
2692}
2693#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2694#undef PRINT_DIRECTIVE
2695#undef GET_FIELD
2696#undef CHECK_RESERVED_BITS_IMPL
2697#undef CHECK_RESERVED_BITS
2698#undef CHECK_RESERVED_BITS_MSG
2699#undef CHECK_RESERVED_BITS_DESC
2700#undef CHECK_RESERVED_BITS_DESC_MSG
2701
2702/// Create an error object to return from onSymbolStart for reserved kernel
2703/// descriptor bits being set.
2704static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2705 const char *Msg = "") {
2706 return createStringError(
2707 std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2708 getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
2709}
2710
2711/// Create an error object to return from onSymbolStart for reserved kernel
2712/// descriptor bytes being set.
2713static Error createReservedKDBytesError(unsigned BaseInBytes,
2714 unsigned WidthInBytes) {
2715 // Create an error comment in the same format as the "Kernel Descriptor"
2716 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2717 return createStringError(
2718 std::errc::invalid_argument,
2719 "kernel descriptor reserved bits in range (%u:%u) set",
2720 (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2721}
2722
2725 raw_string_ostream &KdStream) const {
2726#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2727 do { \
2728 KdStream << Indent << DIRECTIVE " " \
2729 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2730 } while (0)
2731
2732 uint16_t TwoByteBuffer = 0;
2733 uint32_t FourByteBuffer = 0;
2734
2735 StringRef ReservedBytes;
2736 StringRef Indent = "\t";
2737
2738 assert(Bytes.size() == 64);
2739 DataExtractor DE(Bytes, /*IsLittleEndian=*/true);
2740
2741 switch (Cursor.tell()) {
2743 FourByteBuffer = DE.getU32(Cursor);
2744 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2745 << '\n';
2746 return true;
2747
2749 FourByteBuffer = DE.getU32(Cursor);
2750 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2751 << FourByteBuffer << '\n';
2752 return true;
2753
2755 FourByteBuffer = DE.getU32(Cursor);
2756 KdStream << Indent << ".amdhsa_kernarg_size "
2757 << FourByteBuffer << '\n';
2758 return true;
2759
2761 // 4 reserved bytes, must be 0.
2762 ReservedBytes = DE.getBytes(Cursor, 4);
2763 for (char B : ReservedBytes) {
2764 if (B != 0)
2766 }
2767 return true;
2768
2770 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2771 // So far no directive controls this for Code Object V3, so simply skip for
2772 // disassembly.
2773 DE.skip(Cursor, 8);
2774 return true;
2775
2777 // 20 reserved bytes, must be 0.
2778 ReservedBytes = DE.getBytes(Cursor, 20);
2779 for (char B : ReservedBytes) {
2780 if (B != 0)
2782 }
2783 return true;
2784
2786 FourByteBuffer = DE.getU32(Cursor);
2787 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2788
2790 FourByteBuffer = DE.getU32(Cursor);
2791 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2792
2794 FourByteBuffer = DE.getU32(Cursor);
2795 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2796
2798 using namespace amdhsa;
2799 TwoByteBuffer = DE.getU16(Cursor);
2800
2802 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2803 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2804 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2805 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2806 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2807 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2808 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2809 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2810 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2811 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2813 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2814 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2815 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2816 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2817
2818 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2819 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
2821
2822 // Reserved for GFX9
2823 if (isGFX9() &&
2824 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2826 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2827 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
2828 }
2829 if (isGFX10Plus()) {
2830 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2831 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2832 }
2833
2834 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2835 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2836 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2837
2838 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2839 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
2841 }
2842
2843 return true;
2844
2846 using namespace amdhsa;
2847 TwoByteBuffer = DE.getU16(Cursor);
2848 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2849 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2850 KERNARG_PRELOAD_SPEC_LENGTH);
2851 }
2852
2853 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2854 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2855 KERNARG_PRELOAD_SPEC_OFFSET);
2856 }
2857 return true;
2858
2860 // 4 bytes from here are reserved, must be 0.
2861 ReservedBytes = DE.getBytes(Cursor, 4);
2862 for (char B : ReservedBytes) {
2863 if (B != 0)
2865 }
2866 return true;
2867
2868 default:
2869 llvm_unreachable("Unhandled index. Case statements cover everything.");
2870 return true;
2871 }
2872#undef PRINT_DIRECTIVE
2873}
2874
2876 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2877
2878 // CP microcode requires the kernel descriptor to be 64 aligned.
2879 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2880 return createStringError(std::errc::invalid_argument,
2881 "kernel descriptor must be 64-byte aligned");
2882
2883 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2884 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2885 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2886 // order. Workaround this by first looking up .amdhsa_wavefront_size32 here
2887 // when required.
2888 if (isGFX10Plus()) {
2889 uint16_t KernelCodeProperties =
2892 EnableWavefrontSize32 =
2893 AMDHSA_BITS_GET(KernelCodeProperties,
2894 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2895 }
2896
2897 std::string Kd;
2898 raw_string_ostream KdStream(Kd);
2899 KdStream << ".amdhsa_kernel " << KdName << '\n';
2900
2902 while (C && C.tell() < Bytes.size()) {
2903 Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
2904
2905 cantFail(C.takeError());
2906
2907 if (!Res)
2908 return Res;
2909 }
2910 KdStream << ".end_amdhsa_kernel\n";
2911 outs() << KdStream.str();
2912 return true;
2913}
2914
2916 uint64_t &Size,
2917 ArrayRef<uint8_t> Bytes,
2918 uint64_t Address) const {
2919 // Right now only kernel descriptor needs to be handled.
2920 // We ignore all other symbols for target specific handling.
2921 // TODO:
2922 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2923 // Object V2 and V3 when symbols are marked protected.
2924
2925 // amd_kernel_code_t for Code Object V2.
2926 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2927 Size = 256;
2928 return createStringError(std::errc::invalid_argument,
2929 "code object v2 is not supported");
2930 }
2931
2932 // Code Object V3 kernel descriptors.
2933 StringRef Name = Symbol.Name;
2934 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2935 Size = 64; // Size = 64 regardless of success or failure.
2936 return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2937 }
2938
2939 return false;
2940}
2941
2942const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2943 int64_t Val) {
2944 MCContext &Ctx = getContext();
2945 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2946 // Note: only set the value to Val on a new symbol, in case a disassembler
2947 // has already been initialized in this context.
2948 if (!Sym->isVariable()) {
2950 } else {
2951 int64_t Res = ~Val;
2952 bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2953 if (!Valid || Res != Val)
2954 Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
2955 }
2956 return MCSymbolRefExpr::create(Sym, Ctx);
2957}
2958
2960 // Check for MUBUF and MTBUF instructions
2961 if (SIInstrFlags::isBuffer(*MCII, MI))
2962 return true;
2963
2964 // Check for SMEM buffer instructions (S_BUFFER_* instructions)
2965 if (SIInstrFlags::isSMRD(*MCII, MI) &&
2966 AMDGPU::getSMEMIsBuffer(MI.getOpcode()))
2967 return true;
2968
2969 return false;
2970}
2971
2972//===----------------------------------------------------------------------===//
2973// AMDGPUSymbolizer
2974//===----------------------------------------------------------------------===//
2975
2976// Try to find symbol name for specified label
2978 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2979 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2980 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2981
2982 if (!IsBranch) {
2983 return false;
2984 }
2985
2986 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2987 if (!Symbols)
2988 return false;
2989
2990 auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2991 return Val.Addr == static_cast<uint64_t>(Value) &&
2992 Val.Type == ELF::STT_NOTYPE;
2993 });
2994 if (Result != Symbols->end()) {
2995 auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2996 const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2998 return true;
2999 }
3000 // Add to list of referenced addresses, so caller can synthesize a label.
3001 ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
3002 return false;
3003}
3004
3006 int64_t Value,
3007 uint64_t Address) {
3008 llvm_unreachable("unimplemented");
3009}
3010
3011//===----------------------------------------------------------------------===//
3012// Initialization
3013//===----------------------------------------------------------------------===//
3014
3016 LLVMOpInfoCallback /*GetOpInfo*/,
3017 LLVMSymbolLookupCallback /*SymbolLookUp*/,
3018 void *DisInfo,
3019 MCContext *Ctx,
3020 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
3021 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
3022}
3023
3025 const MCSubtargetInfo &STI,
3026 MCContext &Ctx) {
3027 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
3028}
3029
3030extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
MCDisassembler::DecodeStatus DecodeStatus
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)
static VOPModifiers collectVOPModifiers(const MCInst &MI, bool IsVOP3P=false)
static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op, AMDGPU::OpName Name)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler()
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_KImmFP64(MCInst &Inst, uint64_t Imm, uint64_t Addr, const MCDisassembler *Decoder)
static SmallString< 32 > getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes)
Print a string describing the reserved bit range specified by Mask with offset BaseBytes for use in e...
#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)
static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static std::bitset< 128 > eat16Bytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)
static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize, unsigned OpWidth, unsigned Imm, unsigned EncImm, const MCDisassembler *Decoder)
static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static MCRegister CheckVGPROverflow(MCRegister Reg, const MCRegisterClass &RC, const MCRegisterInfo &MRI)
static int64_t getInlineImmValBF16(unsigned Imm)
#define DECODE_SDWA(DecName)
static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
#define DECODE_OPERAND_REG_8(RegClass)
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static int64_t getInlineImmVal32(unsigned Imm)
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
#define CHECK_RESERVED_BITS(MASK)
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define SGPR_MAX
static int64_t getInlineImmVal64(unsigned Imm)
static T eatBytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw, const MCDisassembler *Decoder)
static MCDisassembler * createAMDGPUDisassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)
static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val, uint64_t Addr, const void *Decoder)
static MCSymbolizer * createAMDGPUSymbolizer(const Triple &, LLVMOpInfoCallback, LLVMSymbolLookupCallback, void *DisInfo, MCContext *Ctx, std::unique_ptr< MCRelocationInfo > &&RelInfo)
static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static int64_t getInlineImmValF16(unsigned Imm)
#define GET_FIELD(MASK)
static std::bitset< 96 > eat12Bytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static Error createReservedKDBytesError(unsigned BaseInBytes, unsigned WidthInBytes)
Create an error object to return from onSymbolStart for reserved kernel descriptor bytes being set.
static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)
static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes, const char *Msg="")
Create an error object to return from onSymbolStart for reserved kernel descriptor bits being set.
static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI, MCOperand &MO, uint8_t NumRegs)
Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the appropriate subregister fo...
This file contains the declaration of the AMDGPU ISA disassembler.
Provides AMDGPU specific target descriptions.
AMDHSA kernel descriptor definitions.
#define AMDHSA_BITS_GET(SRC, MSK)
#define X(NUM, ENUM, NAME)
Definition ELF.h:856
#define AMDGPU_MACH_LIST(X)
Definition ELF.h:768
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_ABI
Definition Compiler.h:215
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
#define T
MachineInstr unsigned OpIdx
Interface definition for SIRegisterInfo.
MCOperand decodeNonVGPRSrcOp(const MCInst &Inst, unsigned Width, unsigned Val) const
MCOperand decodeLiteral64Constant() const
void convertVOPC64DPPInst(MCInst &MI) const
bool isBufferInstruction(const MCInst &MI) const
Check if the instruction is a buffer operation (MUBUF, MTBUF, or S_BUFFER)
void convertEXPInst(MCInst &MI) const
MCOperand decodeSpecialReg64(unsigned Val) const
const char * getRegClassName(unsigned RegClassID) const
Expected< bool > decodeCOMPUTE_PGM_RSRC1(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC1.
MCOperand decodeSplitBarrier(const MCInst &Inst, unsigned Val) const
Expected< bool > decodeKernelDescriptorDirective(DataExtractor::Cursor &Cursor, ArrayRef< uint8_t > Bytes, raw_string_ostream &KdStream) const
void convertVOPCDPPInst(MCInst &MI) const
MCOperand decodeSpecialReg96Plus(unsigned Val) const
MCOperand decodeSDWASrc32(unsigned Val) const
void setABIVersion(unsigned Version) override
ELF-specific, set the ABI version from the object header.
Expected< bool > decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC2.
unsigned getAgprClassId(unsigned Width) const
MCOperand decodeDpp8FI(unsigned Val) const
MCOperand decodeSDWASrc(unsigned Width, unsigned Val) const
void convertFMAanyK(MCInst &MI) const
DecodeStatus tryDecodeInst(const uint8_t *Table, MCInst &MI, InsnType Inst, uint64_t Address, raw_ostream &Comments) const
void convertMacDPPInst(MCInst &MI) const
MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const
void convertDPP8Inst(MCInst &MI) const
MCOperand createVGPR16Operand(unsigned RegIdx, bool IsHi) const
MCOperand errOperand(unsigned V, const Twine &ErrMsg) const
MCOperand decodeVersionImm(unsigned Imm) const
Expected< bool > decodeKernelDescriptor(StringRef KdName, ArrayRef< uint8_t > Bytes, uint64_t KdAddress) const
void convertVOP3DPPInst(MCInst &MI) const
void convertTrue16OpSel(MCInst &MI) const
MCOperand decodeSrcOp(const MCInst &Inst, unsigned Width, unsigned Val) const
MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const
MCOperand decodeLiteralConstant(const MCInstrDesc &Desc, const MCOperandInfo &OpDesc) const
Expected< bool > decodeCOMPUTE_PGM_RSRC3(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC3.
AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, MCInstrInfo const *MCII)
MCOperand decodeSpecialReg32(unsigned Val) const
MCOperand createRegOperand(MCRegister Reg) const
MCOperand decodeSDWAVopcDst(unsigned Val) const
void convertVINTERPInst(MCInst &MI) const
void convertSDWAInst(MCInst &MI) const
unsigned getSgprClassId(unsigned Width) const
static MCOperand decodeIntImmed(unsigned Imm)
void convertWMMAInst(MCInst &MI) const
MCOperand decodeBoolReg(const MCInst &Inst, unsigned Val) const
void emitTargetIDIfSupported(raw_ostream &OS, unsigned EFlags) const override
Emit something based on ELF's e_flags if the target needs to.
unsigned getVgprClassId(unsigned Width) const
void convertMAIInst(MCInst &MI) const
f8f6f4 instructions have different pseudos depending on the used formats.
unsigned getTtmpClassId(unsigned Width) const
DecodeStatus getInstruction(MCInst &MI, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address, raw_ostream &CS) const override
Returns the disassembly of a single instruction.
MCOperand decodeMandatoryLiteral64Constant(uint64_t Imm) const
void convertMIMGInst(MCInst &MI) const
bool isMacDPP(MCInst &MI) const
int getTTmpIdx(unsigned Val) const
void convertVOP3PDPPInst(MCInst &MI) const
MCOperand createSRegOperand(unsigned SRegClassID, unsigned Val) const
MCOperand decodeSDWASrc16(unsigned Val) const
Expected< bool > onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address) const override
Used to perform separate target specific disassembly for a particular symbol.
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) override
Try to add a symbolic operand instead of Value to the MCInst.
void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, uint64_t Address) override
Try to add a comment on the PC-relative load.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
const T * data() const
Definition ArrayRef.h:138
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:185
A class representing a position in a DataExtractor, as well as any error encountered during extractio...
LLVM_ABI uint32_t getU32(uint64_t *offset_ptr, Error *Err=nullptr) const
Extract a uint32_t value from *offset_ptr.
LLVM_ABI uint16_t getU16(uint64_t *offset_ptr, Error *Err=nullptr) const
Extract a uint16_t value from *offset_ptr.
LLVM_ABI void skip(Cursor &C, uint64_t Length) const
Advance the Cursor position by the given number of bytes.
LLVM_ABI StringRef getBytes(uint64_t *OffsetPtr, uint64_t Length, Error *Err=nullptr) const
Extract a fixed number of bytes from the specified offset.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
Tagged union holding either a T or a Error.
Definition Error.h:485
static const MCBinaryExpr * createOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:407
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
const MCRegisterInfo * getRegisterInfo() const
Definition MCContext.h:411
Superclass for all disassemblers.
MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
MCContext & getContext() const
const MCSubtargetInfo & STI
raw_ostream * CommentStream
DecodeStatus
Ternary decode status.
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getOpcode() const
Definition MCInst.h:202
void addOperand(const MCOperand Op)
Definition MCInst.h:215
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
Interface to description of machine instruction set.
Definition MCInstrInfo.h:27
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition MCInstrDesc.h:86
uint8_t OperandType
Information about the type of the operand.
Definition MCInstrDesc.h:98
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
bool isValid() const
Definition MCInst.h:64
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getSizeInBits() const
Return the size of the physical register in bits if we are able to determine it.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
MCRegister getMatchingSuperReg(MCRegister Reg, unsigned SubIdx, const MCRegisterClass *RC) const
Return a super-register of the specified register Reg so its sub-register of index SubIdx is Reg.
const char * getRegClassName(const MCRegisterClass *Class) const
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Idx" for physical register Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
Generic base class for all target subtargets.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:213
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
Symbolize and annotate disassembled instructions.
Represents a location in source code.
Definition SMLoc.h:22
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:56
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM Value Representation.
Definition Value.h:75
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
A raw_ostream that writes to a SmallVector or SmallString.
const char *(* LLVMSymbolLookupCallback)(void *DisInfo, uint64_t ReferenceValue, uint64_t *ReferenceType, uint64_t ReferencePC, const char **ReferenceName)
The type for the symbol lookup function.
int(* LLVMOpInfoCallback)(void *DisInfo, uint64_t PC, uint64_t Offset, uint64_t OpSize, uint64_t InstSize, int TagType, void *TagBuf)
The type for the operand information call back function.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo &STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI)
ArrayRef< GFXVersion > getGFXVersions()
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
EncodingField< Bit, Bit, D > EncodingBit
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isGFX10(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX13(const MCSubtargetInfo &STI)
bool isVOPC64DPP(unsigned Opc)
unsigned getAMDHSACodeObjectVersion(const Module &M)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const MFMA_F8F6F4_Info * getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA, unsigned FmtB, unsigned F8F8Opcode)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX10Plus(const MCSubtargetInfo &STI)
@ OPERAND_REG_IMM_V2FP64
Definition SIDefines.h:433
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:451
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:419
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:426
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:442
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:439
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:444
@ OPERAND_REG_IMM_V2INT64
Definition SIDefines.h:429
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:428
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:423
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:418
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:425
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:424
@ OPERAND_REG_IMM_V2FP16_SPLAT
Definition SIDefines.h:427
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:438
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:436
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:430
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:422
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:445
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:456
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:457
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:431
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:421
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:441
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:437
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:443
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:432
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:458
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:440
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:420
bool hasGDS(const MCSubtargetInfo &STI)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool isGFX1250(const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isGFX1250Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
const MFMA_F8F6F4_Info * getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, unsigned BLGP, unsigned F8F8Opcode)
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_NOTYPE
Definition ELF.h:1424
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1438
@ STT_OBJECT
Definition ELF.h:1425
@ EF_AMDGPU_FEATURE_XNACK_ANY_V4
Definition ELF.h:909
@ EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4
Definition ELF.h:920
@ EF_AMDGPU_FEATURE_SRAMECC_OFF_V4
Definition ELF.h:924
@ EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4
Definition ELF.h:907
@ EF_AMDGPU_FEATURE_XNACK_OFF_V4
Definition ELF.h:911
@ EF_AMDGPU_FEATURE_XNACK_V4
Definition ELF.h:905
@ EF_AMDGPU_FEATURE_SRAMECC_V4
Definition ELF.h:918
@ EF_AMDGPU_FEATURE_XNACK_ON_V4
Definition ELF.h:913
@ EF_AMDGPU_MACH
Definition ELF.h:851
@ EF_AMDGPU_FEATURE_SRAMECC_ANY_V4
Definition ELF.h:922
@ EF_AMDGPU_FEATURE_SRAMECC_ON_V4
Definition ELF.h:926
constexpr bool isAtomicRet(const T &...O)
Definition SIDefines.h:357
constexpr bool isVOPC(const T &...O)
Definition SIDefines.h:231
constexpr bool isVOP3(const T &...O)
Definition SIDefines.h:234
constexpr bool isMAI(const T &...O)
Definition SIDefines.h:345
constexpr bool isFLAT(const T &...O)
Definition SIDefines.h:276
constexpr bool isVOP3P(const T &...O)
Definition SIDefines.h:237
constexpr bool isBuffer(const T &...O)
Definition SIDefines.h:258
constexpr bool isVIMAGE(const T &...O)
Definition SIDefines.h:267
constexpr bool isSMRD(const T &...O)
Definition SIDefines.h:261
constexpr bool isMIMG(const T &...O)
Definition SIDefines.h:264
constexpr bool isWMMA(const T &...O)
Definition SIDefines.h:360
constexpr bool isMUBUF(const T &...O)
Definition SIDefines.h:252
constexpr bool isSDWA(const T &...O)
Definition SIDefines.h:243
constexpr bool isEXP(const T &...O)
Definition SIDefines.h:273
constexpr bool isSOPK(const T &...O)
Definition SIDefines.h:219
constexpr bool isVINTERP(const T &...O)
Definition SIDefines.h:288
constexpr bool isVSAMPLE(const T &...O)
Definition SIDefines.h:270
constexpr bool isDS(const T &...O)
Definition SIDefines.h:279
constexpr bool isGather4(const T &...O)
Definition SIDefines.h:297
constexpr bool isDPP(const T &...O)
Definition SIDefines.h:246
value_type read(const void *memory, endianness endian)
Read a value of a particular endianness from memory.
Definition Endian.h:60
uint16_t read16(const void *P, endianness E)
Definition Endian.h:409
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:573
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554
LLVM_ABI raw_fd_ostream & outs()
This returns a reference to a raw_fd_ostream for standard output.
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1321
Op::Description Desc
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:156
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition bit.h:204
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Definition Error.h:769
Target & getTheGCNTarget()
The target for GCN GPUs.
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Add
Sum of integers.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition VE.h:376
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
std::vector< SymbolInfoTy > SectionSymbolsTy
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
static void RegisterMCSymbolizer(Target &T, Target::MCSymbolizerCtorTy Fn)
RegisterMCSymbolizer - Register an MCSymbolizer implementation for the given target.
static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)
RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.