44#define DEBUG_TYPE "amdgpu-disassembler"
47 (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
48 : AMDGPU::EncValues::SGPR_MAX_SI)
60 MAI(Ctx.getAsmInfo()),
62 TargetMaxInstBytes(MAI.getMaxInstLength(&
STI)),
63 CodeObjectVersion(
AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
65 if (!
STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !
isGFX10Plus())
69 createConstantSymbolExpr(Symbol, Code);
71 UCVersionW64Expr = createConstantSymbolExpr(
"UC_VERSION_W64_BIT", 0x2000);
72 UCVersionW32Expr = createConstantSymbolExpr(
"UC_VERSION_W32_BIT", 0x4000);
73 UCVersionMDPExpr = createConstantSymbolExpr(
"UC_VERSION_MDP_BIT", 0x8000);
81 unsigned EFlags)
const {
82 OS <<
"\t.amdgcn_target \""
89#define X(NUM, ENUM, NAME) \
104 switch (SrameccSetting) {
117 switch (XnackSetting) {
142 AMDGPU::OpName Name) {
143 int OpIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), Name);
145 auto *
I =
MI.begin();
160 if (DAsm->tryAddingSymbolicOperand(Inst,
Offset, Addr,
true, 2, 2, 0))
169 if (DAsm->isGFX12Plus()) {
171 }
else if (DAsm->isVI()) {
182 return addOperand(Inst, DAsm->decodeBoolReg(Inst, Val));
189 return addOperand(Inst, DAsm->decodeSplitBarrier(Inst, Val));
195 return addOperand(Inst, DAsm->decodeDpp8FI(Val));
198#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
199 static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm, \
201 const MCDisassembler *Decoder) { \
202 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
203 return addOperand(Inst, DAsm->DecoderName(Imm)); \
208#define DECODE_OPERAND_REG_8(RegClass) \
209 static DecodeStatus Decode##RegClass##RegisterClass( \
210 MCInst &Inst, unsigned Imm, uint64_t , \
211 const MCDisassembler *Decoder) { \
212 assert(Imm < (1 << 8) && "8-bit encoding"); \
213 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
215 Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm)); \
218#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm) \
219 static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t , \
220 const MCDisassembler *Decoder) { \
221 assert(Imm < (1 << EncSize) && #EncSize "-bit encoding"); \
222 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
223 return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm)); \
227 unsigned OpWidth,
unsigned Imm,
unsigned EncImm,
229 assert(Imm < (1U << EncSize) &&
"Operand doesn't fit encoding!");
231 return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));
// Expands DECODE_SrcOp to define a static decoder named
// Decode<RegClass>RegisterClass with an encoding size of 7 bits. The `Imm`
// argument is the generated function's own immediate parameter, forwarded
// unchanged as the encoded source value.
236#define DECODE_OPERAND_SREG_7(RegClass, OpWidth) \
237 DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)
// Same as the 7-bit variant, but expands DECODE_SrcOp with an encoding size
// of 8 bits. The generated decoder is again named
// Decode<RegClass>RegisterClass and forwards `Imm` unchanged.
239#define DECODE_OPERAND_SREG_8(RegClass, OpWidth) \
240 DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)
246template <
unsigned OpW
idth>
254template <
unsigned OpW
idth>
258 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
264template <
unsigned OpW
idth>
267 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
272template <
unsigned OpW
idth>
276 return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, Decoder);
284template <
unsigned OpW
idth>
288 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
293template <
unsigned OpW
idth>
297 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
345 assert((Imm & (1 << 8)) == 0 &&
"Imm{8} should not be used");
347 bool IsHi = Imm & (1 << 9);
348 unsigned RegIdx = Imm & 0xff;
350 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
358 bool IsHi = Imm & (1 << 7);
359 unsigned RegIdx = Imm & 0x7f;
361 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
364template <
unsigned OpW
idth>
372 bool IsHi = Imm & (1 << 7);
373 unsigned RegIdx = Imm & 0x7f;
374 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
376 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
379template <
unsigned OpW
idth>
387 bool IsHi = Imm & (1 << 9);
388 unsigned RegIdx = Imm & 0xff;
389 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
391 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
402 bool IsHi = Imm & (1 << 9);
403 unsigned RegIdx = Imm & 0xff;
404 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
411 return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
418 return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm));
422 uint64_t Addr,
const void *Decoder) {
424 return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
430 return addOperand(Inst, DAsm->decodeSrcOp(Inst, Opw, Imm | 256));
433template <
unsigned Opw>
443 assert(Imm < (1 << 9) &&
"9-bit encoding");
445 return addOperand(Inst, DAsm->decodeSrcOp(Inst, 64, Imm));
// Shorthand for DECODE_OPERAND where the static decoder and the
// AMDGPUDisassembler member it forwards to share the same name,
// decodeSDWA<DecName>.
448#define DECODE_SDWA(DecName) \
449DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
459 return addOperand(Inst, DAsm->decodeVersionImm(Imm));
462#include "AMDGPUGenDisassemblerTables.inc"
// Explicit specializations of InsnBitWidth, one per instruction word type:
// the value is that type's width in bits (32 and 64 for the fixed-width
// integers, 96 and 128 for the std::bitset types).
466template <>
constexpr uint32_t InsnBitWidth<uint32_t> = 32;
467template <>
constexpr uint32_t InsnBitWidth<uint64_t> = 64;
468template <>
constexpr uint32_t InsnBitWidth<std::bitset<96>> = 96;
469template <>
constexpr uint32_t InsnBitWidth<std::bitset<128>> = 128;
476template <
typename InsnType>
484 const auto SavedBytes = Bytes;
491 decodeInstruction(Table, TmpInst, Inst,
Address,
this,
STI);
497 Comments << LocalComments;
504template <
typename InsnType>
509 for (
const uint8_t *
T : {Table1, Table2}) {
520 Bytes = Bytes.
slice(
sizeof(
T));
528 Bytes = Bytes.
slice(8);
530 Bytes = Bytes.
slice(4);
531 return (
Hi << 64) |
Lo;
538 Bytes = Bytes.
slice(8);
540 Bytes = Bytes.
slice(8);
541 return (
Hi << 64) |
Lo;
544void AMDGPUDisassembler::decodeImmOperands(
MCInst &
MI,
546 const MCInstrDesc &
Desc = MCII.get(
MI.getOpcode());
548 if (OpNo >=
MI.getNumOperands())
558 MCOperand &
Op =
MI.getOperand(OpNo);
561 int64_t
Imm =
Op.getImm();
575 switch (OpDesc.OperandType) {
596 Imm = (F16Val << 16) | (F16Val & 0xFFFF);
623 unsigned MaxInstBytesNum = std::min((
size_t)TargetMaxInstBytes, Bytes_.
size());
624 Bytes = Bytes_.
slice(0, MaxInstBytesNum);
628 Size = std::min((
size_t)4, Bytes_.
size());
640 Bytes = Bytes_.
slice(0, MaxInstBytesNum);
675 if (
STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
677 Bytes = Bytes_.
slice(4, MaxInstBytesNum - 4);
685 Bytes = Bytes_.
slice(0, MaxInstBytesNum);
687 }
else if (Bytes.size() >= 16 &&
688 STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
694 Bytes = Bytes_.
slice(0, MaxInstBytesNum);
697 if (Bytes.size() >= 8) {
700 if (
STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
704 if (
STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
708 if (
STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
715 if (
STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
719 if (
STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
723 if (
STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
775 Bytes = Bytes_.
slice(0, MaxInstBytesNum);
779 if (Bytes.size() >= 4) {
792 if (
STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
796 if (
STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
800 if (
STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
838 decodeImmOperands(
MI, *MCII);
850 else if (AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::dpp8) !=
862 AMDGPU::OpName::src2_modifiers);
865 if (
MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
866 MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
869 AMDGPU::OpName::src2_modifiers);
878 int CPolPos = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
879 AMDGPU::OpName::cpol);
883 if (
MI.getNumOperands() <= (
unsigned)CPolPos) {
885 AMDGPU::OpName::cpol);
887 MI.getOperand(CPolPos).setImm(
MI.getOperand(CPolPos).getImm() | CPol);
893 (
STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
896 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::tfe);
897 if (TFEOpIdx != -1) {
898 auto *TFEIter =
MI.begin();
899 std::advance(TFEIter, TFEOpIdx);
907 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::offset);
908 if (OffsetIdx != -1) {
909 uint32_t Imm =
MI.getOperand(OffsetIdx).getImm();
911 if (SignedOffset < 0)
918 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::swz);
919 if (SWZOpIdx != -1) {
920 auto *SWZIter =
MI.begin();
921 std::advance(SWZIter, SWZOpIdx);
929 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::vaddr0);
931 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::srsrc);
932 unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
933 if (VAddr0Idx >= 0 && NSAArgs > 0) {
934 unsigned NSAWords = (NSAArgs + 3) / 4;
935 if (Bytes.size() < 4 * NSAWords)
937 for (
unsigned i = 0; i < NSAArgs; ++i) {
938 const unsigned VAddrIdx = VAddr0Idx + 1 + i;
940 MCII->getOpRegClassID(
Desc.operands()[VAddrIdx], HwModeRegClass);
943 Bytes = Bytes.slice(4 * NSAWords);
967 int VDstIn_Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
968 AMDGPU::OpName::vdst_in);
969 if (VDstIn_Idx != -1) {
970 int Tied = MCII->get(
MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
972 if (Tied != -1 && (
MI.getNumOperands() <= (
unsigned)VDstIn_Idx ||
973 !
MI.getOperand(VDstIn_Idx).isReg() ||
974 MI.getOperand(VDstIn_Idx).getReg() !=
MI.getOperand(Tied).getReg())) {
975 if (
MI.getNumOperands() > (
unsigned)VDstIn_Idx)
976 MI.erase(&
MI.getOperand(VDstIn_Idx));
979 AMDGPU::OpName::vdst_in);
991 MCII->get(
MI.getOpcode()).getNumDefs() == 0 &&
992 MCII->get(
MI.getOpcode()).hasImplicitDefOfPhysReg(AMDGPU::EXEC)) {
993 auto ExecEncoding = MRI.getEncodingValue(AMDGPU::EXEC_LO);
994 if (Bytes_[0] != ExecEncoding)
998 Size = MaxInstBytesNum - Bytes.size();
1003 if (
STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
1013 if (
MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
1014 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
1015 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
1016 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
1017 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx13 ||
1018 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx13 ||
1019 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
1020 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
1021 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
1022 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
1023 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx13 ||
1024 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx13 ||
1025 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
1026 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
1027 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
1028 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
1029 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx13 ||
1030 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx13 ||
1031 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
1032 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
1033 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
1034 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12 ||
1035 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx13 ||
1036 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx13) {
1044 if (
STI.hasFeature(AMDGPU::FeatureGFX9) ||
1045 STI.hasFeature(AMDGPU::FeatureGFX10)) {
1049 }
else if (
STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
1050 int SDst = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::sdst);
1054 AMDGPU::OpName::sdst);
1074 MO.
getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7)) {
1082 BaseReg, AMDGPU::sub0, &MRI.
getRegClass(AMDGPU::VReg_384RegClassID));
1083 return MO.
setReg(NewReg);
1100 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::blgp);
1105 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::cbsz);
1107 unsigned CBSZ =
MI.getOperand(CbszIdx).getImm();
1108 unsigned BLGP =
MI.getOperand(BlgpIdx).getImm();
1112 if (!AdjustedRegClassOpcode ||
1113 AdjustedRegClassOpcode->
Opcode ==
MI.getOpcode())
1116 MI.setOpcode(AdjustedRegClassOpcode->
Opcode);
1118 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
1120 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src1);
1129 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::matrix_a_fmt);
1134 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::matrix_b_fmt);
1136 unsigned FmtA =
MI.getOperand(FmtAIdx).getImm();
1137 unsigned FmtB =
MI.getOperand(FmtBIdx).getImm();
1141 if (!AdjustedRegClassOpcode ||
1142 AdjustedRegClassOpcode->
Opcode ==
MI.getOpcode())
1145 MI.setOpcode(AdjustedRegClassOpcode->
Opcode);
1147 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
1149 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src1);
1167 bool IsVOP3P =
false) {
1169 unsigned Opc =
MI.getOpcode();
1170 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
1171 AMDGPU::OpName::src1_modifiers,
1172 AMDGPU::OpName::src2_modifiers};
1173 for (
int J = 0; J < 3; ++J) {
1174 int OpIdx = AMDGPU::getNamedOperandIdx(
Opc, ModOps[J]);
1178 unsigned Val =
MI.getOperand(
OpIdx).getImm();
1185 }
else if (J == 0) {
1196 const unsigned Opc =
MI.getOpcode();
1198 MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
1199 constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
1200 OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
1202 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
1204 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
1206 {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
1208 for (
const auto &[
OpName, OpModsName, OpSelMask] : OpAndOpMods) {
1210 int OpModsIdx = AMDGPU::getNamedOperandIdx(
Opc, OpModsName);
1211 if (
OpIdx == -1 || OpModsIdx == -1)
1218 unsigned OpEnc = MRI.getEncodingValue(
Op.getReg());
1219 const MCOperand &OpMods =
MI.getOperand(OpModsIdx);
1220 unsigned ModVal = OpMods.
getImm();
1221 if (ModVal & OpSelMask) {
1231 constexpr int DST_IDX = 0;
1232 auto Opcode =
MI.getOpcode();
1233 const auto &
Desc = MCII->get(Opcode);
1234 auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);
1236 if (OldIdx != -1 &&
Desc.getOperandConstraint(
1240 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
1251 assert(
MI.getNumOperands() + 1 < MCII->get(
MI.getOpcode()).getNumOperands());
1254 AMDGPU::OpName::src2_modifiers);
1258 unsigned Opc =
MI.getOpcode();
1261 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::vdst_in);
1262 if (VDstInIdx != -1)
1265 unsigned DescNumOps = MCII->get(
Opc).getNumOperands();
1266 if (
MI.getNumOperands() < DescNumOps &&
1271 AMDGPU::OpName::op_sel);
1274 if (
MI.getNumOperands() < DescNumOps &&
1277 AMDGPU::OpName::src0_modifiers);
1279 if (
MI.getNumOperands() < DescNumOps &&
1282 AMDGPU::OpName::src1_modifiers);
1290 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::vdst_in);
1291 if (VDstInIdx != -1)
1294 unsigned Opc =
MI.getOpcode();
1295 unsigned DescNumOps = MCII->get(
Opc).getNumOperands();
1296 if (
MI.getNumOperands() < DescNumOps &&
1300 AMDGPU::OpName::op_sel);
1315 BaseReg = AMDGPU::VGPR0;
1317 BaseReg = AMDGPU::AGPR0;
1319 assert(BaseReg &&
"Only vector registers expected");
1321 return (Sub0 - BaseReg + NumRegs <= 256) ?
Reg :
MCRegister();
1328 int VDstIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
1329 AMDGPU::OpName::vdst);
1331 int VDataIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
1332 AMDGPU::OpName::vdata);
1334 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::vaddr0);
1336 ? AMDGPU::OpName::srsrc
1337 : AMDGPU::OpName::rsrc;
1338 int RsrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), RsrcOpName);
1339 int DMaskIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
1340 AMDGPU::OpName::dmask);
1342 int TFEIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
1343 AMDGPU::OpName::tfe);
1344 int D16Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
1345 AMDGPU::OpName::d16);
1352 if (BaseOpcode->
BVH) {
1358 bool IsAtomic = (VDstIdx != -1);
1362 bool IsPartialNSA =
false;
1363 unsigned AddrSize = Info->VAddrDwords;
1367 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::dim);
1369 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::a16);
1372 const bool IsA16 = (A16Idx != -1 &&
MI.getOperand(A16Idx).
getImm());
1379 IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
1380 Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
1381 Info->MIMGEncoding == AMDGPU::MIMGEncGfx12 ||
1382 Info->MIMGEncoding == AMDGPU::MIMGEncGfx13;
1384 if (!IsVSample && AddrSize > 12)
1387 if (AddrSize > Info->VAddrDwords) {
1388 if (!
STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
1393 IsPartialNSA =
true;
1398 unsigned DMask =
MI.getOperand(DMaskIdx).getImm() & 0xf;
1399 unsigned DstSize = IsGather4 ? 4 : std::max(
llvm::popcount(DMask), 1);
1401 bool D16 = D16Idx >= 0 &&
MI.getOperand(D16Idx).getImm();
1403 DstSize = (DstSize + 1) / 2;
1406 if (TFEIdx != -1 &&
MI.getOperand(TFEIdx).getImm())
1409 if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
1414 if (NewOpcode == -1)
1419 if (DstSize != Info->VDataDwords) {
1420 auto DataRCID = MCII->getOpRegClassID(
1421 MCII->get(NewOpcode).operands()[VDataIdx], HwModeRegClass);
1425 MCRegister VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
1426 Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
1429 NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, &NewRC);
1440 int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
1442 if (
STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
1443 AddrSize != Info->VAddrDwords) {
1444 MCRegister VAddrSA =
MI.getOperand(VAddrSAIdx).getReg();
1445 MCRegister VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
1446 VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
1448 auto AddrRCID = MCII->getOpRegClassID(
1449 MCII->get(NewOpcode).operands()[VAddrSAIdx], HwModeRegClass);
1452 NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0, &NewRC);
1458 MI.setOpcode(NewOpcode);
1460 if (NewVdata != AMDGPU::NoRegister) {
1472 assert(AddrSize <= Info->VAddrDwords);
1473 MI.erase(
MI.begin() + VAddr0Idx + AddrSize,
1474 MI.begin() + VAddr0Idx + Info->VAddrDwords);
1482 unsigned Opc =
MI.getOpcode();
1483 unsigned DescNumOps = MCII->get(
Opc).getNumOperands();
1486 if (
MI.getNumOperands() < DescNumOps &&
1490 if (
MI.getNumOperands() < DescNumOps &&
1493 AMDGPU::OpName::op_sel);
1494 if (
MI.getNumOperands() < DescNumOps &&
1497 AMDGPU::OpName::op_sel_hi);
1498 if (
MI.getNumOperands() < DescNumOps &&
1501 AMDGPU::OpName::neg_lo);
1502 if (
MI.getNumOperands() < DescNumOps &&
1505 AMDGPU::OpName::neg_hi);
1510 unsigned Opc =
MI.getOpcode();
1511 unsigned DescNumOps = MCII->get(
Opc).getNumOperands();
1513 if (
MI.getNumOperands() < DescNumOps &&
1517 if (
MI.getNumOperands() < DescNumOps &&
1520 AMDGPU::OpName::src0_modifiers);
1522 if (
MI.getNumOperands() < DescNumOps &&
1525 AMDGPU::OpName::src1_modifiers);
1529 unsigned Opc =
MI.getOpcode();
1530 unsigned DescNumOps = MCII->get(
Opc).getNumOperands();
1534 if (
MI.getNumOperands() < DescNumOps &&
1538 AMDGPU::OpName::op_sel);
1543 assert(HasLiteral &&
"Should have decoded a literal");
1549 &getAMDGPUMCRegisterClass(RegClassID));
1554 const Twine& ErrMsg)
const {
1568 unsigned Val)
const {
1569 const auto &RegCl = getAMDGPUMCRegisterClass(RegClassID);
1570 if (Val >= RegCl.getNumRegs())
1572 ": unknown register " +
Twine(Val));
1578 unsigned Val)
const {
1582 switch (SRegClassID) {
1583 case AMDGPU::SGPR_32RegClassID:
1584 case AMDGPU::TTMP_32RegClassID:
1586 case AMDGPU::SGPR_64RegClassID:
1587 case AMDGPU::TTMP_64RegClassID:
1590 case AMDGPU::SGPR_96RegClassID:
1591 case AMDGPU::TTMP_96RegClassID:
1592 case AMDGPU::SGPR_128RegClassID:
1593 case AMDGPU::TTMP_128RegClassID:
1596 case AMDGPU::SGPR_256RegClassID:
1597 case AMDGPU::TTMP_256RegClassID:
1600 case AMDGPU::SGPR_288RegClassID:
1601 case AMDGPU::TTMP_288RegClassID:
1602 case AMDGPU::SGPR_320RegClassID:
1603 case AMDGPU::TTMP_320RegClassID:
1604 case AMDGPU::SGPR_352RegClassID:
1605 case AMDGPU::TTMP_352RegClassID:
1606 case AMDGPU::SGPR_384RegClassID:
1607 case AMDGPU::TTMP_384RegClassID:
1608 case AMDGPU::SGPR_512RegClassID:
1609 case AMDGPU::TTMP_512RegClassID:
1618 if (Val % (1 << shift)) {
1620 <<
": scalar reg isn't aligned " << Val;
1628 unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
1638 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1640 return errOperand(Val,
"More than one unique literal is illegal");
1651 return errOperand(Val,
"More than one unique literal is illegal");
1656 bool UseLit64 =
Hi_32(Literal) == 0;
1669 if (Bytes.size() < 4) {
1670 return errOperand(0,
"cannot read literal, inst bytes left " +
1671 Twine(Bytes.size()));
1678 bool HasInv2Pi =
true;
1682 int64_t Val = Literal;
1683 bool UseLit =
false;
1754 assert(
STI.hasFeature(AMDGPU::Feature64BitLiterals));
1757 if (Bytes.size() < 8) {
1758 return errOperand(0,
"cannot read literal64, inst bytes left " +
1759 Twine(Bytes.size()));
1765 bool UseLit64 =
Hi_32(Literal) == 0;
1768 Literal,
STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm));
1778 assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
1780 (
static_cast<int64_t
>(Imm) - INLINE_INTEGER_C_MIN) :
1781 (INLINE_INTEGER_C_POSITIVE_MAX -
static_cast<int64_t
>(Imm)));
1829 return 0x3fc45f306dc9c882;
1891 return VGPR_32RegClassID;
1893 return VReg_64RegClassID;
1895 return VReg_96RegClassID;
1897 return VReg_128RegClassID;
1899 return VReg_160RegClassID;
1901 return VReg_192RegClassID;
1903 return VReg_256RegClassID;
1905 return VReg_288RegClassID;
1907 return VReg_320RegClassID;
1909 return VReg_352RegClassID;
1911 return VReg_384RegClassID;
1913 return VReg_512RegClassID;
1915 return VReg_1024RegClassID;
1926 return AGPR_32RegClassID;
1928 return AReg_64RegClassID;
1930 return AReg_96RegClassID;
1932 return AReg_128RegClassID;
1934 return AReg_160RegClassID;
1936 return AReg_256RegClassID;
1938 return AReg_288RegClassID;
1940 return AReg_320RegClassID;
1942 return AReg_352RegClassID;
1944 return AReg_384RegClassID;
1946 return AReg_512RegClassID;
1948 return AReg_1024RegClassID;
1959 return SGPR_32RegClassID;
1961 return SGPR_64RegClassID;
1963 return SGPR_96RegClassID;
1965 return SGPR_128RegClassID;
1967 return SGPR_160RegClassID;
1969 return SGPR_256RegClassID;
1971 return SGPR_288RegClassID;
1973 return SGPR_320RegClassID;
1975 return SGPR_352RegClassID;
1977 return SGPR_384RegClassID;
1979 return SGPR_512RegClassID;
1990 return TTMP_32RegClassID;
1992 return TTMP_64RegClassID;
1994 return TTMP_128RegClassID;
1996 return TTMP_256RegClassID;
1998 return TTMP_288RegClassID;
2000 return TTMP_320RegClassID;
2002 return TTMP_352RegClassID;
2004 return TTMP_384RegClassID;
2006 return TTMP_512RegClassID;
2014 unsigned TTmpMin =
isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
2015 unsigned TTmpMax =
isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
2017 return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
2021 unsigned Val)
const {
2026 bool IsAGPR = Val & 512;
2029 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
2038 unsigned Val)
const {
2041 assert(Val < (1 << 8) &&
"9-bit Src encoding when Val{8} is 0");
2046 static_assert(SGPR_MIN == 0);
2055 if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
2056 (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
2057 Val == LITERAL_CONST)
2060 if (Val == LITERAL64_CONST &&
STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
2083 unsigned Val)
const {
2085 AMDGPU::getNamedOperandIdx(Inst.
getOpcode(), AMDGPU::OpName::vdstX);
2088 unsigned XDstReg = MRI.getEncodingValue(Inst.
getOperand(VDstXInd).
getReg());
2089 Val |= ~XDstReg & 1;
2182 const unsigned Val)
const {
2186 if (
STI.hasFeature(AMDGPU::FeatureGFX9) ||
2187 STI.hasFeature(AMDGPU::FeatureGFX10)) {
2190 if (
int(SDWA9EncValues::SRC_VGPR_MIN) <=
int(Val) &&
2191 Val <= SDWA9EncValues::SRC_VGPR_MAX) {
2193 Val - SDWA9EncValues::SRC_VGPR_MIN);
2195 if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
2196 Val <= (
isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
2197 : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
2199 Val - SDWA9EncValues::SRC_SGPR_MIN);
2201 if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
2202 Val <= SDWA9EncValues::SRC_TTMP_MAX) {
2204 Val - SDWA9EncValues::SRC_TTMP_MIN);
2207 const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
2209 if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
2210 (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
2215 if (
STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
2231 assert((
STI.hasFeature(AMDGPU::FeatureGFX9) ||
2232 STI.hasFeature(AMDGPU::FeatureGFX10)) &&
2233 "SDWAVopcDst should be present only on GFX9+");
2235 bool IsWave32 =
STI.hasFeature(AMDGPU::FeatureWavefrontSize32);
2237 if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
2238 Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
2254 unsigned Val)
const {
2255 return STI.hasFeature(AMDGPU::FeatureWavefrontSize32)
2261 unsigned Val)
const {
2278 auto [
Version, W64, W32, MDP] = Encoding::decode(Imm);
2281 if (Encoding::encode(
Version, W64, W32, MDP) != Imm)
2291 if (
I == Versions.end())
2307 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2313 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2325 return STI.hasFeature(AMDGPU::FeatureGFX11);
2333 return STI.hasFeature(AMDGPU::FeatureGFX11_7Insts);
2337 return STI.hasFeature(AMDGPU::FeatureGFX12);
2357 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2379 if (PopCount == 1) {
2380 S <<
"bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) <<
')';
2382 S <<
"bits in range ("
2383 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) <<
':'
2384 << (TrailingZeros + BaseBytes * CHAR_BIT) <<
')';
// Applies AMDHSA_BITS_GET to FourByteBuffer for the given MASK. The macro
// names FourByteBuffer directly, so a variable of that name must be in scope
// wherever it is expanded.
2390#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
2391#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2393 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
2395#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
2397 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
2398 << GET_FIELD(MASK) << '\n'; \
2401#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
2403 if (FourByteBuffer & (MASK)) { \
2404 return createStringError(std::errc::invalid_argument, \
2405 "kernel descriptor " DESC \
2406 " reserved %s set" MSG, \
2407 getBitRangeFromMask((MASK), 0).c_str()); \
// Convenience wrappers around CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG).
// They differ only in how the description and the message suffix are chosen.

// Description is the stringified mask name; no extra message.
2411#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
// Description is the stringified mask name; MSG is appended after ", ".
2412#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
2413 CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
// Description is supplied by the caller; no extra message.
2414#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
2415 CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
// Description is supplied by the caller; MSG is appended after ", ".
2416#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
2417 CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
2430 uint32_t GranulatedWorkitemVGPRCount =
2431 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
2434 (GranulatedWorkitemVGPRCount + 1) *
2437 KdStream << Indent <<
".amdhsa_next_free_vgpr " << NextFreeVGPR <<
'\n';
2458 uint32_t GranulatedWavefrontSGPRCount =
2459 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
2463 "must be zero on gfx10+");
2465 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
2468 KdStream << Indent <<
".amdhsa_reserve_vcc " << 0 <<
'\n';
2470 KdStream << Indent <<
".amdhsa_reserve_flat_scratch " << 0 <<
'\n';
2471 bool ReservedXnackMask =
STI.hasFeature(AMDGPU::FeatureXNACK);
2472 assert(!ReservedXnackMask ||
STI.hasFeature(AMDGPU::FeatureSupportsXNACK));
2473 KdStream << Indent <<
".amdhsa_reserve_xnack_mask " << ReservedXnackMask
2475 KdStream << Indent <<
".amdhsa_next_free_sgpr " << NextFreeSGPR <<
"\n";
2480 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
2482 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
2484 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
2486 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
2490 if (
STI.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
2492 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
2496 if (
STI.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
2498 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
2505 PRINT_DIRECTIVE(
".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
2508 "COMPUTE_PGM_RSRC1",
"must be zero pre-gfx9");
2514 COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);
2517 "COMPUTE_PGM_RSRC1");
2528 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
2530 PRINT_DIRECTIVE(
".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
2531 PRINT_DIRECTIVE(
".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
2534 "COMPUTE_PGM_RSRC1");
2539 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
2551 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2553 PRINT_DIRECTIVE(
".amdhsa_system_sgpr_private_segment_wavefront_offset",
2554 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2556 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2558 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2560 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2562 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2564 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2571 ".amdhsa_exception_fp_ieee_invalid_op",
2572 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2574 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2576 ".amdhsa_exception_fp_ieee_div_zero",
2577 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2579 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2581 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2583 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2585 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2598 KdStream << Indent <<
".amdhsa_accum_offset "
2599 << (
GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2602 PRINT_DIRECTIVE(
".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2605 "COMPUTE_PGM_RSRC3",
"must be zero on gfx90a");
2607 "COMPUTE_PGM_RSRC3",
"must be zero on gfx90a");
2611 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2613 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2616 "SHARED_VGPR_COUNT",
2617 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2621 "COMPUTE_PGM_RSRC3",
2622 "must be zero on gfx12+");
2628 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2630 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2632 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2635 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2638 "COMPUTE_PGM_RSRC3",
2639 "must be zero on gfx10");
2644 "COMPUTE_PGM_RSRC3",
"must be zero on gfx10+");
2649 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2652 "COMPUTE_PGM_RSRC3",
2653 "must be zero on gfx10 or gfx11");
2659 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
2661 "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
2663 COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
2665 "ENABLE_DIDT_THROTTLE",
2666 COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);
2669 "COMPUTE_PGM_RSRC3",
2670 "must be zero on gfx10+");
2675 "COMPUTE_PGM_RSRC3",
"must be zero on gfx10+");
2680 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2683 "COMPUTE_PGM_RSRC3",
2684 "must be zero on gfx10");
2686 }
else if (FourByteBuffer) {
2688 std::errc::invalid_argument,
2689 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2693#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2694#undef PRINT_DIRECTIVE
2696#undef CHECK_RESERVED_BITS_IMPL
2697#undef CHECK_RESERVED_BITS
2698#undef CHECK_RESERVED_BITS_MSG
2699#undef CHECK_RESERVED_BITS_DESC
2700#undef CHECK_RESERVED_BITS_DESC_MSG
2705 const char *Msg =
"") {
2707 std::errc::invalid_argument,
"kernel descriptor reserved %s set%s%s",
2714 unsigned WidthInBytes) {
2718 std::errc::invalid_argument,
2719 "kernel descriptor reserved bits in range (%u:%u) set",
2720 (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2726#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2728 KdStream << Indent << DIRECTIVE " " \
2729 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2738 assert(Bytes.size() == 64);
2741 switch (Cursor.tell()) {
2743 FourByteBuffer = DE.
getU32(Cursor);
2744 KdStream << Indent <<
".amdhsa_group_segment_fixed_size " << FourByteBuffer
2749 FourByteBuffer = DE.
getU32(Cursor);
2750 KdStream << Indent <<
".amdhsa_private_segment_fixed_size "
2751 << FourByteBuffer <<
'\n';
2755 FourByteBuffer = DE.
getU32(Cursor);
2756 KdStream << Indent <<
".amdhsa_kernarg_size "
2757 << FourByteBuffer <<
'\n';
2762 ReservedBytes = DE.
getBytes(Cursor, 4);
2763 for (
char B : ReservedBytes) {
2778 ReservedBytes = DE.
getBytes(Cursor, 20);
2779 for (
char B : ReservedBytes) {
2786 FourByteBuffer = DE.
getU32(Cursor);
2790 FourByteBuffer = DE.
getU32(Cursor);
2794 FourByteBuffer = DE.
getU32(Cursor);
2799 TwoByteBuffer = DE.
getU16(Cursor);
2803 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2805 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2807 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2809 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2811 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2814 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2816 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2818 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2824 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2826 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2831 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2836 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2838 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2847 TwoByteBuffer = DE.
getU16(Cursor);
2848 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2850 KERNARG_PRELOAD_SPEC_LENGTH);
2853 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2855 KERNARG_PRELOAD_SPEC_OFFSET);
2861 ReservedBytes = DE.
getBytes(Cursor, 4);
2862 for (
char B : ReservedBytes) {
2872#undef PRINT_DIRECTIVE
2879 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2881 "kernel descriptor must be 64-byte aligned");
2892 EnableWavefrontSize32 =
2894 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2899 KdStream <<
".amdhsa_kernel " << KdName <<
'\n';
2902 while (
C &&
C.tell() < Bytes.size()) {
2910 KdStream <<
".end_amdhsa_kernel\n";
2929 "code object v2 is not supported");
2942const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(
StringRef Id,
2945 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2953 if (!Valid || Res != Val)
2954 Ctx.reportWarning(
SMLoc(),
"unsupported redefinition of " + Id);
2994 if (Result != Symbols->end()) {
2995 auto *Sym =
Ctx.getOrCreateSymbol(Result->Name);
3001 ReferencedAddresses.push_back(
static_cast<uint64_t>(
Value));
3020 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
MCDisassembler::DecodeStatus DecodeStatus
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)
static VOPModifiers collectVOPModifiers(const MCInst &MI, bool IsVOP3P=false)
static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op, AMDGPU::OpName Name)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler()
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_KImmFP64(MCInst &Inst, uint64_t Imm, uint64_t Addr, const MCDisassembler *Decoder)
static SmallString< 32 > getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes)
Print a string describing the reserved bit range specified by Mask with offset BaseBytes for use in e...
#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)
static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static std::bitset< 128 > eat16Bytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)
static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize, unsigned OpWidth, unsigned Imm, unsigned EncImm, const MCDisassembler *Decoder)
static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static MCRegister CheckVGPROverflow(MCRegister Reg, const MCRegisterClass &RC, const MCRegisterInfo &MRI)
static int64_t getInlineImmValBF16(unsigned Imm)
#define DECODE_SDWA(DecName)
static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
#define DECODE_OPERAND_REG_8(RegClass)
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static int64_t getInlineImmVal32(unsigned Imm)
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
#define CHECK_RESERVED_BITS(MASK)
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static int64_t getInlineImmVal64(unsigned Imm)
static T eatBytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw, const MCDisassembler *Decoder)
static MCDisassembler * createAMDGPUDisassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)
static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val, uint64_t Addr, const void *Decoder)
static MCSymbolizer * createAMDGPUSymbolizer(const Triple &, LLVMOpInfoCallback, LLVMSymbolLookupCallback, void *DisInfo, MCContext *Ctx, std::unique_ptr< MCRelocationInfo > &&RelInfo)
static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static int64_t getInlineImmValF16(unsigned Imm)
static std::bitset< 96 > eat12Bytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static Error createReservedKDBytesError(unsigned BaseInBytes, unsigned WidthInBytes)
Create an error object to return from onSymbolStart for reserved kernel descriptor bytes being set.
static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)
static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes, const char *Msg="")
Create an error object to return from onSymbolStart for reserved kernel descriptor bits being set.
static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI, MCOperand &MO, uint8_t NumRegs)
Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the appropriate subregister fo...
This file contains declaration for AMDGPU ISA disassembler.
Provides AMDGPU specific target descriptions.
AMDHSA kernel descriptor definitions.
#define AMDHSA_BITS_GET(SRC, MSK)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_EXTERNAL_VISIBILITY
MachineInstr unsigned OpIdx
Interface definition for SIRegisterInfo.
MCOperand decodeNonVGPRSrcOp(const MCInst &Inst, unsigned Width, unsigned Val) const
MCOperand decodeLiteral64Constant() const
void convertVOPC64DPPInst(MCInst &MI) const
bool isBufferInstruction(const MCInst &MI) const
Check if the instruction is a buffer operation (MUBUF, MTBUF, or S_BUFFER)
bool hasKernargPreload() const
void convertEXPInst(MCInst &MI) const
MCOperand decodeSpecialReg64(unsigned Val) const
const char * getRegClassName(unsigned RegClassID) const
Expected< bool > decodeCOMPUTE_PGM_RSRC1(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC1.
MCOperand decodeSplitBarrier(const MCInst &Inst, unsigned Val) const
Expected< bool > decodeKernelDescriptorDirective(DataExtractor::Cursor &Cursor, ArrayRef< uint8_t > Bytes, raw_string_ostream &KdStream) const
void convertVOPCDPPInst(MCInst &MI) const
bool isGFX1250Plus() const
MCOperand decodeSpecialReg96Plus(unsigned Val) const
MCOperand decodeSDWASrc32(unsigned Val) const
void setABIVersion(unsigned Version) override
ELF-specific, set the ABI version from the object header.
Expected< bool > decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC2.
unsigned getAgprClassId(unsigned Width) const
MCOperand decodeDpp8FI(unsigned Val) const
MCOperand decodeSDWASrc(unsigned Width, unsigned Val) const
void convertFMAanyK(MCInst &MI) const
DecodeStatus tryDecodeInst(const uint8_t *Table, MCInst &MI, InsnType Inst, uint64_t Address, raw_ostream &Comments) const
void convertMacDPPInst(MCInst &MI) const
MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const
void convertDPP8Inst(MCInst &MI) const
MCOperand createVGPR16Operand(unsigned RegIdx, bool IsHi) const
MCOperand errOperand(unsigned V, const Twine &ErrMsg) const
MCOperand decodeVersionImm(unsigned Imm) const
Expected< bool > decodeKernelDescriptor(StringRef KdName, ArrayRef< uint8_t > Bytes, uint64_t KdAddress) const
void convertVOP3DPPInst(MCInst &MI) const
void convertTrue16OpSel(MCInst &MI) const
MCOperand decodeSrcOp(const MCInst &Inst, unsigned Width, unsigned Val) const
MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const
MCOperand decodeLiteralConstant(const MCInstrDesc &Desc, const MCOperandInfo &OpDesc) const
Expected< bool > decodeCOMPUTE_PGM_RSRC3(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC3.
AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, MCInstrInfo const *MCII)
MCOperand decodeSpecialReg32(unsigned Val) const
MCOperand createRegOperand(MCRegister Reg) const
MCOperand decodeSDWAVopcDst(unsigned Val) const
void convertVINTERPInst(MCInst &MI) const
void convertSDWAInst(MCInst &MI) const
unsigned getSgprClassId(unsigned Width) const
static MCOperand decodeIntImmed(unsigned Imm)
void convertWMMAInst(MCInst &MI) const
MCOperand decodeBoolReg(const MCInst &Inst, unsigned Val) const
void emitTargetIDIfSupported(raw_ostream &OS, unsigned EFlags) const override
Emit something based on ELF's e_flags if the target needs to.
unsigned getVgprClassId(unsigned Width) const
void convertMAIInst(MCInst &MI) const
f8f6f4 instructions have different pseudos depending on the formats used.
bool hasArchitectedFlatScratch() const
unsigned getTtmpClassId(unsigned Width) const
DecodeStatus getInstruction(MCInst &MI, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address, raw_ostream &CS) const override
Returns the disassembly of a single instruction.
MCOperand decodeMandatoryLiteral64Constant(uint64_t Imm) const
void convertMIMGInst(MCInst &MI) const
bool isMacDPP(MCInst &MI) const
int getTTmpIdx(unsigned Val) const
void convertVOP3PDPPInst(MCInst &MI) const
MCOperand createSRegOperand(unsigned SRegClassID, unsigned Val) const
MCOperand decodeSDWASrc16(unsigned Val) const
Expected< bool > onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address) const override
Used to perform separate target specific disassembly for a particular symbol.
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) override
Try to add a symbolic operand instead of Value to the MCInst.
void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, uint64_t Address) override
Try to add a comment on the PC-relative load.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Lightweight error class with error context and mandatory checking.
Tagged union holding either a T or an Error.
static const MCBinaryExpr * createOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Context object for machine code objects.
const MCRegisterInfo * getRegisterInfo() const
Superclass for all disassemblers.
MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
MCContext & getContext() const
const MCSubtargetInfo & STI
raw_ostream * CommentStream
DecodeStatus
Ternary decode status.
Base class for the full range of assembler expressions which are needed for parsing.
Instances of this class represent a single low-level machine instruction.
unsigned getOpcode() const
void addOperand(const MCOperand Op)
const MCOperand & getOperand(unsigned i) const
Describe properties that are true of each instruction in the target description file.
Interface to description of machine instruction set.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
Instances of this class represent operands of the MCInst class.
static MCOperand createExpr(const MCExpr *Val)
static MCOperand createReg(MCRegister Reg)
static MCOperand createImm(int64_t Val)
void setReg(MCRegister Reg)
Set the register number.
MCRegister getReg() const
Returns the register number.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getSizeInBits() const
Return the size of the physical register in bits if we are able to determine it.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
MCRegister getMatchingSuperReg(MCRegister Reg, unsigned SubIdx, const MCRegisterClass *RC) const
Return a super-register of the specified register Reg so its sub-register of index SubIdx is Reg.
const char * getRegClassName(const MCRegisterClass *Class) const
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Idx" for physical register Reg.
Wrapper class representing physical registers. Should be passed by value.
Generic base class for all target subtargets.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
bool isVariable() const
isVariable - Check if this is a variable symbol.
LLVM_ABI void setVariableValue(const MCExpr *Value)
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Symbolize and annotate disassembled instructions.
Represents a location in source code.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Represent a constant reference to a string, i.e.
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM Value Representation.
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
A raw_ostream that writes to a SmallVector or SmallString.
const char *(* LLVMSymbolLookupCallback)(void *DisInfo, uint64_t ReferenceValue, uint64_t *ReferenceType, uint64_t ReferencePC, const char **ReferenceName)
The type for the symbol lookup function.
int(* LLVMOpInfoCallback)(void *DisInfo, uint64_t PC, uint64_t Offset, uint64_t OpSize, uint64_t InstSize, int TagType, void *TagBuf)
The type for the operand information call back function.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo &STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI)
ArrayRef< GFXVersion > getGFXVersions()
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
EncodingField< Bit, Bit, D > EncodingBit
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isGFX10(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX13(const MCSubtargetInfo &STI)
bool isVOPC64DPP(unsigned Opc)
unsigned getAMDHSACodeObjectVersion(const Module &M)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const MFMA_F8F6F4_Info * getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA, unsigned FmtB, unsigned F8F8Opcode)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX10Plus(const MCSubtargetInfo &STI)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT64
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_IMM_V2FP16_SPLAT
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
bool hasGDS(const MCSubtargetInfo &STI)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool isGFX1250(const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isGFX1250Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
const MFMA_F8F6F4_Info * getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, unsigned BLGP, unsigned F8F8Opcode)
@ C
The default llvm calling convention, compatible with C.
@ EF_AMDGPU_FEATURE_XNACK_ANY_V4
@ EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4
@ EF_AMDGPU_FEATURE_SRAMECC_OFF_V4
@ EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4
@ EF_AMDGPU_FEATURE_XNACK_OFF_V4
@ EF_AMDGPU_FEATURE_XNACK_V4
@ EF_AMDGPU_FEATURE_SRAMECC_V4
@ EF_AMDGPU_FEATURE_XNACK_ON_V4
@ EF_AMDGPU_FEATURE_SRAMECC_ANY_V4
@ EF_AMDGPU_FEATURE_SRAMECC_ON_V4
constexpr bool isAtomicRet(const T &...O)
constexpr bool isVOPC(const T &...O)
constexpr bool isVOP3(const T &...O)
constexpr bool isMAI(const T &...O)
constexpr bool isFLAT(const T &...O)
constexpr bool isVOP3P(const T &...O)
constexpr bool isBuffer(const T &...O)
constexpr bool isVIMAGE(const T &...O)
constexpr bool isSMRD(const T &...O)
constexpr bool isMIMG(const T &...O)
constexpr bool isWMMA(const T &...O)
constexpr bool isMUBUF(const T &...O)
constexpr bool isSDWA(const T &...O)
constexpr bool isEXP(const T &...O)
constexpr bool isSOPK(const T &...O)
constexpr bool isVINTERP(const T &...O)
constexpr bool isVSAMPLE(const T &...O)
constexpr bool isDS(const T &...O)
constexpr bool isGather4(const T &...O)
constexpr bool isDPP(const T &...O)
@ KERNEL_CODE_PROPERTIES_OFFSET
@ GROUP_SEGMENT_FIXED_SIZE_OFFSET
@ COMPUTE_PGM_RSRC3_OFFSET
@ KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET
@ COMPUTE_PGM_RSRC1_OFFSET
@ COMPUTE_PGM_RSRC2_OFFSET
@ PRIVATE_SEGMENT_FIXED_SIZE_OFFSET
value_type read(const void *memory, endianness endian)
Read a value of a particular endianness from memory.
uint16_t read16(const void *P, endianness E)
This is an optimization pass for GlobalISel generic memory operations.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
LLVM_ABI raw_fd_ostream & outs()
This returns a reference to a raw_fd_ostream for standard output.
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Target & getTheGCNTarget()
The target for GCN GPUs.
To bit_cast(const From &from) noexcept
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
std::vector< SymbolInfoTy > SectionSymbolsTy
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
static void RegisterMCSymbolizer(Target &T, Target::MCSymbolizerCtorTy Fn)
RegisterMCSymbolizer - Register an MCSymbolizer implementation for the given target.
static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)
RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.