//===-- Doxygen capture: llvm-project AMDGPUAsmParser.cpp (LLVM 23.0.0git) --===//
1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
16#include "SIDefines.h"
17#include "SIInstrInfo.h"
22#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/ADT/Twine.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCInstrDesc.h"
38#include "llvm/MC/MCSymbol.h"
46#include <optional>
47
48using namespace llvm;
49using namespace llvm::AMDGPU;
50using namespace llvm::amdhsa;
51
52namespace {
53
54class AMDGPUAsmParser;
55
// Broad classes of registers the parser distinguishes while resolving a
// register name: vector, scalar, accumulator, trap-temporary, or a special
// register (e.g. vcc/exec), with IS_UNKNOWN for "not yet classified".
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
57
58//===----------------------------------------------------------------------===//
59// Operand
60//===----------------------------------------------------------------------===//
61
/// A single parsed AMDGPU assembly operand (token, immediate, register, or
/// expression), produced by AMDGPUAsmParser and later rendered into MCInst
/// operands.
class AMDGPUOperand : public MCParsedAsmOperand {
  // Discriminator for the anonymous union of payloads declared below:
  // exactly one of Tok/Imm/Reg/Expr is active, selected by Kind.
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;           // Source range covering the operand text.
  const AMDGPUAsmParser *AsmParser; // Owning parser; not owned by the operand.

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;
79 struct Modifiers {
80 bool Abs = false;
81 bool Neg = false;
82 bool Sext = false;
83 LitModifier Lit = LitModifier::None;
84
85 bool hasFPModifiers() const { return Abs || Neg; }
86 bool hasIntModifiers() const { return Sext; }
87 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
88
89 int64_t getFPModifiersOperand() const {
90 int64_t Operand = 0;
91 Operand |= Abs ? SISrcMods::ABS : 0u;
92 Operand |= Neg ? SISrcMods::NEG : 0u;
93 return Operand;
94 }
95
96 int64_t getIntModifiersOperand() const {
97 int64_t Operand = 0;
98 Operand |= Sext ? SISrcMods::SEXT : 0u;
99 return Operand;
100 }
101
102 int64_t getModifiersOperand() const {
103 assert(!(hasFPModifiers() && hasIntModifiers())
104 && "fp and int modifiers should not be used simultaneously");
105 if (hasFPModifiers())
106 return getFPModifiersOperand();
107 if (hasIntModifiers())
108 return getIntModifiersOperand();
109 return 0;
110 }
111
112 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
113 };
114
  /// Kinds of immediate-style operands. ImmTyNone marks an ordinary literal;
  /// every other value tags a named instruction modifier/field so the
  /// matcher can route it to the right MCInst operand. Order is load-bearing
  /// for table-generated code; do not reorder.
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    // Memory offset fields.
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTySMEMOffsetMod,
    ImmTyCPol,
    ImmTyTFE,
    ImmTyIsAsync,
    ImmTyD16,
    ImmTyClamp,
    ImmTyOModSI,
    // SDWA selector/unused-control fields.
    ImmTySDWADstSel,
    ImmTySDWASrc0Sel,
    ImmTySDWASrc1Sel,
    ImmTySDWADstUnused,
    // MIMG fields.
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    // Export (EXP) fields.
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyDone,
    ImmTyRowEn,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyWaitEvent,
    // Interpolation fields.
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyInterpAttrChan,
    // VOP3P op_sel/neg fields.
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyIndexKey8bit,
    ImmTyIndexKey16bit,
    ImmTyIndexKey32bit,
    // DPP fields.
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFI,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    // MAI/MFMA fields.
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
    ImmTyWaitVAVDst,
    ImmTyWaitVMVSrc,
    ImmTyBitOp3,
    // WMMA matrix format/scale/reuse fields.
    ImmTyMatrixAFMT,
    ImmTyMatrixBFMT,
    ImmTyMatrixAScale,
    ImmTyMatrixBScale,
    ImmTyMatrixAScaleFmt,
    ImmTyMatrixBScaleFmt,
    ImmTyMatrixAReuse,
    ImmTyMatrixBReuse,
    ImmTyScaleSel,
    ImmTyByteSel,
  };
193
private:
  // Payload for Kind == Token: a non-owning slice of the source buffer.
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  // Payload for Kind == Immediate. IsFPImm records whether the literal was
  // written in floating-point form; Val holds the bit pattern either way.
  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  // Payload for Kind == Register.
  struct RegOp {
    MCRegister RegNo;
    Modifiers Mods;
  };

  // Anonymous union of the payloads; the active member is selected by Kind.
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

  // The index of the associated MCInst operand. Set lazily during operand
  // rendering, hence mutable; -1 until assigned.
  mutable int MCOpIdx = -1;
221
public:
  bool isToken() const override { return Kind == Token; }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  // Defined out of line; both consult the parser's subtarget state.
  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  // A plain register operand: register kind with no abs/neg/sext modifiers.
  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  // Register of class RCID, or an operand usable as an inline constant of
  // the given type.
  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  // As isRegOrInline, but additionally accepts a (non-inline) literal.
  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }
255
256 template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
258 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
259 }
260
  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  // "InlineImm" variants: register or inline constant only — no literals.
  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }
285
286 template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
288 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
289 }
290
  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrInlineImmWithFP64InputMods() const {
    return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  // VGPR-only source positions (no SGPR, no constants).
  bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }

  bool isVRegWithFP32InputMods() const {
    return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
  }

  bool isVRegWithFP64InputMods() const {
    return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
  }

  bool isPackedFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
  }

  bool isPackedVGPRFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
  }

  // Any VGPR tuple size (32 through 1024 bits).
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isAV_LdSt_32_Align2_RegOp() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::AGPR_32RegClassID);
  }

  // Defined out of line.
  bool isVRegWithInputMods() const;
  template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
  template <bool IsFake16> bool isT16VRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;
368
  // True if this is an immediate tagged with the given modifier kind.
  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }

  // An untagged immediate, i.e. an ordinary literal.
  bool isImmLiteral() const { return isImmTy(ImmTyNone); }

  // Any tagged immediate (a named instruction modifier).
  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
  bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
  bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
  bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
  bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
  bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
  bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
  bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
  bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
  bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
  bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  // FORMAT is additionally range-checked: the field is 7 bits wide.
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isDppFI() const { return isImmTy(ImmTyDppFI); }
  bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  // BitOp3 is an 8-bit truth-table field.
  bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
  bool isDone() const { return isImmTy(ImmTyDone); }
  bool isRowEn() const { return isImmTy(ImmTyRowEn); }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }
426
  // Defined out of line.
  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  // Register or inline constant, with no abs/neg/sext modifiers attached.
  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  // SCSrc*: scalar register or inline constant (no literals).
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  // SSrc*: as SCSrc but literals (and for some widths, expressions) are
  // also accepted.
  bool isSSrc_b32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b16();
  }

  bool isSSrc_b64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    // C-style cast: AMDGPUAsmParser is only forward-declared at this point;
    // presumably it derives from MCTargetAsmParser — a static_cast would
    // require the complete type.
    return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
           (((const MCTargetAsmParser *)AsmParser)
                ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
            isExpr());
  }
488
  bool isSSrc_f32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }

  bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }

  bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }

  // The following V2 predicates exist only to satisfy table-generated
  // matcher interfaces; they are never reachable at runtime.
  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrc_b32();
  }

  // Scalar source that may also be the LDS direct register.
  bool isSSrcOrLds_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }
528
  // VCSrc*: VALU source — vector or scalar register, or inline constant
  // (no literals). "T" = true16 16-bit class, "Fake16" = 32-bit class used
  // in fake16 mode, "Lo128"/"Lo256" restrict the register range.
  bool isVCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrc_b32_Lo256() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
  }

  bool isVCSrc_b64_Lo256() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
  }

  bool isVCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcT_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isVCSrcTB16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrcFake16B16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrc_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrc_v2b16() const { return isVCSrc_b16(); }

  bool isVCSrc_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrc_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcTBF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
  }

  bool isVCSrcT_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }
578
579 bool isVCSrcT_bf16() const {
580 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
581 }
582
  bool isVCSrcTBF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcTF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrcFake16BF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcFake16F16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrc_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
  }

  bool isVCSrc_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  // Packed 2x16 sources reuse the scalar predicates: an inline constant
  // valid for the scalar element is accepted for the packed operand.
  bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }

  bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
610
  // VSrc*: like VCSrc but literal constants (and for 32-bit, expressions)
  // are also accepted.
  bool isVSrc_b32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }

  bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }

  bool isVSrcT_b16_Lo128() const {
    return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrcFake16_b16_Lo128() const {
    return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }

  bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }

  // 64-bit operands interpreted as packed pairs of 32-bit elements.
  bool isVCSrcV2FP32() const { return isVCSrc_f64(); }

  bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }

  bool isVCSrc_v2b32() const { return isVCSrc_b64(); }

  bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }

  bool isVSrc_f32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }

  bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }

  bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }

  bool isVSrcT_bf16_Lo128() const {
    return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcT_f16_Lo128() const {
    return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrcFake16_bf16_Lo128() const {
    return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcFake16_f16_Lo128() const {
    return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }

  bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }

  bool isVSrc_v2bf16() const {
    return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
  }

  bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }

  bool isVSrc_v2f16_splat() const { return isVSrc_v2f16(); }

  bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
678
  // VISrc*: VGPR (of the named tuple width) or inline constant — no SGPRs,
  // no literals. The V2 variants delegate to the scalar-element predicate.
  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
  }

  bool isVISrc_64_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
  }

  bool isVISrc_64_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_256_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_512_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512_b32();
  }

  bool isVISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024_b32();
  }
822
  // AISrc*: accumulator (AGPR) register of the named width, or inline
  // constant — used for MAI/MFMA operands.
  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128_b32();
  }

  bool isVISrc_128_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
  }

  bool isVISrc_128_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128_f16() || isVISrc_128_b32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512_b32();
  }

  bool isAISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024_b32();
  }

  // KImm*: literal constants carried in the instruction stream (e.g.
  // s_setreg_imm32 style operands).
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isKImmFP64() const { return isLiteralImm(MVT::f64); }
956
  // AMDGPU assembly has no memory-operand syntax of the generic kind.
  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  // Branch target of SOPP instructions: a symbol/expression or a literal.
  bool isSOPPBrTarget() const { return isExpr() || isImm(); }

  // Complex operand predicates; defined out of line.
  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayALU() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isWaitEvent() const;
  bool isSplitBarrier() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  // Bind a per-operand predicate into a nullary callable for use with
  // matcher utilities. Captures `this`, so the operand must outlive the
  // returned closure.
  auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
    return [this, P]() { return P(*this); };
  }
989
  // Accessors; each asserts that the corresponding union member is active.
  StringRef getToken() const {
    assert(isToken());
    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  MCRegister getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  // Index of the MCInst operand this parsed operand was rendered to,
  // or -1 if not (yet) rendered.
  int getMCOpIdx() const { return MCOpIdx; }
1028
  // Modifiers live on registers and on untagged (ImmTyNone) immediates only.
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }
1053
  // Defined out of line.
  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  void addRegOperands(MCInst &Inst, unsigned N) const;

  // Render this operand into Inst as either a register or an immediate,
  // depending on its kind.
  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N);
  }

  // Render the modifier encoding first, then the source operand itself.
  // For immediates the modifiers are emitted separately, so the value is
  // added with ApplyModifiers = false to avoid applying them twice.
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  // Register-only variants: the operand must be a register.
  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }
1105
1106 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1107 // clang-format off
1108 switch (Type) {
1109 case ImmTyNone: OS << "None"; break;
1110 case ImmTyGDS: OS << "GDS"; break;
1111 case ImmTyLDS: OS << "LDS"; break;
1112 case ImmTyOffen: OS << "Offen"; break;
1113 case ImmTyIdxen: OS << "Idxen"; break;
1114 case ImmTyAddr64: OS << "Addr64"; break;
1115 case ImmTyOffset: OS << "Offset"; break;
1116 case ImmTyInstOffset: OS << "InstOffset"; break;
1117 case ImmTyOffset0: OS << "Offset0"; break;
1118 case ImmTyOffset1: OS << "Offset1"; break;
1119 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1120 case ImmTyCPol: OS << "CPol"; break;
1121 case ImmTyIndexKey8bit: OS << "index_key"; break;
1122 case ImmTyIndexKey16bit: OS << "index_key"; break;
1123 case ImmTyIndexKey32bit: OS << "index_key"; break;
1124 case ImmTyTFE: OS << "TFE"; break;
1125 case ImmTyIsAsync: OS << "IsAsync"; break;
1126 case ImmTyD16: OS << "D16"; break;
1127 case ImmTyFORMAT: OS << "FORMAT"; break;
1128 case ImmTyClamp: OS << "Clamp"; break;
1129 case ImmTyOModSI: OS << "OModSI"; break;
1130 case ImmTyDPP8: OS << "DPP8"; break;
1131 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1132 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1133 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1134 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1135 case ImmTyDppFI: OS << "DppFI"; break;
1136 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1137 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1138 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1139 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1140 case ImmTyDMask: OS << "DMask"; break;
1141 case ImmTyDim: OS << "Dim"; break;
1142 case ImmTyUNorm: OS << "UNorm"; break;
1143 case ImmTyDA: OS << "DA"; break;
1144 case ImmTyR128A16: OS << "R128A16"; break;
1145 case ImmTyA16: OS << "A16"; break;
1146 case ImmTyLWE: OS << "LWE"; break;
1147 case ImmTyOff: OS << "Off"; break;
1148 case ImmTyExpTgt: OS << "ExpTgt"; break;
1149 case ImmTyExpCompr: OS << "ExpCompr"; break;
1150 case ImmTyExpVM: OS << "ExpVM"; break;
1151 case ImmTyDone: OS << "Done"; break;
1152 case ImmTyRowEn: OS << "RowEn"; break;
1153 case ImmTyHwreg: OS << "Hwreg"; break;
1154 case ImmTySendMsg: OS << "SendMsg"; break;
1155 case ImmTyWaitEvent: OS << "WaitEvent"; break;
1156 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1157 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1158 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1159 case ImmTyOpSel: OS << "OpSel"; break;
1160 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1161 case ImmTyNegLo: OS << "NegLo"; break;
1162 case ImmTyNegHi: OS << "NegHi"; break;
1163 case ImmTySwizzle: OS << "Swizzle"; break;
1164 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1165 case ImmTyHigh: OS << "High"; break;
1166 case ImmTyBLGP: OS << "BLGP"; break;
1167 case ImmTyCBSZ: OS << "CBSZ"; break;
1168 case ImmTyABID: OS << "ABID"; break;
1169 case ImmTyEndpgm: OS << "Endpgm"; break;
1170 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1171 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1172 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1173 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1174 case ImmTyBitOp3: OS << "BitOp3"; break;
1175 case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
1176 case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
1177 case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
1178 case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
1179 case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
1180 case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
1181 case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
1182 case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
1183 case ImmTyScaleSel: OS << "ScaleSel" ; break;
1184 case ImmTyByteSel: OS << "ByteSel" ; break;
1185 }
1186 // clang-format on
1187 }
1188
  // Debug dump of this operand to \p OS, dispatching on the operand kind.
  void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
         << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      // Only print the type when it carries information beyond "plain imm".
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr ";
      MAI.printExpr(OS, *Expr);
      OS << '>';
      break;
    }
  }
1212
1213 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1214 int64_t Val, SMLoc Loc,
1215 ImmTy Type = ImmTyNone,
1216 bool IsFPImm = false) {
1217 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1218 Op->Imm.Val = Val;
1219 Op->Imm.IsFPImm = IsFPImm;
1220 Op->Imm.Type = Type;
1221 Op->Imm.Mods = Modifiers();
1222 Op->StartLoc = Loc;
1223 Op->EndLoc = Loc;
1224 return Op;
1225 }
1226
1227 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1228 StringRef Str, SMLoc Loc,
1229 bool HasExplicitEncodingSize = true) {
1230 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1231 Res->Tok.Data = Str.data();
1232 Res->Tok.Length = Str.size();
1233 Res->StartLoc = Loc;
1234 Res->EndLoc = Loc;
1235 return Res;
1236 }
1237
1238 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1239 MCRegister Reg, SMLoc S, SMLoc E) {
1240 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1241 Op->Reg.RegNo = Reg;
1242 Op->Reg.Mods = Modifiers();
1243 Op->StartLoc = S;
1244 Op->EndLoc = E;
1245 return Op;
1246 }
1247
1248 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1249 const class MCExpr *Expr, SMLoc S) {
1250 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1251 Op->Expr = Expr;
1252 Op->StartLoc = S;
1253 Op->EndLoc = S;
1254 return Op;
1255 }
1256};
1257
1258raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1259 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1260 return OS;
1261}
1262
1263//===----------------------------------------------------------------------===//
1264// AsmParser
1265//===----------------------------------------------------------------------===//
1266
1267// TODO: define GET_SUBTARGET_FEATURE_NAME
1268#define GET_REGISTER_MATCHER
1269#include "AMDGPUGenAsmMatcher.inc"
1270#undef GET_REGISTER_MATCHER
1271#undef GET_SUBTARGET_FEATURE_NAME
1272
1273// Holds info related to the current kernel, e.g. count of SGPRs used.
1274// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1275// .amdgpu_hsa_kernel or at EOF.
1276class KernelScopeInfo {
1277 int SgprIndexUnusedMin = -1;
1278 int VgprIndexUnusedMin = -1;
1279 int AgprIndexUnusedMin = -1;
1280 MCContext *Ctx = nullptr;
1281 MCSubtargetInfo const *MSTI = nullptr;
1282
1283 void usesSgprAt(int i) {
1284 if (i >= SgprIndexUnusedMin) {
1285 SgprIndexUnusedMin = ++i;
1286 if (Ctx) {
1287 MCSymbol* const Sym =
1288 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1289 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1290 }
1291 }
1292 }
1293
1294 void usesVgprAt(int i) {
1295 if (i >= VgprIndexUnusedMin) {
1296 VgprIndexUnusedMin = ++i;
1297 if (Ctx) {
1298 MCSymbol* const Sym =
1299 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1300 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1301 VgprIndexUnusedMin);
1302 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1303 }
1304 }
1305 }
1306
1307 void usesAgprAt(int i) {
1308 // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
1309 if (!hasMAIInsts(*MSTI))
1310 return;
1311
1312 if (i >= AgprIndexUnusedMin) {
1313 AgprIndexUnusedMin = ++i;
1314 if (Ctx) {
1315 MCSymbol* const Sym =
1316 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1317 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1318
1319 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1320 MCSymbol* const vSym =
1321 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1322 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1323 VgprIndexUnusedMin);
1324 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1325 }
1326 }
1327 }
1328
1329public:
1330 KernelScopeInfo() = default;
1331
1332 void initialize(MCContext &Context) {
1333 Ctx = &Context;
1334 MSTI = Ctx->getSubtargetInfo();
1335
1336 usesSgprAt(SgprIndexUnusedMin = -1);
1337 usesVgprAt(VgprIndexUnusedMin = -1);
1338 if (hasMAIInsts(*MSTI)) {
1339 usesAgprAt(AgprIndexUnusedMin = -1);
1340 }
1341 }
1342
1343 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1344 unsigned RegWidth) {
1345 switch (RegKind) {
1346 case IS_SGPR:
1347 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1348 break;
1349 case IS_AGPR:
1350 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1351 break;
1352 case IS_VGPR:
1353 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1354 break;
1355 default:
1356 break;
1357 }
1358 }
1359};
1360
1361class AMDGPUAsmParser : public MCTargetAsmParser {
1362 MCAsmParser &Parser;
1363
1364 unsigned ForcedEncodingSize = 0;
1365 bool ForcedDPP = false;
1366 bool ForcedSDWA = false;
1367 KernelScopeInfo KernelScope;
1368 const unsigned HwMode;
1369
1370 /// @name Auto-generated Match Functions
1371 /// {
1372
1373#define GET_ASSEMBLER_HEADER
1374#include "AMDGPUGenAsmMatcher.inc"
1375
1376 /// }
1377
  /// Return the size in bytes of register operand \p OpNo of \p Desc,
  /// derived from the bit width of its register class under the current
  /// HwMode.
  unsigned getRegOperandSize(const MCInstrDesc &Desc, unsigned OpNo) const {
    assert(OpNo < Desc.NumOperands);
    int16_t RCID = MII.getOpRegClassID(Desc.operands()[OpNo], HwMode);
    return getRegBitWidth(RCID) / 8;
  }
1384
1385private:
1386 void createConstantSymbol(StringRef Id, int64_t Val);
1387
1388 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1389 bool OutOfRangeError(SMRange Range);
1390 /// Calculate VGPR/SGPR blocks required for given target, reserved
1391 /// registers, and user-specified NextFreeXGPR values.
1392 ///
1393 /// \param Features [in] Target features, used for bug corrections.
1394 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1395 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1396 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1397 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1398 /// descriptor field, if valid.
1399 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1400 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1401 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1402 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1403 /// \param VGPRBlocks [out] Result VGPR block count.
1404 /// \param SGPRBlocks [out] Result SGPR block count.
1405 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1406 const MCExpr *FlatScrUsed, bool XNACKUsed,
1407 std::optional<bool> EnableWavefrontSize32,
1408 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1409 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1410 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
1411 bool ParseDirectiveAMDGCNTarget();
1412 bool ParseDirectiveAMDHSACodeObjectVersion();
1413 bool ParseDirectiveAMDHSAKernel();
1414 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1415 bool ParseDirectiveAMDKernelCodeT();
1416 // TODO: Possibly make subtargetHasRegister const.
1417 bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
1418 bool ParseDirectiveAMDGPUHsaKernel();
1419
1420 bool ParseDirectiveISAVersion();
1421 bool ParseDirectiveHSAMetadata();
1422 bool ParseDirectivePALMetadataBegin();
1423 bool ParseDirectivePALMetadata();
1424 bool ParseDirectiveAMDGPULDS();
1425
1426 /// Common code to parse out a block of text (typically YAML) between start and
1427 /// end directives.
1428 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1429 const char *AssemblerDirectiveEnd,
1430 std::string &CollectString);
1431
1432 bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
1433 RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
1434 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1435 unsigned &RegNum, unsigned &RegWidth,
1436 bool RestoreOnFailure = false);
1437 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1438 unsigned &RegNum, unsigned &RegWidth,
1439 SmallVectorImpl<AsmToken> &Tokens);
1440 MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1441 unsigned &RegWidth,
1442 SmallVectorImpl<AsmToken> &Tokens);
1443 MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1444 unsigned &RegWidth,
1445 SmallVectorImpl<AsmToken> &Tokens);
1446 MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1447 unsigned &RegWidth,
1448 SmallVectorImpl<AsmToken> &Tokens);
1449 bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
1450 MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
1451 unsigned SubReg, unsigned RegWidth, SMLoc Loc);
1452
1453 bool isRegister();
1454 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1455 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1456 void initializeGprCountSymbol(RegisterKind RegKind);
1457 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1458 unsigned RegWidth);
1459 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1460 bool IsAtomic);
1461
1462public:
1463 enum OperandMode {
1464 OperandMode_Default,
1465 OperandMode_NSA,
1466 };
1467
1468 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1469
1470 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1471 const MCInstrInfo &MII, const MCTargetOptions &Options)
1472 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser),
1473 HwMode(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {
1475
1476 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1477
1478 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1479 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1480 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1481 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1482 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1483 } else {
1484 createConstantSymbol(".option.machine_version_major", ISA.Major);
1485 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1486 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1487 }
1488 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1489 initializeGprCountSymbol(IS_VGPR);
1490 initializeGprCountSymbol(IS_SGPR);
1491 } else
1492 KernelScope.initialize(getContext());
1493
1494 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1495 createConstantSymbol(Symbol, Code);
1496
1497 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1498 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1499 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1500 }
1501
  // Subtarget generation / encoding predicates — thin wrappers over the
  // AMDGPU::* queries applied to the current MCSubtargetInfo.

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasA16() const { return AMDGPU::hasA16(getSTI()); }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean it.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX11() const {
    return AMDGPU::isGFX11(getSTI());
  }

  bool isGFX11Plus() const {
    return AMDGPU::isGFX11Plus(getSTI());
  }

  bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }

  bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }

  bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }

  bool isGFX1250Plus() const { return AMDGPU::isGFX1250Plus(getSTI()); }

  bool isGFX13() const { return AMDGPU::isGFX13(getSTI()); }

  bool isGFX13Plus() const { return AMDGPU::isGFX13Plus(getSTI()); }

  bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }
1576
  // Wave-size and feature-bit queries for the current subtarget.

  bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }

  bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool has64BitLiterals() const {
    return getFeatureBits()[AMDGPU::Feature64BitLiterals];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasTrue16Insts() const {
    return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  // Availability of the high SGPR pairs varies by generation.
  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  bool hasPartialNSAEncoding() const {
    return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
  }

  bool hasGloballyAddressableScratch() const {
    return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
  }

  unsigned getNSAMaxSize(bool HasSampler = false) const {
    return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
  }

  unsigned getMaxNumUserSGPRs() const {
    return AMDGPU::getMaxNumUserSGPRs(getSTI());
  }

  bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1628
  // Convenience accessors for the streamer, context, and target info.

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  MCContext &getContext() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser *>(this)->MCTargetAsmParser::getContext();
  }

  const MCRegisterInfo *getMRI() const {
    return getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  // FIXME: This should not be used. Instead, should use queries derived from
  // getAvailableFeatures().
  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }
1653
  // Forced-encoding state used to constrain instruction matching
  // (set while parsing; see parseMnemonicSuffix).
  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
1662 ArrayRef<unsigned> getMatchedVariants() const;
1663 StringRef getMatchedVariantName() const;
1664
1665 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1666 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1667 bool RestoreOnFailure);
1668 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1669 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1670 SMLoc &EndLoc) override;
1671 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1672 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1673 unsigned Kind) override;
1674 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1675 OperandVector &Operands, MCStreamer &Out,
1676 uint64_t &ErrorInfo,
1677 bool MatchingInlineAsm) override;
1678 bool ParseDirective(AsmToken DirectiveID) override;
1679 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1680 OperandMode Mode = OperandMode_Default);
1681 StringRef parseMnemonicSuffix(StringRef Name);
1682 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1683 SMLoc NameLoc, OperandVector &Operands) override;
1684 //bool ProcessInstruction(MCInst &Inst);
1685
1686 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1687
1688 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1689
1690 ParseStatus
1691 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1692 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1693 std::function<bool(int64_t &)> ConvertResult = nullptr);
1694
1695 ParseStatus parseOperandArrayWithPrefix(
1696 const char *Prefix, OperandVector &Operands,
1697 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1698 bool (*ConvertResult)(int64_t &) = nullptr);
1699
1700 ParseStatus
1701 parseNamedBit(StringRef Name, OperandVector &Operands,
1702 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1703 bool IgnoreNegative = false);
1704 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1705 ParseStatus parseCPol(OperandVector &Operands);
1706 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1707 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1708 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1709 SMLoc &StringLoc);
1710 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1711 StringRef Name,
1712 ArrayRef<const char *> Ids,
1713 int64_t &IntVal);
1714 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1715 StringRef Name,
1716 ArrayRef<const char *> Ids,
1717 AMDGPUOperand::ImmTy Type);
1718
1719 bool isModifier();
1720 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1721 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1722 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1723 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1724 bool parseSP3NegModifier();
1725 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1726 LitModifier Lit = LitModifier::None);
1727 ParseStatus parseReg(OperandVector &Operands);
1728 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1729 LitModifier Lit = LitModifier::None);
1730 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1731 bool AllowImm = true);
1732 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1733 bool AllowImm = true);
1734 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1735 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1736 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1737 ParseStatus tryParseIndexKey(OperandVector &Operands,
1738 AMDGPUOperand::ImmTy ImmTy);
1739 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1740 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1741 ParseStatus parseIndexKey32bit(OperandVector &Operands);
1742 ParseStatus tryParseMatrixFMT(OperandVector &Operands, StringRef Name,
1743 AMDGPUOperand::ImmTy Type);
1744 ParseStatus parseMatrixAFMT(OperandVector &Operands);
1745 ParseStatus parseMatrixBFMT(OperandVector &Operands);
1746 ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
1747 AMDGPUOperand::ImmTy Type);
1748 ParseStatus parseMatrixAScale(OperandVector &Operands);
1749 ParseStatus parseMatrixBScale(OperandVector &Operands);
1750 ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
1751 AMDGPUOperand::ImmTy Type);
1752 ParseStatus parseMatrixAScaleFmt(OperandVector &Operands);
1753 ParseStatus parseMatrixBScaleFmt(OperandVector &Operands);
1754
1755 ParseStatus parseDfmtNfmt(int64_t &Format);
1756 ParseStatus parseUfmt(int64_t &Format);
1757 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1758 int64_t &Format);
1759 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1760 int64_t &Format);
1761 ParseStatus parseFORMAT(OperandVector &Operands);
1762 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1763 ParseStatus parseNumericFormat(int64_t &Format);
1764 ParseStatus parseFlatOffset(OperandVector &Operands);
1765 ParseStatus parseR128A16(OperandVector &Operands);
1766 ParseStatus parseBLGP(OperandVector &Operands);
1767 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1768 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1769
1770 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1771
1772 bool parseCnt(int64_t &IntVal);
1773 ParseStatus parseSWaitCnt(OperandVector &Operands);
1774
1775 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1776 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1777 ParseStatus parseDepCtr(OperandVector &Operands);
1778
1779 bool parseDelay(int64_t &Delay);
1780 ParseStatus parseSDelayALU(OperandVector &Operands);
1781
1782 ParseStatus parseHwreg(OperandVector &Operands);
1783
1784private:
  // Parsed value of one symbolic-operand component (e.g. a hwreg or sendmsg
  // field), with its source location and parse state.
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Val;
    bool IsSymbolic = false; // presumably: value was given by name, not number
                             // — confirm against parse* users
    bool IsDefined = false;

    constexpr OperandInfoTy(int64_t Val) : Val(Val) {}
  };
1793
  // One named bit-field of a structured operand, described by an id, a
  // human-readable description, and a width in bits.
  struct StructuredOpField : OperandInfoTy {
    StringLiteral Id;
    StringLiteral Desc;
    unsigned Width;
    // NOTE(review): shadows OperandInfoTy::IsDefined — confirm intentional.
    bool IsDefined = false;

    constexpr StructuredOpField(StringLiteral Id, StringLiteral Desc,
                                unsigned Width, int64_t Default)
        : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
    virtual ~StructuredOpField() = default;

    // Report a parse error for this field; always returns false so callers
    // can `return Error(...)`.
    bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
      Parser.Error(Loc, "invalid " + Desc + ": " + Err);
      return false;
    }

    // Check that the parsed value is supported and fits in Width bits.
    virtual bool validate(AMDGPUAsmParser &Parser) const {
      if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
        return Error(Parser, "not supported on this GPU");
      if (!isUIntN(Width, Val))
        return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
      return true;
    }
  };
1818
1819 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1820 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1821
1822 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1823 bool validateSendMsg(const OperandInfoTy &Msg,
1824 const OperandInfoTy &Op,
1825 const OperandInfoTy &Stream);
1826
1827 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1828 OperandInfoTy &Width);
1829
1830 static SMLoc getLaterLoc(SMLoc a, SMLoc b);
1831
1832 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1833 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1834 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1835
1836 SMLoc getOperandLoc(const OperandVector &Operands, int MCOpIdx) const;
1837 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1838 const OperandVector &Operands) const;
1839 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type,
1840 const OperandVector &Operands) const;
1841 SMLoc getInstLoc(const OperandVector &Operands) const;
1842
1843 bool validateInstruction(const MCInst &Inst, SMLoc IDLoc,
1844 const OperandVector &Operands);
1845 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1846 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1847 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1848 bool validateSOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1849 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1850 std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
1851 bool AsVOPD3);
1852 bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
1853 bool tryVOPD(const MCInst &Inst);
1854 bool tryVOPD3(const MCInst &Inst);
1855 bool tryAnotherVOPDEncoding(const MCInst &Inst);
1856
1857 bool validateIntClampSupported(const MCInst &Inst);
1858 bool validateMIMGAtomicDMask(const MCInst &Inst);
1859 bool validateMIMGGatherDMask(const MCInst &Inst);
1860 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1861 bool validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc);
1862 bool validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc);
1863 bool validateMIMGD16(const MCInst &Inst);
1864 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1865 bool validateTensorR128(const MCInst &Inst);
1866 bool validateMIMGMSAA(const MCInst &Inst);
1867 bool validateOpSel(const MCInst &Inst);
1868 bool validateTrue16OpSel(const MCInst &Inst);
1869 bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
1870 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1871 bool validateVccOperand(MCRegister Reg) const;
1872 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1873 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1874 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1875 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1876 bool validateAGPRLdSt(const MCInst &Inst) const;
1877 bool validateVGPRAlign(const MCInst &Inst) const;
1878 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1879 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1880 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1881 bool validateDivScale(const MCInst &Inst);
1882 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1883 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1884 SMLoc IDLoc);
1885 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1886 const unsigned CPol);
1887 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1888 bool validateLdsDirect(const MCInst &Inst, const OperandVector &Operands);
1889 bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
1890 unsigned getConstantBusLimit(unsigned Opcode) const;
1891 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1892 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1893 MCRegister findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1894
1895 bool isSupportedMnemo(StringRef Mnemo,
1896 const FeatureBitset &FBS);
1897 bool isSupportedMnemo(StringRef Mnemo,
1898 const FeatureBitset &FBS,
1899 ArrayRef<unsigned> Variants);
1900 bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);
1901
1902 bool isId(const StringRef Id) const;
1903 bool isId(const AsmToken &Token, const StringRef Id) const;
1904 bool isToken(const AsmToken::TokenKind Kind) const;
1905 StringRef getId() const;
1906 bool trySkipId(const StringRef Id);
1907 bool trySkipId(const StringRef Pref, const StringRef Id);
1908 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1909 bool trySkipToken(const AsmToken::TokenKind Kind);
1910 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1911 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1912 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1913
1914 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1915 AsmToken::TokenKind getTokenKind() const;
1916 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1917 bool parseExpr(OperandVector &Operands);
1918 StringRef getTokenStr() const;
1919 AsmToken peekToken(bool ShouldSkipSpace = true);
1920 AsmToken getToken() const;
1921 SMLoc getLoc() const;
1922 void lex();
1923
1924public:
1925 void onBeginOfFile() override;
1926 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1927
1928 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1929
1930 ParseStatus parseExpTgt(OperandVector &Operands);
1931 ParseStatus parseSendMsg(OperandVector &Operands);
1932 ParseStatus parseWaitEvent(OperandVector &Operands);
1933 ParseStatus parseInterpSlot(OperandVector &Operands);
1934 ParseStatus parseInterpAttr(OperandVector &Operands);
1935 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1936 ParseStatus parseBoolReg(OperandVector &Operands);
1937
1938 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1939 const unsigned MaxVal, const Twine &ErrMsg,
1940 SMLoc &Loc);
1941 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1942 const unsigned MinVal,
1943 const unsigned MaxVal,
1944 const StringRef ErrMsg);
1945 ParseStatus parseSwizzle(OperandVector &Operands);
1946 bool parseSwizzleOffset(int64_t &Imm);
1947 bool parseSwizzleMacro(int64_t &Imm);
1948 bool parseSwizzleQuadPerm(int64_t &Imm);
1949 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1950 bool parseSwizzleBroadcast(int64_t &Imm);
1951 bool parseSwizzleSwap(int64_t &Imm);
1952 bool parseSwizzleReverse(int64_t &Imm);
1953 bool parseSwizzleFFT(int64_t &Imm);
1954 bool parseSwizzleRotate(int64_t &Imm);
1955
1956 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1957 int64_t parseGPRIdxMacro();
1958
 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } // non-atomic MUBUF conversion
 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } // atomic MUBUF conversion
1961
1962 ParseStatus parseOModSI(OperandVector &Operands);
1963
1964 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1965 OptionalImmIndexMap &OptionalIdx);
1966 void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
1967 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1968 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1969 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1970 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1971
1972 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1973 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1974 OptionalImmIndexMap &OptionalIdx);
1975 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1976 OptionalImmIndexMap &OptionalIdx);
1977
1978 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1979 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1980 void cvtOpSelHelper(MCInst &Inst, unsigned OpSel);
1981
1982 bool parseDimId(unsigned &Encoding);
1983 ParseStatus parseDim(OperandVector &Operands);
1984 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1985 ParseStatus parseDPP8(OperandVector &Operands);
1986 ParseStatus parseDPPCtrl(OperandVector &Operands);
1987 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1988 int64_t parseDPPCtrlSel(StringRef Ctrl);
1989 int64_t parseDPPCtrlPerm();
1990 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
 // Convert a DPP8 instruction by delegating to cvtDPP with IsDPP8 = true.
 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
 cvtDPP(Inst, Operands, true);
 }
1994 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1995 bool IsDPP8 = false);
 // Convert a VOP3 DPP8 instruction by delegating to cvtVOP3DPP with IsDPP8 = true.
 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
 cvtVOP3DPP(Inst, Operands, true);
 }
1999
2000 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
2001 AMDGPUOperand::ImmTy Type);
2002 ParseStatus parseSDWADstUnused(OperandVector &Operands);
2003 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
2004 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
2005 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
2006 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
2007 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
2008 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
2009 uint64_t BasicInstType,
2010 bool SkipDstVcc = false,
2011 bool SkipSrcVcc = false);
2012
2013 ParseStatus parseEndpgm(OperandVector &Operands);
2014
2015 ParseStatus parseVOPD(OperandVector &Operands);
2016};
2017
2018} // end anonymous namespace
2019
2020// May be called with integer type with equivalent bitwidth.
2021static const fltSemantics *getFltSemantics(unsigned Size) {
2022 switch (Size) {
2023 case 4:
2024 return &APFloat::IEEEsingle();
2025 case 8:
2026 return &APFloat::IEEEdouble();
2027 case 2:
2028 return &APFloat::IEEEhalf();
2029 default:
2030 llvm_unreachable("unsupported fp type");
2031 }
2032}
2033
 // NOTE(review): the signature line is elided in this excerpt; this forwards
 // the MVT's byte width to getFltSemantics(unsigned Size).
 return getFltSemantics(VT.getSizeInBits() / 8);
}
2037
 // NOTE(review): the signature and the case labels of this switch are elided
 // in this excerpt; each visible return covers a group of operand types.
 switch (OperandType) {
 // When floating-point immediate is used as operand of type i16, the 32-bit
 // representation of the constant truncated to the 16 LSBs should be used.
 return &APFloat::IEEEsingle();
 return &APFloat::IEEEdouble();
 return &APFloat::IEEEhalf();
 return &APFloat::BFloat();
 default:
 llvm_unreachable("unsupported fp type");
 }
}
2081
2082//===----------------------------------------------------------------------===//
2083// Operand
2084//===----------------------------------------------------------------------===//
2085
// Returns true if FPLiteral can be converted to the FP type of VT without
// overflow or underflow (precision loss alone is tolerated).
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
 bool Lost;

 // Convert literal to single precision
 // NOTE(review): the conversion call that produces `Status` is elided in this
 // excerpt — presumably FPLiteral.convert(*getFltSemantics(VT), ...).
 &Lost);
 // We allow precision lost but not overflow or underflow
 if (Status != APFloat::opOK &&
 Lost &&
 ((Status & APFloat::opOverflow) != 0 ||
 (Status & APFloat::opUnderflow) != 0)) {
 return false;
 }

 return true;
}
2103
2104static bool isSafeTruncation(int64_t Val, unsigned Size) {
2105 return isUIntN(Size, Val) || isIntN(Size, Val);
2106}
2107
2108static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2109 if (VT.getScalarType() == MVT::i16)
2110 return isInlinableLiteral32(Val, HasInv2Pi);
2111
2112 if (VT.getScalarType() == MVT::f16)
2113 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2114
2115 assert(VT.getScalarType() == MVT::bf16);
2116
2117 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2118}
2119
// Returns true if this operand can be encoded as an inline constant of the
// given type. NOTE(review): several return statements in this excerpt are
// elided (marked below); only their continuation lines are visible.
bool AMDGPUOperand::isInlinableImm(MVT type) const {

 // This is a hack to enable named inline values like
 // shared_base with both 32-bit and 64-bit operands.
 // Note that these values are defined as
 // 32-bit operands only.
 if (isInlineValue()) {
 return true;
 }

 if (!isImmTy(ImmTyNone)) {
 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
 return false;
 }

 // An explicit lit()/lit64() modifier forces a literal encoding.
 if (getModifiers().Lit != LitModifier::None)
 return false;

 // TODO: We should avoid using host float here. It would be better to
 // check the float bit values which is what a few other places do.
 // We've had bot failures before due to weird NaN support on mips hosts.

 APInt Literal(64, Imm.Val);

 if (Imm.IsFPImm) { // We got fp literal token
 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
 // NOTE(review): elided — presumably `return AMDGPU::isInlinableLiteral64(...,`
 AsmParser->hasInv2PiInlineImm());
 }

 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
 if (!canLosslesslyConvertToFPType(FPLiteral, type))
 return false;

 if (type.getScalarSizeInBits() == 16) {
 bool Lost = false;
 switch (type.getScalarType().SimpleTy) {
 default:
 llvm_unreachable("unknown 16-bit type");
 case MVT::bf16:
 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
 &Lost);
 break;
 case MVT::f16:
 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
 &Lost);
 break;
 case MVT::i16:
 FPLiteral.convert(APFloatBase::IEEEsingle(),
 APFloat::rmNearestTiesToEven, &Lost);
 break;
 }
 // We need to use 32-bit representation here because when a floating-point
 // inline constant is used as an i16 operand, its 32-bit representation
 // representation will be used. We will need the 32-bit value to check if
 // it is FP inline constant.
 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
 return isInlineableLiteralOp16(ImmVal, type,
 AsmParser->hasInv2PiInlineImm());
 }

 // Check if single precision literal is inlinable
 // NOTE(review): elided — presumably `return AMDGPU::isInlinableLiteral32(`
 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
 AsmParser->hasInv2PiInlineImm());
 }

 // We got int literal token.
 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
 // NOTE(review): elided — presumably `return AMDGPU::isInlinableLiteral64(...,`
 AsmParser->hasInv2PiInlineImm());
 }

 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
 return false;
 }

 if (type.getScalarSizeInBits() == 16) {
 // NOTE(review): elided — presumably `return isInlineableLiteralOp16(`
 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
 type, AsmParser->hasInv2PiInlineImm());
 }

 // NOTE(review): elided — presumably `return AMDGPU::isInlinableLiteral32(`
 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
 AsmParser->hasInv2PiInlineImm());
}
2207
2208bool AMDGPUOperand::isLiteralImm(MVT type) const {
2209 // Check that this immediate can be added as literal
2210 if (!isImmTy(ImmTyNone)) {
2211 return false;
2212 }
2213
2214 bool Allow64Bit =
2215 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2216
2217 if (!Imm.IsFPImm) {
2218 // We got int literal token.
2219
2220 if (type == MVT::f64 && hasFPModifiers()) {
2221 // Cannot apply fp modifiers to int literals preserving the same semantics
2222 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
2223 // disable these cases.
2224 return false;
2225 }
2226
2227 unsigned Size = type.getSizeInBits();
2228 if (Size == 64) {
2229 if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false))
2230 return true;
2231 Size = 32;
2232 }
2233
2234 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2235 // types.
2236 return isSafeTruncation(Imm.Val, Size);
2237 }
2238
2239 // We got fp literal token
2240 if (type == MVT::f64) { // Expected 64-bit fp operand
2241 // We would set low 64-bits of literal to zeroes but we accept this literals
2242 return true;
2243 }
2244
2245 if (type == MVT::i64) { // Expected 64-bit int operand
2246 // We don't allow fp literals in 64-bit integer instructions. It is
2247 // unclear how we should encode them.
2248 return false;
2249 }
2250
2251 // We allow fp literals with f16x2 operands assuming that the specified
2252 // literal goes into the lower half and the upper half is zero. We also
2253 // require that the literal may be losslessly converted to f16.
2254 //
2255 // For i16x2 operands, we assume that the specified literal is encoded as a
2256 // single-precision float. This is pretty odd, but it matches SP3 and what
2257 // happens in hardware.
2258 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2259 : (type == MVT::v2i16) ? MVT::f32
2260 : (type == MVT::v2f32) ? MVT::f32
2261 : type;
2262
2263 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2264 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2265}
2266
2267bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2268 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2269}
2270
2271bool AMDGPUOperand::isVRegWithInputMods() const {
2272 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2273 // GFX90A allows DPP on 64-bit operands.
2274 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2275 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2276}
2277
2278template <bool IsFake16>
2279bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2280 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2281 : AMDGPU::VGPR_16_Lo128RegClassID);
2282}
2283
2284template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2285 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2286 : AMDGPU::VGPR_16RegClassID);
2287}
2288
2289bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2290 if (AsmParser->isVI())
2291 return isVReg32();
2292 if (AsmParser->isGFX9Plus())
2293 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2294 return false;
2295}
2296
// SDWA source usable as an f16 operand.
bool AMDGPUOperand::isSDWAFP16Operand() const {
 return isSDWAOperand(MVT::f16);
}
2300
// SDWA source usable as an f32 operand.
bool AMDGPUOperand::isSDWAFP32Operand() const {
 return isSDWAOperand(MVT::f32);
}
2304
// SDWA source usable as an i16 operand.
bool AMDGPUOperand::isSDWAInt16Operand() const {
 return isSDWAOperand(MVT::i16);
}
2308
// SDWA source usable as an i32 operand.
bool AMDGPUOperand::isSDWAInt32Operand() const {
 return isSDWAOperand(MVT::i32);
}
2312
2313bool AMDGPUOperand::isBoolReg() const {
2314 return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
2315 (AsmParser->isWave32() && isSCSrc_b32()));
2316}
2317
2318uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2319{
2320 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2321 assert(Size == 2 || Size == 4 || Size == 8);
2322
2323 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2324
2325 if (Imm.Mods.Abs) {
2326 Val &= ~FpSignMask;
2327 }
2328 if (Imm.Mods.Neg) {
2329 Val ^= FpSignMask;
2330 }
2331
2332 return Val;
2333}
2334
// Append this immediate operand to Inst, routing literal-capable source
// operands through addLiteralImmOperand. NOTE(review): the two addOperand
// calls for the expression and plain-immediate cases are elided in this
// excerpt.
void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
 MCOpIdx = Inst.getNumOperands();

 if (isExpr()) {
 // NOTE(review): elided — presumably Inst.addOperand(MCOperand::createExpr(...));
 return;
 }

 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
 Inst.getNumOperands())) {
 // NOTE(review): `ApplyModifiers &` is a bitwise AND on a bool — because `&`
 // binds tighter than `&&` this still evaluates as
 // (ApplyModifiers & isImmTy(ImmTyNone)) && hasFPModifiers(); consider `&&`.
 addLiteralImmOperand(Inst, Imm.Val,
 ApplyModifiers &
 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
 } else {
 assert(!isImmTy(ImmTyNone) || !hasModifiers());
 // NOTE(review): elided — presumably Inst.addOperand(MCOperand::createImm(Imm.Val));
 }
}
2353
// Encode an immediate that may need literal (non-inline) encoding and append
// it to Inst, honoring lit()/lit64() modifiers and 64-bit literal support.
// NOTE(review): this excerpt elides most `case` labels of both switches and a
// few addOperand calls; the visible fragments are kept verbatim below.
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
 auto OpNum = Inst.getNumOperands();
 // Check that this operand accepts literals
 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

 if (ApplyModifiers) {
 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
 Val = applyInputFPModifiers(Val, Size);
 }

 APInt Literal(64, Val);
 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;

 // VOP3/VOP3P encodings cannot carry a 64-bit literal even on targets that
 // otherwise support them.
 bool CanUse64BitLiterals =
 AsmParser->has64BitLiterals() &&
 !(InstDesc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));
 LitModifier Lit = getModifiers().Lit;
 MCContext &Ctx = AsmParser->getContext();

 if (Imm.IsFPImm) { // We got fp literal token
 switch (OpTy) {
 // NOTE(review): case labels elided throughout this switch.
 if (Lit == LitModifier::None &&
 AsmParser->hasInv2PiInlineImm())) {
 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
 return;
 }

 // Non-inlineable
 if (AMDGPU::isSISrcFPOperand(InstDesc,
 OpNum)) { // Expected 64-bit fp operand
 bool HasMandatoryLiteral =
 AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
 // For fp operands we check if low 32 bits are zeros
 if (Literal.getLoBits(32) != 0 &&
 (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
 !HasMandatoryLiteral) {
 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
 Inst.getLoc(),
 "Can't encode literal as exact 64-bit floating-point operand. "
 "Low 32-bits will be set to zero");
 Val &= 0xffffffff00000000u;
 }

 if ((OpTy == AMDGPU::OPERAND_REG_IMM_FP64 ||
 if (CanUse64BitLiterals && Lit == LitModifier::None &&
 (isInt<32>(Val) || isUInt<32>(Val))) {
 // The floating-point operand will be verbalized as an
 // integer one. If that integer happens to fit 32 bits, on
 // re-assembling it will be intepreted as the high half of
 // the actual value, so we have to wrap it into lit64().
 Lit = LitModifier::Lit64;
 } else if (Lit == LitModifier::Lit) {
 // For FP64 operands lit() specifies the high half of the value.
 Val = Hi_32(Val);
 }
 }
 break;
 }

 // We don't allow fp literals in 64-bit integer instructions. It is
 // unclear how we should encode them. This case should be checked earlier
 // in predicate methods (isLiteralImm())
 llvm_unreachable("fp literal in 64-bit integer instruction.");

 if (CanUse64BitLiterals && Lit == LitModifier::None &&
 (isInt<32>(Val) || isUInt<32>(Val)))
 Lit = LitModifier::Lit64;
 break;

 if (Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
 Literal == 0x3fc45f306725feed) {
 // This is the 1/(2*pi) which is going to be truncated to bf16 with the
 // loss of precision. The constant represents ideomatic fp32 value of
 // 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16
 // bits. Prevent rounding below.
 Inst.addOperand(MCOperand::createImm(0x3e22));
 return;
 }
 [[fallthrough]];

 bool lost;
 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
 // Convert literal to single precision
 FPLiteral.convert(*getOpFltSemantics(OpTy),
 APFloat::rmNearestTiesToEven, &lost);
 // We allow precision lost but not overflow or underflow. This should be
 // checked earlier in isLiteralImm()

 Val = FPLiteral.bitcastToAPInt().getZExtValue();
 break;
 }
 default:
 llvm_unreachable("invalid operand size");
 }

 if (Lit != LitModifier::None) {
 Inst.addOperand(
 // NOTE(review): operand-creation expressions elided here and below.
 } else {
 }
 return;
 }

 // We got int literal token.
 // Only sign extend inline immediates.
 switch (OpTy) {
 // NOTE(review): case labels elided throughout this switch.
 break;

 if (Lit == LitModifier::None &&
 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
 return;
 }

 // When the 32 MSBs are not zero (effectively means it can't be safely
 // truncated to uint32_t), if the target doesn't support 64-bit literals, or
 // the lit modifier is explicitly used, we need to truncate it to the 32
 // LSBs.
 if (!AsmParser->has64BitLiterals() || Lit == LitModifier::Lit)
 Val = Lo_32(Val);
 break;

 if (Lit == LitModifier::None &&
 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
 return;
 }

 // If the target doesn't support 64-bit literals, we need to use the
 // constant as the high 32 MSBs of a double-precision floating point value.
 if (!AsmParser->has64BitLiterals()) {
 Val = static_cast<uint64_t>(Val) << 32;
 } else {
 // Now the target does support 64-bit literals, there are two cases
 // where we still want to use src_literal encoding:
 // 1) explicitly forced by using lit modifier;
 // 2) the value is a valid 32-bit representation (signed or unsigned),
 // meanwhile not forced by lit64 modifier.
 if (Lit == LitModifier::Lit ||
 (Lit != LitModifier::Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
 Val = static_cast<uint64_t>(Val) << 32;
 }

 // For FP64 operands lit() specifies the high half of the value.
 if (Lit == LitModifier::Lit)
 Val = Hi_32(Val);
 break;

 break;

 if ((isInt<32>(Val) || isUInt<32>(Val)) && Lit != LitModifier::Lit64)
 Val <<= 32;
 break;

 default:
 llvm_unreachable("invalid operand type");
 }

 if (Lit != LitModifier::None) {
 Inst.addOperand(
 } else {
 }
}
2587
// Append this register operand to Inst, translating to the subtarget-specific
// MC register number first.
void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
 MCOpIdx = Inst.getNumOperands();
 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}
2592
// True for named inline-value registers (e.g. shared_base); delegates to the
// file-scope ::isInlineValue predicate on the register number.
bool AMDGPUOperand::isInlineValue() const {
 return isRegKind() && ::isInlineValue(getReg());
}
2596
2597//===----------------------------------------------------------------------===//
2598// AsmParser
2599//===----------------------------------------------------------------------===//
2600
// Define an assembler symbol Id with constant value Val.
// NOTE(review): the line that assigns the value to the symbol is elided in
// this excerpt — presumably Sym->setVariableValue(MCConstantExpr::create(...)).
void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
 // TODO: make those pre-defined variables read-only.
 // Currently there is none suitable machinery in the core llvm-mc for this.
 // MCSymbol::isRedefinable is intended for another purpose, and
 // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
 MCContext &Ctx = getContext();
 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
}
2610
2611static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2612 if (Is == IS_VGPR) {
2613 switch (RegWidth) {
2614 default: return -1;
2615 case 32:
2616 return AMDGPU::VGPR_32RegClassID;
2617 case 64:
2618 return AMDGPU::VReg_64RegClassID;
2619 case 96:
2620 return AMDGPU::VReg_96RegClassID;
2621 case 128:
2622 return AMDGPU::VReg_128RegClassID;
2623 case 160:
2624 return AMDGPU::VReg_160RegClassID;
2625 case 192:
2626 return AMDGPU::VReg_192RegClassID;
2627 case 224:
2628 return AMDGPU::VReg_224RegClassID;
2629 case 256:
2630 return AMDGPU::VReg_256RegClassID;
2631 case 288:
2632 return AMDGPU::VReg_288RegClassID;
2633 case 320:
2634 return AMDGPU::VReg_320RegClassID;
2635 case 352:
2636 return AMDGPU::VReg_352RegClassID;
2637 case 384:
2638 return AMDGPU::VReg_384RegClassID;
2639 case 512:
2640 return AMDGPU::VReg_512RegClassID;
2641 case 1024:
2642 return AMDGPU::VReg_1024RegClassID;
2643 }
2644 } else if (Is == IS_TTMP) {
2645 switch (RegWidth) {
2646 default: return -1;
2647 case 32:
2648 return AMDGPU::TTMP_32RegClassID;
2649 case 64:
2650 return AMDGPU::TTMP_64RegClassID;
2651 case 128:
2652 return AMDGPU::TTMP_128RegClassID;
2653 case 256:
2654 return AMDGPU::TTMP_256RegClassID;
2655 case 512:
2656 return AMDGPU::TTMP_512RegClassID;
2657 }
2658 } else if (Is == IS_SGPR) {
2659 switch (RegWidth) {
2660 default: return -1;
2661 case 32:
2662 return AMDGPU::SGPR_32RegClassID;
2663 case 64:
2664 return AMDGPU::SGPR_64RegClassID;
2665 case 96:
2666 return AMDGPU::SGPR_96RegClassID;
2667 case 128:
2668 return AMDGPU::SGPR_128RegClassID;
2669 case 160:
2670 return AMDGPU::SGPR_160RegClassID;
2671 case 192:
2672 return AMDGPU::SGPR_192RegClassID;
2673 case 224:
2674 return AMDGPU::SGPR_224RegClassID;
2675 case 256:
2676 return AMDGPU::SGPR_256RegClassID;
2677 case 288:
2678 return AMDGPU::SGPR_288RegClassID;
2679 case 320:
2680 return AMDGPU::SGPR_320RegClassID;
2681 case 352:
2682 return AMDGPU::SGPR_352RegClassID;
2683 case 384:
2684 return AMDGPU::SGPR_384RegClassID;
2685 case 512:
2686 return AMDGPU::SGPR_512RegClassID;
2687 }
2688 } else if (Is == IS_AGPR) {
2689 switch (RegWidth) {
2690 default: return -1;
2691 case 32:
2692 return AMDGPU::AGPR_32RegClassID;
2693 case 64:
2694 return AMDGPU::AReg_64RegClassID;
2695 case 96:
2696 return AMDGPU::AReg_96RegClassID;
2697 case 128:
2698 return AMDGPU::AReg_128RegClassID;
2699 case 160:
2700 return AMDGPU::AReg_160RegClassID;
2701 case 192:
2702 return AMDGPU::AReg_192RegClassID;
2703 case 224:
2704 return AMDGPU::AReg_224RegClassID;
2705 case 256:
2706 return AMDGPU::AReg_256RegClassID;
2707 case 288:
2708 return AMDGPU::AReg_288RegClassID;
2709 case 320:
2710 return AMDGPU::AReg_320RegClassID;
2711 case 352:
2712 return AMDGPU::AReg_352RegClassID;
2713 case 384:
2714 return AMDGPU::AReg_384RegClassID;
2715 case 512:
2716 return AMDGPU::AReg_512RegClassID;
2717 case 1024:
2718 return AMDGPU::AReg_1024RegClassID;
2719 }
2720 }
2721 return -1;
2722}
2723
 // NOTE(review): the signature and the StringSwitch construction are elided
 // in this excerpt; this is the name → special-register table of
 // getSpecialRegForName. Several names have both plain and "src_"-prefixed
 // spellings mapping to the same register.
 .Case("exec", AMDGPU::EXEC)
 .Case("vcc", AMDGPU::VCC)
 .Case("flat_scratch", AMDGPU::FLAT_SCR)
 .Case("xnack_mask", AMDGPU::XNACK_MASK)
 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
 .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
 .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
 .Case("lds_direct", AMDGPU::LDS_DIRECT)
 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
 .Case("m0", AMDGPU::M0)
 .Case("vccz", AMDGPU::SRC_VCCZ)
 .Case("src_vccz", AMDGPU::SRC_VCCZ)
 .Case("execz", AMDGPU::SRC_EXECZ)
 .Case("src_execz", AMDGPU::SRC_EXECZ)
 .Case("scc", AMDGPU::SRC_SCC)
 .Case("src_scc", AMDGPU::SRC_SCC)
 .Case("tba", AMDGPU::TBA)
 .Case("tma", AMDGPU::TMA)
 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
 .Case("vcc_lo", AMDGPU::VCC_LO)
 .Case("vcc_hi", AMDGPU::VCC_HI)
 .Case("exec_lo", AMDGPU::EXEC_LO)
 .Case("exec_hi", AMDGPU::EXEC_HI)
 .Case("tma_lo", AMDGPU::TMA_LO)
 .Case("tma_hi", AMDGPU::TMA_HI)
 .Case("tba_lo", AMDGPU::TBA_LO)
 .Case("tba_hi", AMDGPU::TBA_HI)
 .Case("pc", AMDGPU::PC_REG)
 .Case("null", AMDGPU::SGPR_NULL)
 // Unknown names yield an invalid register.
 .Default(AMDGPU::NoRegister);
}
2769
2770bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2771 SMLoc &EndLoc, bool RestoreOnFailure) {
2772 auto R = parseRegister();
2773 if (!R) return true;
2774 assert(R->isReg());
2775 RegNo = R->getReg();
2776 StartLoc = R->getStartLoc();
2777 EndLoc = R->getEndLoc();
2778 return false;
2779}
2780
// MCTargetAsmParser entry point; forwards to ParseRegister without
// error-restore semantics.
bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
 SMLoc &EndLoc) {
 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}
2785
2786ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2787 SMLoc &EndLoc) {
2788 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2789 bool PendingErrors = getParser().hasPendingError();
2790 getParser().clearPendingErrors();
2791 if (PendingErrors)
2792 return ParseStatus::Failure;
2793 if (Result)
2794 return ParseStatus::NoMatch;
2795 return ParseStatus::Success;
2796}
2797
2798bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2799 RegisterKind RegKind,
2800 MCRegister Reg1, SMLoc Loc) {
2801 switch (RegKind) {
2802 case IS_SPECIAL:
2803 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2804 Reg = AMDGPU::EXEC;
2805 RegWidth = 64;
2806 return true;
2807 }
2808 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2809 Reg = AMDGPU::FLAT_SCR;
2810 RegWidth = 64;
2811 return true;
2812 }
2813 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2814 Reg = AMDGPU::XNACK_MASK;
2815 RegWidth = 64;
2816 return true;
2817 }
2818 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2819 Reg = AMDGPU::VCC;
2820 RegWidth = 64;
2821 return true;
2822 }
2823 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2824 Reg = AMDGPU::TBA;
2825 RegWidth = 64;
2826 return true;
2827 }
2828 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2829 Reg = AMDGPU::TMA;
2830 RegWidth = 64;
2831 return true;
2832 }
2833 Error(Loc, "register does not fit in the list");
2834 return false;
2835 case IS_VGPR:
2836 case IS_SGPR:
2837 case IS_AGPR:
2838 case IS_TTMP:
2839 if (Reg1 != Reg + RegWidth / 32) {
2840 Error(Loc, "registers in a list must have consecutive indices");
2841 return false;
2842 }
2843 RegWidth += 32;
2844 return true;
2845 default:
2846 llvm_unreachable("unexpected register kind");
2847 }
2848}
2849
// Prefix-name → register-kind entry for the RegularRegisters table.
// NOTE(review): the name member (presumably a StringLiteral `Name`) is elided
// in this excerpt.
struct RegInfo {
 RegisterKind Kind;
};
2854
// Recognized regular-register name prefixes. Lookup matches the FIRST
// prefix, so "acc" must stay before "a".
static constexpr RegInfo RegularRegisters[] = {
 {{"v"}, IS_VGPR},
 {{"s"}, IS_SGPR},
 {{"ttmp"}, IS_TTMP},
 {{"acc"}, IS_AGPR},
 {{"a"}, IS_AGPR},
};
2862
2863static bool isRegularReg(RegisterKind Kind) {
2864 return Kind == IS_VGPR ||
2865 Kind == IS_SGPR ||
2866 Kind == IS_TTMP ||
2867 Kind == IS_AGPR;
2868}
2869
 // NOTE(review): the signature is elided in this excerpt; this is the body of
 // getRegularRegInfo(StringRef Str) — returns the first RegularRegisters
 // entry whose name is a prefix of Str, or nullptr if none matches.
 for (const RegInfo &Reg : RegularRegisters)
 if (Str.starts_with(Reg.Name))
 return &Reg;
 return nullptr;
}
2876
2877static bool getRegNum(StringRef Str, unsigned& Num) {
2878 return !Str.getAsInteger(10, Num);
2879}
2880
2881bool
2882AMDGPUAsmParser::isRegister(const AsmToken &Token,
2883 const AsmToken &NextToken) const {
2884
2885 // A list of consecutive registers: [s0,s1,s2,s3]
2886 if (Token.is(AsmToken::LBrac))
2887 return true;
2888
2889 if (!Token.is(AsmToken::Identifier))
2890 return false;
2891
2892 // A single register like s0 or a range of registers like s[0:1]
2893
2894 StringRef Str = Token.getString();
2895 const RegInfo *Reg = getRegularRegInfo(Str);
2896 if (Reg) {
2897 StringRef RegName = Reg->Name;
2898 StringRef RegSuffix = Str.substr(RegName.size());
2899 if (!RegSuffix.empty()) {
2900 RegSuffix.consume_back(".l");
2901 RegSuffix.consume_back(".h");
2902 unsigned Num;
2903 // A single register with an index: rXX
2904 if (getRegNum(RegSuffix, Num))
2905 return true;
2906 } else {
2907 // A range of registers: r[XX:YY].
2908 if (NextToken.is(AsmToken::LBrac))
2909 return true;
2910 }
2911 }
2912
2913 return getSpecialRegForName(Str).isValid();
2914}
2915
// Convenience overload: test the current token and one token of lookahead.
bool
AMDGPUAsmParser::isRegister()
{
 return isRegister(getToken(), peekToken());
}
2921
2922MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2923 unsigned SubReg, unsigned RegWidth,
2924 SMLoc Loc) {
2925 assert(isRegularReg(RegKind));
2926
2927 unsigned AlignSize = 1;
2928 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2929 // SGPR and TTMP registers must be aligned.
2930 // Max required alignment is 4 dwords.
2931 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2932 }
2933
2934 if (RegNum % AlignSize != 0) {
2935 Error(Loc, "invalid register alignment");
2936 return MCRegister();
2937 }
2938
2939 unsigned RegIdx = RegNum / AlignSize;
2940 int RCID = getRegClass(RegKind, RegWidth);
2941 if (RCID == -1) {
2942 Error(Loc, "invalid or unsupported register size");
2943 return MCRegister();
2944 }
2945
2946 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2947 const MCRegisterClass RC = TRI->getRegClass(RCID);
2948 if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
2949 Error(Loc, "register index is out of range");
2950 return AMDGPU::NoRegister;
2951 }
2952
2953 if (RegKind == IS_VGPR && !isGFX1250Plus() && RegIdx + RegWidth / 32 > 256) {
2954 Error(Loc, "register index is out of range");
2955 return MCRegister();
2956 }
2957
2958 MCRegister Reg = RC.getRegister(RegIdx);
2959
2960 if (SubReg) {
2961 Reg = TRI->getSubReg(Reg, SubReg);
2962
2963 // Currently all regular registers have their .l and .h subregisters, so
2964 // we should never need to generate an error here.
2965 assert(Reg && "Invalid subregister!");
2966 }
2967
2968 return Reg;
2969}
2970
2971bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
2972 unsigned &SubReg) {
2973 int64_t RegLo, RegHi;
2974 if (!skipToken(AsmToken::LBrac, "missing register index"))
2975 return false;
2976
2977 SMLoc FirstIdxLoc = getLoc();
2978 SMLoc SecondIdxLoc;
2979
2980 if (!parseExpr(RegLo))
2981 return false;
2982
2983 if (trySkipToken(AsmToken::Colon)) {
2984 SecondIdxLoc = getLoc();
2985 if (!parseExpr(RegHi))
2986 return false;
2987 } else {
2988 RegHi = RegLo;
2989 }
2990
2991 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2992 return false;
2993
2994 if (!isUInt<32>(RegLo)) {
2995 Error(FirstIdxLoc, "invalid register index");
2996 return false;
2997 }
2998
2999 if (!isUInt<32>(RegHi)) {
3000 Error(SecondIdxLoc, "invalid register index");
3001 return false;
3002 }
3003
3004 if (RegLo > RegHi) {
3005 Error(FirstIdxLoc, "first register index should not exceed second index");
3006 return false;
3007 }
3008
3009 if (RegHi == RegLo) {
3010 StringRef RegSuffix = getTokenStr();
3011 if (RegSuffix == ".l") {
3012 SubReg = AMDGPU::lo16;
3013 lex();
3014 } else if (RegSuffix == ".h") {
3015 SubReg = AMDGPU::hi16;
3016 lex();
3017 }
3018 }
3019
3020 Num = static_cast<unsigned>(RegLo);
3021 RegWidth = 32 * ((RegHi - RegLo) + 1);
3022
3023 return true;
3024}
3025
3026MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
3027 unsigned &RegNum,
3028 unsigned &RegWidth,
3029 SmallVectorImpl<AsmToken> &Tokens) {
3030 assert(isToken(AsmToken::Identifier));
3031 MCRegister Reg = getSpecialRegForName(getTokenStr());
3032 if (Reg) {
3033 RegNum = 0;
3034 RegWidth = 32;
3035 RegKind = IS_SPECIAL;
3036 Tokens.push_back(getToken());
3037 lex(); // skip register name
3038 }
3039 return Reg;
3040}
3041
3042MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3043 unsigned &RegNum,
3044 unsigned &RegWidth,
3045 SmallVectorImpl<AsmToken> &Tokens) {
3046 assert(isToken(AsmToken::Identifier));
3047 StringRef RegName = getTokenStr();
3048 auto Loc = getLoc();
3049
3050 const RegInfo *RI = getRegularRegInfo(RegName);
3051 if (!RI) {
3052 Error(Loc, "invalid register name");
3053 return MCRegister();
3054 }
3055
3056 Tokens.push_back(getToken());
3057 lex(); // skip register name
3058
3059 RegKind = RI->Kind;
3060 StringRef RegSuffix = RegName.substr(RI->Name.size());
3061 unsigned SubReg = NoSubRegister;
3062 if (!RegSuffix.empty()) {
3063 if (RegSuffix.consume_back(".l"))
3064 SubReg = AMDGPU::lo16;
3065 else if (RegSuffix.consume_back(".h"))
3066 SubReg = AMDGPU::hi16;
3067
3068 // Single 32-bit register: vXX.
3069 if (!getRegNum(RegSuffix, RegNum)) {
3070 Error(Loc, "invalid register index");
3071 return MCRegister();
3072 }
3073 RegWidth = 32;
3074 } else {
3075 // Range of registers: v[XX:YY]. ":YY" is optional.
3076 if (!ParseRegRange(RegNum, RegWidth, SubReg))
3077 return MCRegister();
3078 }
3079
3080 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3081}
3082
3083MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3084 unsigned &RegNum, unsigned &RegWidth,
3085 SmallVectorImpl<AsmToken> &Tokens) {
3086 MCRegister Reg;
3087 auto ListLoc = getLoc();
3088
3089 if (!skipToken(AsmToken::LBrac,
3090 "expected a register or a list of registers")) {
3091 return MCRegister();
3092 }
3093
3094 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
3095
3096 auto Loc = getLoc();
3097 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
3098 return MCRegister();
3099 if (RegWidth != 32) {
3100 Error(Loc, "expected a single 32-bit register");
3101 return MCRegister();
3102 }
3103
3104 for (; trySkipToken(AsmToken::Comma); ) {
3105 RegisterKind NextRegKind;
3106 MCRegister NextReg;
3107 unsigned NextRegNum, NextRegWidth;
3108 Loc = getLoc();
3109
3110 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3111 NextRegNum, NextRegWidth,
3112 Tokens)) {
3113 return MCRegister();
3114 }
3115 if (NextRegWidth != 32) {
3116 Error(Loc, "expected a single 32-bit register");
3117 return MCRegister();
3118 }
3119 if (NextRegKind != RegKind) {
3120 Error(Loc, "registers in a list must be of the same kind");
3121 return MCRegister();
3122 }
3123 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
3124 return MCRegister();
3125 }
3126
3127 if (!skipToken(AsmToken::RBrac,
3128 "expected a comma or a closing square bracket")) {
3129 return MCRegister();
3130 }
3131
3132 if (isRegularReg(RegKind))
3133 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3134
3135 return Reg;
3136}
3137
// Top-level register parser: dispatches to special-name, regular (vN/sN/...),
// or register-list ([v0,v1,...]) syntax based on the current token, then
// verifies that the parsed register exists on the current subtarget.
// Returns false (with a pending parser error) on failure.
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
                                          MCRegister &Reg, unsigned &RegNum,
                                          unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  auto Loc = getLoc();
  Reg = MCRegister();

  if (isToken(AsmToken::Identifier)) {
    // Try a special register name first; fall back to a regular register.
    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
    if (!Reg)
      Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
  } else {
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (!Reg) {
    // Every failure path above is expected to have reported an error.
    assert(Parser.hasPendingError());
    return false;
  }

  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");
    } else {
      // NOTE(review): the start of this Error(...) call (the line naming the
      // register) appears elided in this copy; verify against upstream.
      " register not available on this GPU");
    }
    return false;
  }

  return true;
}
3171
3172bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3173 MCRegister &Reg, unsigned &RegNum,
3174 unsigned &RegWidth,
3175 bool RestoreOnFailure /*=false*/) {
3176 Reg = MCRegister();
3177
3179 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3180 if (RestoreOnFailure) {
3181 while (!Tokens.empty()) {
3182 getLexer().UnLex(Tokens.pop_back_val());
3183 }
3184 }
3185 return true;
3186 }
3187 return false;
3188}
3189
3190std::optional<StringRef>
3191AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3192 switch (RegKind) {
3193 case IS_VGPR:
3194 return StringRef(".amdgcn.next_free_vgpr");
3195 case IS_SGPR:
3196 return StringRef(".amdgcn.next_free_sgpr");
3197 default:
3198 return std::nullopt;
3199 }
3200}
3201
// Creates the GPR-count tracking symbol for \p RegKind and marks it
// redefinable so subsequent register uses can update it.
void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
  // NOTE(review): a statement appears elided here in this copy (presumably
  // the initial Sym->setVariableValue(...) seeding the count); verify
  // against upstream.
  Sym->setRedefinable(true);
}
3209
// Raises the .amdgcn.next_free_{v,s}gpr tracking symbol so it covers the
// register use [DwordRegIndex, DwordRegIndex + ceil(RegWidth/32)).
// Returns false (after reporting an error) if the symbol has been redefined
// to something that is not an absolute expression.
bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // Symbols are only defined for GCN targets
  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
    return true;

  auto SymbolName = getGprCountSymbolName(RegKind);
  if (!SymbolName)
    return true; // no tracking symbol for this register kind
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  // Highest dword register index touched by this use.
  int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
  int64_t OldCount;

  if (!Sym->isVariable())
    return !Error(getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue()->evaluateAsAbsolute(OldCount))
    return !Error(
        getLoc(),
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  if (OldCount <= NewMax)
    // NOTE(review): the update statement (presumably setting the symbol to
    // NewMax + 1) appears elided in this copy; verify against upstream.

  return true;
}
3238
3239std::unique_ptr<AMDGPUOperand>
3240AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3241 const auto &Tok = getToken();
3242 SMLoc StartLoc = Tok.getLoc();
3243 SMLoc EndLoc = Tok.getEndLoc();
3244 RegisterKind RegKind;
3245 MCRegister Reg;
3246 unsigned RegNum, RegWidth;
3247
3248 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3249 return nullptr;
3250 }
3251 if (isHsaAbi(getSTI())) {
3252 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3253 return nullptr;
3254 } else
3255 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3256 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3257}
3258
3259ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3260 bool HasSP3AbsModifier, LitModifier Lit) {
3261 // TODO: add syntactic sugar for 1/(2*PI)
3262
3263 if (isRegister() || isModifier())
3264 return ParseStatus::NoMatch;
3265
3266 if (Lit == LitModifier::None) {
3267 if (trySkipId("lit"))
3268 Lit = LitModifier::Lit;
3269 else if (trySkipId("lit64"))
3270 Lit = LitModifier::Lit64;
3271
3272 if (Lit != LitModifier::None) {
3273 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3274 return ParseStatus::Failure;
3275 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, Lit);
3276 if (S.isSuccess() &&
3277 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3278 return ParseStatus::Failure;
3279 return S;
3280 }
3281 }
3282
3283 const auto& Tok = getToken();
3284 const auto& NextTok = peekToken();
3285 bool IsReal = Tok.is(AsmToken::Real);
3286 SMLoc S = getLoc();
3287 bool Negate = false;
3288
3289 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3290 lex();
3291 IsReal = true;
3292 Negate = true;
3293 }
3294
3295 AMDGPUOperand::Modifiers Mods;
3296 Mods.Lit = Lit;
3297
3298 if (IsReal) {
3299 // Floating-point expressions are not supported.
3300 // Can only allow floating-point literals with an
3301 // optional sign.
3302
3303 StringRef Num = getTokenStr();
3304 lex();
3305
3306 APFloat RealVal(APFloat::IEEEdouble());
3307 auto roundMode = APFloat::rmNearestTiesToEven;
3308 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3309 return ParseStatus::Failure;
3310 if (Negate)
3311 RealVal.changeSign();
3312
3313 Operands.push_back(
3314 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3315 AMDGPUOperand::ImmTyNone, true));
3316 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3317 Op.setModifiers(Mods);
3318
3319 return ParseStatus::Success;
3320
3321 } else {
3322 int64_t IntVal;
3323 const MCExpr *Expr;
3324 SMLoc S = getLoc();
3325
3326 if (HasSP3AbsModifier) {
3327 // This is a workaround for handling expressions
3328 // as arguments of SP3 'abs' modifier, for example:
3329 // |1.0|
3330 // |-1|
3331 // |1+x|
3332 // This syntax is not compatible with syntax of standard
3333 // MC expressions (due to the trailing '|').
3334 SMLoc EndLoc;
3335 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3336 return ParseStatus::Failure;
3337 } else {
3338 if (Parser.parseExpression(Expr))
3339 return ParseStatus::Failure;
3340 }
3341
3342 if (Expr->evaluateAsAbsolute(IntVal)) {
3343 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3344 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3345 Op.setModifiers(Mods);
3346 } else {
3347 if (Lit != LitModifier::None)
3348 return ParseStatus::NoMatch;
3349 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3350 }
3351
3352 return ParseStatus::Success;
3353 }
3354
3355 return ParseStatus::NoMatch;
3356}
3357
3358ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3359 if (!isRegister())
3360 return ParseStatus::NoMatch;
3361
3362 if (auto R = parseRegister()) {
3363 assert(R->isReg());
3364 Operands.push_back(std::move(R));
3365 return ParseStatus::Success;
3366 }
3367 return ParseStatus::Failure;
3368}
3369
3370ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3371 bool HasSP3AbsMod, LitModifier Lit) {
3372 ParseStatus Res = parseReg(Operands);
3373 if (!Res.isNoMatch())
3374 return Res;
3375 if (isModifier())
3376 return ParseStatus::NoMatch;
3377 return parseImm(Operands, HasSP3AbsMod, Lit);
3378}
3379
3380bool
3381AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3382 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3383 const auto &str = Token.getString();
3384 return str == "abs" || str == "neg" || str == "sext";
3385 }
3386 return false;
3387}
3388
3389bool
3390AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3391 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3392}
3393
3394bool
3395AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3396 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3397}
3398
3399bool
3400AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3401 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3402}
3403
3404// Check if this is an operand modifier or an opcode modifier
3405// which may look like an expression but it is not. We should
3406// avoid parsing these modifiers as expressions. Currently
3407// recognized sequences are:
3408// |...|
3409// abs(...)
3410// neg(...)
3411// sext(...)
3412// -reg
3413// -|...|
3414// -abs(...)
3415// name:...
3416//
3417bool
3418AMDGPUAsmParser::isModifier() {
3419
3420 AsmToken Tok = getToken();
3421 AsmToken NextToken[2];
3422 peekTokens(NextToken);
3423
3424 return isOperandModifier(Tok, NextToken[0]) ||
3425 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3426 isOpcodeModifierWithVal(Tok, NextToken[0]);
3427}
3428
3429// Check if the current token is an SP3 'neg' modifier.
3430// Currently this modifier is allowed in the following context:
3431//
3432// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3433// 2. Before an 'abs' modifier: -abs(...)
3434// 3. Before an SP3 'abs' modifier: -|...|
3435//
3436// In all other cases "-" is handled as a part
3437// of an expression that follows the sign.
3438//
3439// Note: When "-" is followed by an integer literal,
3440// this is interpreted as integer negation rather
3441// than a floating-point NEG modifier applied to N.
3442// Beside being contr-intuitive, such use of floating-point
3443// NEG modifier would have resulted in different meaning
3444// of integer literals used with VOP1/2/C and VOP3,
3445// for example:
3446// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3447// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3448// Negative fp literals with preceding "-" are
3449// handled likewise for uniformity
3450//
3451bool
3452AMDGPUAsmParser::parseSP3NegModifier() {
3453
3454 AsmToken NextToken[2];
3455 peekTokens(NextToken);
3456
3457 if (isToken(AsmToken::Minus) &&
3458 (isRegister(NextToken[0], NextToken[1]) ||
3459 NextToken[0].is(AsmToken::Pipe) ||
3460 isId(NextToken[0], "abs"))) {
3461 lex();
3462 return true;
3463 }
3464
3465 return false;
3466}
3467
// Parses a register or immediate together with any floating-point input
// modifiers: SP3-style (-x, |x|) and named (neg(...), abs(...)), plus an
// optional lit()/lit64() literal wrapper. The parsed modifiers are attached
// to the resulting operand.
ParseStatus
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
    return Error(getLoc(), "invalid syntax, expected 'neg' modifier");

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  // The two negation spellings are mutually exclusive.
  if (Neg && SP3Neg)
    return Error(Loc, "expected register or immediate");
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return ParseStatus::Failure;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return ParseStatus::Failure;

  LitModifier Lit = LitModifier::None;
  if (trySkipId("lit")) {
    Lit = LitModifier::Lit;
    if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
      return ParseStatus::Failure;
  } else if (trySkipId("lit64")) {
    Lit = LitModifier::Lit64;
    if (!skipToken(AsmToken::LParen, "expected left paren after lit64"))
      return ParseStatus::Failure;
    if (!has64BitLiterals())
      return Error(Loc, "lit64 is not supported on this GPU");
  }

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  // Likewise, abs(...) and |...| cannot be combined.
  if (Abs && SP3Abs)
    return Error(Loc, "expected register or immediate");

  ParseStatus Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs, Lit);
  } else {
    Res = parseReg(Operands);
  }
  if (!Res.isSuccess())
    // Any modifier seen means we are committed: report failure, not NoMatch.
    // NOTE(review): the middle of this conditional expression (presumably
    // "? ParseStatus::Failure") appears elided in this copy; verify upstream.
    return (SP3Neg || Neg || SP3Abs || Abs || Lit != LitModifier::None)
               : Res;

  if (Lit != LitModifier::None && !Operands.back()->isImm())
    Error(Loc, "expected immediate with lit modifier");

  // Close every bracket/paren opened above, innermost first.
  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    return ParseStatus::Failure;
  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return ParseStatus::Failure;
  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return ParseStatus::Failure;
  if (Lit != LitModifier::None &&
      !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return ParseStatus::Failure;

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;
  Mods.Lit = Lit;

  if (Mods.hasFPModifiers() || Lit != LitModifier::None) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr())
      return Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);
  }
  return ParseStatus::Success;
}
3547
3548ParseStatus
3549AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3550 bool AllowImm) {
3551 bool Sext = trySkipId("sext");
3552 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3553 return ParseStatus::Failure;
3554
3555 ParseStatus Res;
3556 if (AllowImm) {
3557 Res = parseRegOrImm(Operands);
3558 } else {
3559 Res = parseReg(Operands);
3560 }
3561 if (!Res.isSuccess())
3562 return Sext ? ParseStatus::Failure : Res;
3563
3564 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3565 return ParseStatus::Failure;
3566
3567 AMDGPUOperand::Modifiers Mods;
3568 Mods.Sext = Sext;
3569
3570 if (Mods.hasIntModifiers()) {
3571 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3572 if (Op.isExpr())
3573 return Error(Op.getStartLoc(), "expected an absolute expression");
3574 Op.setModifiers(Mods);
3575 }
3576
3577 return ParseStatus::Success;
3578}
3579
// Register-only variant of parseRegOrImmWithFPInputMods (immediates are
// rejected).
ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, false);
}
3583
// Register-only variant of parseRegOrImmWithIntInputMods (immediates are
// rejected).
ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, false);
}
3587
3588ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3589 auto Loc = getLoc();
3590 if (trySkipId("off")) {
3591 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3592 AMDGPUOperand::ImmTyOff, false));
3593 return ParseStatus::Success;
3594 }
3595
3596 if (!isRegister())
3597 return ParseStatus::NoMatch;
3598
3599 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3600 if (Reg) {
3601 Operands.push_back(std::move(Reg));
3602 return ParseStatus::Success;
3603 }
3604
3605 return ParseStatus::Failure;
3606}
3607
3608unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3609 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3610
3611 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3612 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3613 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3614 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3615 return Match_InvalidOperand;
3616
3617 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3618 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3619 // v_mac_f32/16 allow only dst_sel == DWORD;
3620 auto OpNum =
3621 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3622 const auto &Op = Inst.getOperand(OpNum);
3623 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3624 return Match_InvalidOperand;
3625 }
3626 }
3627
3628 // Asm can first try to match VOPD or VOPD3. By failing early here with
3629 // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3630 // Checking later during validateInstruction does not give a chance to retry
3631 // parsing as a different encoding.
3632 if (tryAnotherVOPDEncoding(Inst))
3633 return Match_InvalidOperand;
3634
3635 return Match_Success;
3636}
3637
3647
// What asm variants we should check
// Returns the matcher variant list implied by any user-forced encoding;
// all variants when no encoding is forced.
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (isForcedDPP() && isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
    return ArrayRef(Variants);
  }
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return ArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return ArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
    // NOTE(review): the second array element (presumably SDWA9) and the
    // closing brace appear elided in this copy; verify against upstream.
    return ArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return ArrayRef(Variants);
  }

  return getAllVariants();
}
3677
3678StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3679 if (isForcedDPP() && isForcedVOP3())
3680 return "e64_dpp";
3681
3682 if (getForcedEncodingSize() == 32)
3683 return "e32";
3684
3685 if (isForcedVOP3())
3686 return "e64";
3687
3688 if (isForcedSDWA())
3689 return "sdwa";
3690
3691 if (isForcedDPP())
3692 return "dpp";
3693
3694 return "";
3695}
3696
3697MCRegister
3698AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3699 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3700 for (MCPhysReg Reg : Desc.implicit_uses()) {
3701 switch (Reg) {
3702 case AMDGPU::FLAT_SCR:
3703 case AMDGPU::VCC:
3704 case AMDGPU::VCC_LO:
3705 case AMDGPU::VCC_HI:
3706 case AMDGPU::M0:
3707 return Reg;
3708 default:
3709 break;
3710 }
3711 }
3712 return MCRegister();
3713}
3714
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 support no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
//
// Decides whether operand \p OpIdx of \p Inst holds a value encodable as an
// inline constant, dispatching on the expected operand size (8/4/2 bytes).
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  // NOTE(review): the guard condition preceding this early return appears
  // elided in this copy; verify against upstream.
    return false;
  }

  const MCOperand &MO = Inst.getOperand(OpIdx);

  // Operand is either a plain immediate or a literal expression.
  int64_t Val = MO.isImm() ? MO.getImm() : getLitValue(MO.getExpr());
  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);

  switch (OpSize) { // expected operand size
  case 8:
    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
  case 4:
    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
  case 2: {
    const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
    // NOTE(review): the OperandType case labels selecting between the
    // I16/FP16/BF16 checks below appear elided in this copy; verify against
    // upstream before relying on this dispatch.
      return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());




      return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());

      return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());

      return false;

    llvm_unreachable("invalid operand type");
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
3776
// Returns how many distinct scalar values (SGPRs/literals) \p Opcode may
// read through the constant bus: 1 before GFX10, and on GFX10+ either 2 or,
// for 64-bit shifts, 1.
unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  if (!isGFX10Plus())
    return 1;

  switch (Opcode) {
  // 64-bit shift instructions can use only one scalar value input
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
  case AMDGPU::V_LSHLREV_B64_e32_gfx12:
  case AMDGPU::V_LSHLREV_B64_e64_gfx12:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
  case AMDGPU::V_LSHRREV_B64_e64_gfx12:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
  case AMDGPU::V_ASHRREV_I64_e64_gfx12:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:
    return 1;
  default:
    return 2;
  }
}
3804
// Upper bound on the number of regular source operands (VOPD: 3 per
// component).
constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;

// Get regular operand indices in the same order as specified
// in the instruction (but append mandatory literals to the end).
// NOTE(review): the OperandIndices alias and the function's signature line
// (return type and name, and the Opcode parameter) appear elided in this
// copy; verify against upstream.
                           bool AddMandatoryLiterals = false) {

  int16_t ImmIdx =
      AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;

  // VOPD packs two component instructions: X sources, then Y sources, then
  // the mandatory literal slots.
  if (isVOPD(Opcode)) {
    int16_t ImmXIdx =
        AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;

    return {getNamedOperandIdx(Opcode, OpName::src0X),
            getNamedOperandIdx(Opcode, OpName::vsrc1X),
            getNamedOperandIdx(Opcode, OpName::vsrc2X),
            getNamedOperandIdx(Opcode, OpName::src0Y),
            getNamedOperandIdx(Opcode, OpName::vsrc1Y),
            getNamedOperandIdx(Opcode, OpName::vsrc2Y),
            ImmXIdx,
            ImmIdx};
  }

  return {getNamedOperandIdx(Opcode, OpName::src0),
          getNamedOperandIdx(Opcode, OpName::src1),
          getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
}
3834
3835bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3836 const MCOperand &MO = Inst.getOperand(OpIdx);
3837 if (MO.isImm())
3838 return !isInlineConstant(Inst, OpIdx);
3839 if (MO.isReg()) {
3840 auto Reg = MO.getReg();
3841 if (!Reg)
3842 return false;
3843 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3844 auto PReg = mc2PseudoReg(Reg);
3845 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3846 }
3847 return true;
3848}
3849
3850// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3851// Writelane is special in that it can use SGPR and M0 (which would normally
3852// count as using the constant bus twice - but in this case it is allowed since
3853// the lane selector doesn't count as a use of the constant bus). However, it is
3854// still required to abide by the 1 SGPR rule.
3855static bool checkWriteLane(const MCInst &Inst) {
3856 const unsigned Opcode = Inst.getOpcode();
3857 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3858 return false;
3859 const MCOperand &LaneSelOp = Inst.getOperand(2);
3860 if (!LaneSelOp.isReg())
3861 return false;
3862 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3863 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3864}
3865
// Verifies that \p Inst does not read more scalar values (distinct SGPRs
// plus literal constants) than the subtarget's constant bus permits.
// Reports an error at the offending operand and returns false on violation.
bool AMDGPUAsmParser::validateConstantBusLimitations(
    const MCInst &Inst, const OperandVector &Operands) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  MCRegister LastSGPR;
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;

  // NOTE(review): the TSFlags mask tested here (presumably the
  // VOP1/2/3/C/SDWA/DPP encoding flags) appears elided in this copy; verify
  // against upstream.
  if (!(Desc.TSFlags &
      !isVOPD(Opcode))
    return true;

  if (checkWriteLane(Inst))
    return true;

  // Check special imm operands (used by madmk, etc)
  if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
    ++NumLiterals;
    LiteralSize = 4;
  }

  SmallDenseSet<MCRegister> SGPRsUsed;
  // Implicit scalar reads (VCC, M0, FLAT_SCR) also occupy a bus slot.
  MCRegister SGPRUsed = findImplicitSGPRReadInVOP(Inst);
  if (SGPRUsed) {
    SGPRsUsed.insert(SGPRUsed);
    ++ConstantBusUseCount;
  }

  OperandIndices OpIndices = getSrcOperandIndices(Opcode);

  unsigned ConstantBusLimit = getConstantBusLimit(Opcode);

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      continue; // operand not present for this opcode

    const MCOperand &MO = Inst.getOperand(OpIdx);
    if (usesConstantBus(Inst, OpIdx)) {
      if (MO.isReg()) {
        LastSGPR = mc2PseudoReg(MO.getReg());
        // Pairs of registers with a partial intersections like these
        // s0, s[0:1]
        // flat_scratch_lo, flat_scratch
        // flat_scratch_lo, flat_scratch_hi
        // are theoretically valid but they are disabled anyway.
        // Note that this code mimics SIInstrInfo::verifyInstruction
        if (SGPRsUsed.insert(LastSGPR).second) {
          ++ConstantBusUseCount;
        }
      } else { // Expression or a literal

        if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
          continue; // special operand like VINTERP attr_chan

        // An instruction may use only one literal.
        // This has been validated on the previous step.
        // See validateVOPLiteral.
        // This literal may be used as more than one operand.
        // If all these operands are of the same size,
        // this literal counts as one scalar value.
        // Otherwise it counts as 2 scalar values.
        // See "GFX10 Shader Programming", section 3.6.2.3.

        // NOTE(review): the declaration of 'Size' (the operand's literal
        // size) appears elided in this copy; verify against upstream.
        if (Size < 4)
          Size = 4;

        if (NumLiterals == 0) {
          NumLiterals = 1;
          LiteralSize = Size;
        } else if (LiteralSize != Size) {
          NumLiterals = 2;
        }
      }
    }

    if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
      Error(getOperandLoc(Operands, OpIdx),
            "invalid operand (violates constant bus restrictions)");
      return false;
    }
  }
  return true;
}
3953
3954std::optional<unsigned>
3955AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
3956
3957 const unsigned Opcode = Inst.getOpcode();
3958 if (!isVOPD(Opcode))
3959 return {};
3960
3961 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3962
3963 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3964 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3965 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3966 ? Opr.getReg()
3967 : MCRegister();
3968 };
3969
3970 // On GFX1170+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
3971 // source-cache.
3972 bool SkipSrc =
3973 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1170 ||
3974 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
3975 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
3976 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx13 ||
3977 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250 ||
3978 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx13;
3979 bool AllowSameVGPR = isGFX1250Plus();
3980
3981 if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
3982 for (auto OpName : {OpName::src0X, OpName::src0Y}) {
3983 int I = getNamedOperandIdx(Opcode, OpName);
3984 const MCOperand &Op = Inst.getOperand(I);
3985 if (!Op.isImm())
3986 continue;
3987 int64_t Imm = Op.getImm();
3988 if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
3989 !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
3990 return (unsigned)I;
3991 }
3992
3993 for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
3994 OpName::vsrc2Y, OpName::imm}) {
3995 int I = getNamedOperandIdx(Opcode, OpName);
3996 if (I == -1)
3997 continue;
3998 const MCOperand &Op = Inst.getOperand(I);
3999 if (Op.isImm())
4000 return (unsigned)I;
4001 }
4002 }
4003
4004 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4005 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
4006 getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
4007
4008 return InvalidCompOprIdx;
4009}
4010
4011bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
4012 const OperandVector &Operands) {
4013
4014 unsigned Opcode = Inst.getOpcode();
4015 bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
4016
4017 if (AsVOPD3) {
4018 for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
4019 AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
4020 if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
4021 (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
4022 Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
4023 }
4024 }
4025
4026 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
4027 if (!InvalidCompOprIdx.has_value())
4028 return true;
4029
4030 auto CompOprIdx = *InvalidCompOprIdx;
4031 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4032 auto ParsedIdx =
4033 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
4034 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
4035 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
4036
4037 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
4038 if (CompOprIdx == VOPD::Component::DST) {
4039 if (AsVOPD3)
4040 Error(Loc, "dst registers must be distinct");
4041 else
4042 Error(Loc, "one dst register must be even and the other odd");
4043 } else {
4044 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
4045 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
4046 " operands must use different VGPR banks");
4047 }
4048
4049 return false;
4050}
4051
// \returns true if \p Inst does not satisfy VOPD constraints, but can be
// potentially used as VOPD3 with the same operands.
bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
  // First check if it fits VOPD
  auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
  if (!InvalidCompOprIdx.has_value())
    return false;

  // Then if it fits VOPD3
  InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
  if (InvalidCompOprIdx.has_value()) {
    // If failed operand is dst it is better to show error about VOPD3
    // instruction as it has more capabilities and error message will be
    // more informative. If the dst is not legal for VOPD3, then it is not
    // legal for VOPD either.
    if (*InvalidCompOprIdx == VOPD::Component::DST)
      return true;

    // Otherwise prefer VOPD as we may find ourselves in an awkward situation
    // with a conflict in tied implicit src2 of fmac and no asm operand
    // to point to.
    return false;
  }
  return true;
}
4077
// \returns true if a VOPD3 instruction can be also represented as a shorter
// VOPD encoding.
bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const auto &II = getVOPDInstInfo(Opcode, &MII);
  unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
  // Both components must be individually encodable as plain (non-VOPD3) VOPD.
  if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
      !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
    return false;

  // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
  // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
  // be parsed as VOPD which does not accept src2.
  if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
      II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
    return false;

  // If any modifiers are set this cannot be VOPD.
  for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
                      OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
                      OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
    int I = getNamedOperandIdx(Opcode, OpName);
    if (I == -1)
      continue;
    if (Inst.getOperand(I).getImm())
      return false;
  }

  // Representable as VOPD only if it does NOT need the VOPD3 form.
  return !tryVOPD3(Inst);
}
4108
4109// VOPD3 has more relaxed register constraints than VOPD. We prefer shorter VOPD
4110// form but switch to VOPD3 otherwise.
4111bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4112 const unsigned Opcode = Inst.getOpcode();
4113 if (!isGFX1250Plus() || !isVOPD(Opcode))
4114 return false;
4115
4116 if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4117 return tryVOPD(Inst);
4118 return tryVOPD3(Inst);
4119}
4120
4121bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4122
4123 const unsigned Opc = Inst.getOpcode();
4124 const MCInstrDesc &Desc = MII.get(Opc);
4125
4126 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
4127 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
4128 assert(ClampIdx != -1);
4129 return Inst.getOperand(ClampIdx).getImm() == 0;
4130 }
4131
4132 return true;
4133}
4134
4137
// Verify that the size of the MIMG vdata register tuple matches the number of
// components implied by dmask, adjusted for packed d16 and the extra tfe
// register.
bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & MIMGFlags) == 0)
    return true;

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);

  if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
    return true;

  if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
    return true;

  unsigned VDataSize = getRegOperandSize(Desc, VDataIdx);
  // tfe adds one extra destination register when enabled.
  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
  // dmask == 0 behaves as a single-component access for sizing purposes.
  if (DMask == 0)
    DMask = 1;

  bool IsPackedD16 = false;
  // Gather4 always returns four components regardless of dmask.
  unsigned DataSize =
      (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
  if (hasPackedD16()) {
    int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
    IsPackedD16 = D16Idx >= 0;
    // Packed d16 halves the register count (rounded up).
    if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
      DataSize = (DataSize + 1) / 2;
  }

  // VDataSize is in bytes; each data register is 4 bytes.
  if ((VDataSize / 4) == DataSize + TFESize)
    return true;

  StringRef Modifiers;
  if (isGFX90A())
    Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
  else
    Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";

  Error(IDLoc, Twine("image data size does not match ") + Modifiers);
  return false;
}
4184
4185bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc) {
4186 const unsigned Opc = Inst.getOpcode();
4187 const MCInstrDesc &Desc = MII.get(Opc);
4188
4189 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
4190 return true;
4191
4192 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4193
4194 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4196 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
4197 AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
4198 ? AMDGPU::OpName::srsrc
4199 : AMDGPU::OpName::rsrc;
4200 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
4201 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4202 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
4203
4204 assert(VAddr0Idx != -1);
4205 assert(SrsrcIdx != -1);
4206 assert(SrsrcIdx > VAddr0Idx);
4207
4208 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
4209 if (BaseOpcode->BVH) {
4210 if (IsA16 == BaseOpcode->A16)
4211 return true;
4212 Error(IDLoc, "image address size does not match a16");
4213 return false;
4214 }
4215
4216 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4217 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4218 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4219 unsigned ActualAddrSize =
4220 IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(Desc, VAddr0Idx) / 4;
4221
4222 unsigned ExpectedAddrSize =
4223 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
4224
4225 if (IsNSA) {
4226 if (hasPartialNSAEncoding() &&
4227 ExpectedAddrSize >
4229 int VAddrLastIdx = SrsrcIdx - 1;
4230 unsigned VAddrLastSize = getRegOperandSize(Desc, VAddrLastIdx) / 4;
4231
4232 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4233 }
4234 } else {
4235 if (ExpectedAddrSize > 12)
4236 ExpectedAddrSize = 16;
4237
4238 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
4239 // This provides backward compatibility for assembly created
4240 // before 160b/192b/224b types were directly supported.
4241 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4242 return true;
4243 }
4244
4245 if (ActualAddrSize == ExpectedAddrSize)
4246 return true;
4247
4248 Error(IDLoc, "image address size does not match dim and a16");
4249 return false;
4250}
4251
4252bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4253
4254 const unsigned Opc = Inst.getOpcode();
4255 const MCInstrDesc &Desc = MII.get(Opc);
4256
4257 if ((Desc.TSFlags & MIMGFlags) == 0)
4258 return true;
4259 if (!Desc.mayLoad() || !Desc.mayStore())
4260 return true; // Not atomic
4261
4262 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4263 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4264
4265 // This is an incomplete check because image_atomic_cmpswap
4266 // may only use 0x3 and 0xf while other atomic operations
4267 // may use 0x1 and 0x3. However these limitations are
4268 // verified when we check that dmask matches dst size.
4269 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4270}
4271
4272bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4273
4274 const unsigned Opc = Inst.getOpcode();
4275 const MCInstrDesc &Desc = MII.get(Opc);
4276
4277 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4278 return true;
4279
4280 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4281 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4282
4283 // GATHER4 instructions use dmask in a different fashion compared to
4284 // other MIMG instructions. The only useful DMASK values are
4285 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4286 // (red,red,red,red) etc.) The ISA document doesn't mention
4287 // this.
4288 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4289}
4290
4291bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4292 const OperandVector &Operands) {
4293 if (!isGFX10Plus())
4294 return true;
4295
4296 const unsigned Opc = Inst.getOpcode();
4297 const MCInstrDesc &Desc = MII.get(Opc);
4298
4299 if ((Desc.TSFlags & MIMGFlags) == 0)
4300 return true;
4301
4302 // image_bvh_intersect_ray instructions do not have dim
4304 return true;
4305
4306 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4307 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4308 if (Op.isDim())
4309 return true;
4310 }
4311 return false;
4312}
4313
4314bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4315 const unsigned Opc = Inst.getOpcode();
4316 const MCInstrDesc &Desc = MII.get(Opc);
4317
4318 if ((Desc.TSFlags & MIMGFlags) == 0)
4319 return true;
4320
4321 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4322 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4324
4325 if (!BaseOpcode->MSAA)
4326 return true;
4327
4328 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4329 assert(DimIdx != -1);
4330
4331 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4332 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4333
4334 return DimInfo->MSAA;
4335}
4336
4337static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4338{
4339 switch (Opcode) {
4340 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4341 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4342 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4343 return true;
4344 default:
4345 return false;
4346 }
4347}
4348
4349// movrels* opcodes should only allow VGPRS as src0.
4350// This is specified in .td description for vop1/vop3,
4351// but sdwa is handled differently. See isSDWAOperand.
4352bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4353 const OperandVector &Operands) {
4354
4355 const unsigned Opc = Inst.getOpcode();
4356 const MCInstrDesc &Desc = MII.get(Opc);
4357
4358 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4359 return true;
4360
4361 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4362 assert(Src0Idx != -1);
4363
4364 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4365 if (Src0.isReg()) {
4366 auto Reg = mc2PseudoReg(Src0.getReg());
4367 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4368 if (!isSGPR(Reg, TRI))
4369 return true;
4370 }
4371
4372 Error(getOperandLoc(Operands, Src0Idx), "source operand must be a VGPR");
4373 return false;
4374}
4375
4376bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4377 const OperandVector &Operands) {
4378
4379 const unsigned Opc = Inst.getOpcode();
4380
4381 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4382 return true;
4383
4384 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4385 assert(Src0Idx != -1);
4386
4387 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4388 if (!Src0.isReg())
4389 return true;
4390
4391 auto Reg = mc2PseudoReg(Src0.getReg());
4392 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4393 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4394 Error(getOperandLoc(Operands, Src0Idx),
4395 "source operand must be either a VGPR or an inline constant");
4396 return false;
4397 }
4398
4399 return true;
4400}
4401
4402bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4403 const OperandVector &Operands) {
4404 unsigned Opcode = Inst.getOpcode();
4405 const MCInstrDesc &Desc = MII.get(Opcode);
4406
4407 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4408 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4409 return true;
4410
4411 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4412 if (Src2Idx == -1)
4413 return true;
4414
4415 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4416 Error(getOperandLoc(Operands, Src2Idx),
4417 "inline constants are not allowed for this operand");
4418 return false;
4419 }
4420
4421 return true;
4422}
4423
// Validate MAI (MFMA) instructions: register tuple sizes for scaled f8f6f4
// formats, and partial overlap between src2 and dst for wide accumulators.
bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
                                   const OperandVector &Operands) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
    return true;

  int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
  if (BlgpIdx != -1) {
    if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
      int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);

      // cbsz selects the format of src0 (A), blgp the format of src1 (B).
      unsigned CBSZ = Inst.getOperand(CbszIdx).getImm();
      unsigned BLGP = Inst.getOperand(BlgpIdx).getImm();

      // Validate the correct register size was used for the floating point
      // format operands

      bool Success = true;
      if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) {
        int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
        Error(getOperandLoc(Operands, Src0Idx),
              "wrong register tuple size for cbsz value " + Twine(CBSZ));
        Success = false;
      }

      if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) {
        int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
        Error(getOperandLoc(Operands, Src1Idx),
              "wrong register tuple size for blgp value " + Twine(BLGP));
        Success = false;
      }

      return Success;
    }
  }

  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
  if (Src2Idx == -1)
    return true;

  const MCOperand &Src2 = Inst.getOperand(Src2Idx);
  if (!Src2.isReg())
    return true;

  MCRegister Src2Reg = Src2.getReg();
  MCRegister DstReg = Inst.getOperand(0).getReg();
  // Exact equality (fully tied accumulator) is allowed.
  if (Src2Reg == DstReg)
    return true;

  // Only enforce the overlap rule for destinations wider than 128 bits.
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[0], HwMode))
          .getSizeInBits() <= 128)
    return true;

  if (TRI->regsOverlap(Src2Reg, DstReg)) {
    Error(getOperandLoc(Operands, Src2Idx),
          "source 2 operand must not partially overlap with dst");
    return false;
  }

  return true;
}
4488
4489bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4490 switch (Inst.getOpcode()) {
4491 default:
4492 return true;
4493 case V_DIV_SCALE_F32_gfx6_gfx7:
4494 case V_DIV_SCALE_F32_vi:
4495 case V_DIV_SCALE_F32_gfx10:
4496 case V_DIV_SCALE_F64_gfx6_gfx7:
4497 case V_DIV_SCALE_F64_vi:
4498 case V_DIV_SCALE_F64_gfx10:
4499 break;
4500 }
4501
4502 // TODO: Check that src0 = src1 or src2.
4503
4504 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4505 AMDGPU::OpName::src2_modifiers,
4506 AMDGPU::OpName::src2_modifiers}) {
4507 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4508 .getImm() &
4510 return false;
4511 }
4512 }
4513
4514 return true;
4515}
4516
4517bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4518
4519 const unsigned Opc = Inst.getOpcode();
4520 const MCInstrDesc &Desc = MII.get(Opc);
4521
4522 if ((Desc.TSFlags & MIMGFlags) == 0)
4523 return true;
4524
4525 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4526 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4527 if (isCI() || isSI())
4528 return false;
4529 }
4530
4531 return true;
4532}
4533
4534bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4535 const unsigned Opc = Inst.getOpcode();
4536 const MCInstrDesc &Desc = MII.get(Opc);
4537
4538 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4539 return true;
4540
4541 int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
4542
4543 return R128Idx < 0 || !Inst.getOperand(R128Idx).getImm();
4544}
4545
// \returns true if \p Opcode is one of the reversed-operand ("*rev")
// opcode variants, e.g. v_subrev / v_lshlrev / v_lshrrev / v_ashrrev.
static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  // v_subrev_f32
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  // v_subrev_co_u32 / v_subrev_i32
  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  // v_subbrev_u32
  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  // v_subrev_u32
  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  // v_subrev_f16
  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  // v_subrev_u16
  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  // v_lshrrev_b32
  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  // v_ashrrev_i32
  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  // v_lshlrev_b32
  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  // 16-bit shift variants
  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  // 64-bit shift variants
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  // packed 16-bit shift variants
  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    return true;
  default:
    return false;
  }
}
4674
// Validate uses of the lds_direct special register: only legal on targets
// that support it, only in non-rev/non-SDWA 9-bit-operand encodings, and
// only as src0.
bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst,
                                        const OperandVector &Operands) {
  using namespace SIInstrFlags;
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);

  // lds_direct register is defined so that it can be used
  // with 9-bit operands only. Ignore encodings which do not accept these.
  const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
  if ((Desc.TSFlags & Enc) == 0)
    return true;

  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
    auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
    if (SrcIdx == -1)
      break; // Sources are ordered; a missing srcN implies no later sources.
    const auto &Src = Inst.getOperand(SrcIdx);
    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {

      if (isGFX90A() || isGFX11Plus()) {
        Error(getOperandLoc(Operands, SrcIdx),
              "lds_direct is not supported on this GPU");
        return false;
      }

      if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) {
        Error(getOperandLoc(Operands, SrcIdx),
              "lds_direct cannot be used with this instruction");
        return false;
      }

      if (SrcName != OpName::src0) {
        Error(getOperandLoc(Operands, SrcIdx),
              "lds_direct may be used as src0 only");
        return false;
      }
    }
  }

  return true;
}
4716
4717SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4718 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4719 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4720 if (Op.isFlatOffset())
4721 return Op.getStartLoc();
4722 }
4723 return getLoc();
4724}
4725
4726bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4727 const OperandVector &Operands) {
4728 auto Opcode = Inst.getOpcode();
4729 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4730 if (OpNum == -1)
4731 return true;
4732
4733 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4734 if ((TSFlags & SIInstrFlags::FLAT))
4735 return validateFlatOffset(Inst, Operands);
4736
4737 if ((TSFlags & SIInstrFlags::SMRD))
4738 return validateSMEMOffset(Inst, Operands);
4739
4740 const auto &Op = Inst.getOperand(OpNum);
4741 // GFX12+ buffer ops: InstOffset is signed 24, but must not be a negative.
4742 if (isGFX12Plus() &&
4743 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4744 const unsigned OffsetSize = 24;
4745 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
4746 Error(getFlatOffsetLoc(Operands),
4747 Twine("expected a ") + Twine(OffsetSize - 1) +
4748 "-bit unsigned offset for buffer ops");
4749 return false;
4750 }
4751 } else {
4752 const unsigned OffsetSize = 16;
4753 if (!isUIntN(OffsetSize, Op.getImm())) {
4754 Error(getFlatOffsetLoc(Operands),
4755 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4756 return false;
4757 }
4758 }
4759 return true;
4760}
4761
4762bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4763 const OperandVector &Operands) {
4764 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4765 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4766 return true;
4767
4768 auto Opcode = Inst.getOpcode();
4769 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4770 assert(OpNum != -1);
4771
4772 const auto &Op = Inst.getOperand(OpNum);
4773 if (!hasFlatOffsets() && Op.getImm() != 0) {
4774 Error(getFlatOffsetLoc(Operands),
4775 "flat offset modifier is not supported on this GPU");
4776 return false;
4777 }
4778
4779 // For pre-GFX12 FLAT instructions the offset must be positive;
4780 // MSB is ignored and forced to zero.
4781 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4782 bool AllowNegative =
4784 isGFX12Plus();
4785 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4786 Error(getFlatOffsetLoc(Operands),
4787 Twine("expected a ") +
4788 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4789 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4790 return false;
4791 }
4792
4793 return true;
4794}
4795
4796SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4797 // Start with second operand because SMEM Offset cannot be dst or src0.
4798 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4799 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4800 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4801 return Op.getStartLoc();
4802 }
4803 return getLoc();
4804}
4805
4806bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4807 const OperandVector &Operands) {
4808 if (isCI() || isSI())
4809 return true;
4810
4811 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4812 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4813 return true;
4814
4815 auto Opcode = Inst.getOpcode();
4816 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4817 if (OpNum == -1)
4818 return true;
4819
4820 const auto &Op = Inst.getOperand(OpNum);
4821 if (!Op.isImm())
4822 return true;
4823
4824 uint64_t Offset = Op.getImm();
4825 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4828 return true;
4829
4830 Error(getSMEMOffsetLoc(Operands),
4831 isGFX12Plus() && IsBuffer
4832 ? "expected a 23-bit unsigned offset for buffer ops"
4833 : isGFX12Plus() ? "expected a 24-bit signed offset"
4834 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4835 : "expected a 21-bit signed offset");
4836
4837 return false;
4838}
4839
4840bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
4841 const OperandVector &Operands) {
4842 unsigned Opcode = Inst.getOpcode();
4843 const MCInstrDesc &Desc = MII.get(Opcode);
4844 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4845 return true;
4846
4847 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4848 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4849
4850 const int OpIndices[] = { Src0Idx, Src1Idx };
4851
4852 unsigned NumExprs = 0;
4853 unsigned NumLiterals = 0;
4854 int64_t LiteralValue;
4855
4856 for (int OpIdx : OpIndices) {
4857 if (OpIdx == -1) break;
4858
4859 const MCOperand &MO = Inst.getOperand(OpIdx);
4860 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4862 bool IsLit = false;
4863 std::optional<int64_t> Imm;
4864 if (MO.isImm()) {
4865 Imm = MO.getImm();
4866 } else if (MO.isExpr()) {
4867 if (isLitExpr(MO.getExpr())) {
4868 IsLit = true;
4869 Imm = getLitValue(MO.getExpr());
4870 }
4871 } else {
4872 continue;
4873 }
4874
4875 if (!Imm.has_value()) {
4876 ++NumExprs;
4877 } else if (!isInlineConstant(Inst, OpIdx)) {
4878 auto OpType = static_cast<AMDGPU::OperandType>(
4879 Desc.operands()[OpIdx].OperandType);
4880 int64_t Value = encode32BitLiteral(*Imm, OpType, IsLit);
4881 if (NumLiterals == 0 || LiteralValue != Value) {
4883 ++NumLiterals;
4884 }
4885 }
4886 }
4887 }
4888
4889 if (NumLiterals + NumExprs <= 1)
4890 return true;
4891
4892 Error(getOperandLoc(Operands, Src1Idx),
4893 "only one unique literal operand is allowed");
4894 return false;
4895}
4896
// Validate op_sel / op_sel_hi restrictions that vary per opcode family and
// subtarget.
bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  // permlane16: only the low two op_sel bits are meaningful.
  if (isPermlane16(Opc)) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

    if (OpSel & ~3)
      return false;
  }

  uint64_t TSFlags = MII.get(Opc).TSFlags;

  // GFX940 DOT instructions: op_sel must be 0 and op_sel_hi must be all ones.
  if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    if (OpSelIdx != -1) {
      if (Inst.getOperand(OpSelIdx).getImm() != 0)
        return false;
    }
    int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
    if (OpSelHiIdx != -1) {
      if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
        return false;
    }
  }

  // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
  if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
      (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
    if (OpSel & 3)
      return false;
  }

  // Packed math FP32 instructions typically accept SGPRs or VGPRs as source
  // operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
  // the first SGPR and use it for both the low and high operations.
  if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
    int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
    int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);

    const MCOperand &Src0 = Inst.getOperand(Src0Idx);
    const MCOperand &Src1 = Inst.getOperand(Src1Idx);
    unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
    unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();

    const MCRegisterInfo *TRI = getContext().getRegisterInfo();

    // An SGPR source requires both op_sel and op_sel_hi bits to be clear for
    // its operand index.
    auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
      unsigned Mask = 1U << Index;
      return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
    };

    if (Src0.isReg() && isSGPR(Src0.getReg(), TRI) &&
        !VerifyOneSGPR(/*Index=*/0))
      return false;
    if (Src1.isReg() && isSGPR(Src1.getReg(), TRI) &&
        !VerifyOneSGPR(/*Index=*/1))
      return false;

    int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
    if (Src2Idx != -1) {
      const MCOperand &Src2 = Inst.getOperand(Src2Idx);
      if (Src2.isReg() && isSGPR(Src2.getReg(), TRI) &&
          !VerifyOneSGPR(/*Index=*/2))
        return false;
    }
  }

  return true;
}
4970
// On true16 targets, check that each op_sel bit agrees with the hi/lo half
// selected by the corresponding 16-bit VGPR operand.
bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
  if (!hasTrue16Insts())
    return true;
  const MCRegisterInfo *MRI = getMRI();
  const unsigned Opc = Inst.getOpcode();
  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx == -1)
    return true;
  unsigned OpSelOpValue = Inst.getOperand(OpSelIdx).getImm();
  // If the value is 0 we could have a default OpSel Operand, so conservatively
  // allow it.
  if (OpSelOpValue == 0)
    return true;
  unsigned OpCount = 0;
  // Bit i of op_sel corresponds to the i-th operand in this fixed order.
  for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
    if (OpIdx == -1)
      continue;
    const MCOperand &Op = Inst.getOperand(OpIdx);
    // Only 16-bit VGPR operands constrain op_sel.
    if (Op.isReg() &&
        MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
      bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Op.getReg(), *MRI);
      bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
      if (OpSelOpIsHi != VGPRSuffixIsHi)
        return false;
    }
    ++OpCount;
  }

  return true;
}
5003
// Check that neg_lo/neg_hi bits are only set for source operands that allow a
// neg modifier (detected via presence of the matching src_modifiers operand).
bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
  assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);

  const unsigned Opc = Inst.getOpcode();
  uint64_t TSFlags = MII.get(Opc).TSFlags;

  // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
  // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
  // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
  // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
  if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
      !(TSFlags & SIInstrFlags::IsSWMMAC))
    return true;

  int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
  if (NegIdx == -1)
    return true;

  unsigned Neg = Inst.getOperand(NegIdx).getImm();

  // Instructions that have neg_lo or neg_hi operand but neg modifier is allowed
  // on some src operands but not allowed on other.
  // It is convenient that such instructions don't have src_modifiers operand
  // for src operands that don't allow neg because they also don't allow opsel.

  const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
                                     AMDGPU::OpName::src1_modifiers,
                                     AMDGPU::OpName::src2_modifiers};

  // A set neg bit for a source without src_modifiers is invalid.
  for (unsigned i = 0; i < 3; ++i) {
    if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
      if (Neg & (1 << i))
        return false;
    }
  }

  return true;
}
5042
5043bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
5044 const OperandVector &Operands) {
5045 const unsigned Opc = Inst.getOpcode();
5046 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
5047 if (DppCtrlIdx >= 0) {
5048 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
5049
5050 if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) &&
5051 AMDGPU::isDPALU_DPP(MII.get(Opc), MII, getSTI())) {
5052 // DP ALU DPP is supported for row_newbcast only on GFX9* and row_share
5053 // only on GFX12.
5054 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
5055 Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
5056 : "DP ALU dpp only supports row_newbcast");
5057 return false;
5058 }
5059 }
5060
5061 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
5062 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5063
5064 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
5065 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
5066 if (Src1Idx >= 0) {
5067 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
5068 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5069 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
5070 Error(getOperandLoc(Operands, Src1Idx),
5071 "invalid operand for instruction");
5072 return false;
5073 }
5074 if (Src1.isImm()) {
5075 Error(getInstLoc(Operands),
5076 "src1 immediate operand invalid for instruction");
5077 return false;
5078 }
5079 }
5080 }
5081
5082 return true;
5083}
5084
5085// Check if VCC register matches wavefront size
5086bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
5087 return (Reg == AMDGPU::VCC && isWave64()) ||
5088 (Reg == AMDGPU::VCC_LO && isWave32());
5089}
5090
// One unique literal can be used. VOP3 literal is only allowed in GFX10+
bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
                                         const OperandVector &Operands) {
  unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  // A "mandatory" literal is one baked into the encoding (OpName::imm);
  // such instructions always carry a literal operand.
  bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
  // Only VOP3/VOP3P, VOPD and mandatory-literal instructions are checked
  // here; other encodings are handled elsewhere.
  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
      !HasMandatoryLiteral && !isVOPD(Opcode))
    return true;

  OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);

  // Track the first literal operand seen so later literals can be compared
  // against it.
  std::optional<unsigned> LiteralOpIdx;
  std::optional<uint64_t> LiteralValue;

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      continue;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    if (!MO.isImm() && !MO.isExpr())
      continue;
    if (!isSISrcOperand(Desc, OpIdx))
      continue;

    // Resolve the literal's value where possible: plain immediates and
    // lit()-wrapped expressions.
    std::optional<int64_t> Imm;
    if (MO.isImm())
      Imm = MO.getImm();
    else if (MO.isExpr() && isLitExpr(MO.getExpr()))
      Imm = getLitValue(MO.getExpr());

    bool IsAnotherLiteral = false;
    if (!Imm.has_value()) {
      // Literal value not known, so we conservatively assume it's different.
      IsAnotherLiteral = true;
    } else if (!isInlineConstant(Inst, OpIdx)) {
      uint64_t Value = *Imm;
      // NOTE(review): the initializer below appears truncated in this copy
      // (an operand of the '||' is missing); verify against upstream before
      // editing.
      bool IsForcedFP64 =
          Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
              HasMandatoryLiteral);
      bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpIdx)) &&
                    AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
      bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);

      // Reject values that fit neither a 32-bit literal nor, where the
      // target provides them, a 64-bit literal encoding.
      if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
          !IsForcedFP64 && (!has64BitLiterals() || Desc.getSize() != 4)) {
        Error(getOperandLoc(Operands, OpIdx),
              "invalid operand for instruction");
        return false;
      }

      // 64-bit FP values encodable as 32-bit literals are compared by their
      // high half.
      if (IsFP64 && IsValid32Op && !IsForcedFP64)
        Value = Hi_32(Value);

      IsAnotherLiteral = !LiteralValue || *LiteralValue != Value;
      // NOTE(review): a statement updating LiteralValue appears to be
      // missing here in this copy; verify against upstream.
    }

    // VOP3 literals require the VOP3Literal feature (GFX10+).
    if (IsAnotherLiteral && !HasMandatoryLiteral &&
        !getFeatureBits()[FeatureVOP3Literal]) {
      Error(getOperandLoc(Operands, OpIdx),
            "literal operands are not supported");
      return false;
    }

    // At most one unique literal may appear across all source operands.
    if (LiteralOpIdx && IsAnotherLiteral) {
      Error(getLaterLoc(getOperandLoc(Operands, OpIdx),
                        getOperandLoc(Operands, *LiteralOpIdx)),
            "only one unique literal operand is allowed");
      return false;
    }

    if (IsAnotherLiteral)
      LiteralOpIdx = OpIdx;
  }

  return true;
}
5170
5171// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
5172static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5173 const MCRegisterInfo *MRI) {
5174 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name);
5175 if (OpIdx < 0)
5176 return -1;
5177
5178 const MCOperand &Op = Inst.getOperand(OpIdx);
5179 if (!Op.isReg())
5180 return -1;
5181
5182 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5183 auto Reg = Sub ? Sub : Op.getReg();
5184 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5185 return AGPR32.contains(Reg) ? 1 : 0;
5186}
5187
// Check AGPR/VGPR register-class consistency between the destination and
// data operands of memory instructions. IsAGPROperand yields -1 (absent or
// not a register), 0 (VGPR) or 1 (AGPR).
bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  // NOTE(review): this flag mask appears truncated in this copy (a
  // continuation line of the '|' chain is missing); verify against upstream.
  if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
                  SIInstrFlags::DS)) == 0)
    return true;

  // DS instructions carry their data in data0; buffer/flat use vdata.
  AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
                                ? AMDGPU::OpName::data0
                                : AMDGPU::OpName::vdata;

  const MCRegisterInfo *MRI = getMRI();
  int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
  int DataAreg = IsAGPROperand(Inst, DataName, MRI);

  // For two-data DS operations, both data operands must use the same class.
  if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
    int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
    if (Data2Areg >= 0 && Data2Areg != DataAreg)
      return false;
  }

  auto FB = getFeatureBits();
  if (FB[AMDGPU::FeatureGFX90AInsts]) {
    // On gfx90a, dst and data may each be AGPR or VGPR but must agree.
    if (DataAreg < 0 || DstAreg < 0)
      return true;
    return DstAreg == DataAreg;
  }

  // Other targets do not support AGPR loads/stores at all.
  return DstAreg < 1 && DataAreg < 1;
}
5218
5219bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
5220 auto FB = getFeatureBits();
5221 if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
5222 return true;
5223
5224 unsigned Opc = Inst.getOpcode();
5225 const MCRegisterInfo *MRI = getMRI();
5226 // DS_READ_B96_TR_B6 is the only DS instruction in GFX950, that allows
5227 // unaligned VGPR. All others only allow even aligned VGPRs.
5228 if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5229 return true;
5230
5231 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5232 switch (Opc) {
5233 default:
5234 break;
5235 case AMDGPU::DS_LOAD_TR6_B96:
5236 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5237 // DS_LOAD_TR6_B96 is the only DS instruction in GFX1250, that
5238 // allows unaligned VGPR. All others only allow even aligned VGPRs.
5239 return true;
5240 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5241 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5242 // GLOBAL_LOAD_TR6_B96 is the only GLOBAL instruction in GFX1250, that
5243 // allows unaligned VGPR for vdst, but other operands still only allow
5244 // even aligned VGPRs.
5245 int VAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
5246 if (VAddrIdx != -1) {
5247 const MCOperand &Op = Inst.getOperand(VAddrIdx);
5248 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5249 if ((Sub - AMDGPU::VGPR0) & 1)
5250 return false;
5251 }
5252 return true;
5253 }
5254 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5255 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
5256 return true;
5257 }
5258 }
5259
5260 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5261 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5262 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
5263 const MCOperand &Op = Inst.getOperand(I);
5264 if (!Op.isReg())
5265 continue;
5266
5267 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5268 if (!Sub)
5269 continue;
5270
5271 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
5272 return false;
5273 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
5274 return false;
5275 }
5276
5277 return true;
5278}
5279
5280SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5281 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5282 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5283 if (Op.isBLGP())
5284 return Op.getStartLoc();
5285 }
5286 return SMLoc();
5287}
5288
5289bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
5290 const OperandVector &Operands) {
5291 unsigned Opc = Inst.getOpcode();
5292 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
5293 if (BlgpIdx == -1)
5294 return true;
5295 SMLoc BLGPLoc = getBLGPLoc(Operands);
5296 if (!BLGPLoc.isValid())
5297 return true;
5298 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
5299 auto FB = getFeatureBits();
5300 bool UsesNeg = false;
5301 if (FB[AMDGPU::FeatureGFX940Insts]) {
5302 switch (Opc) {
5303 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5304 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5305 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5306 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5307 UsesNeg = true;
5308 }
5309 }
5310
5311 if (IsNeg == UsesNeg)
5312 return true;
5313
5314 Error(BLGPLoc,
5315 UsesNeg ? "invalid modifier: blgp is not supported"
5316 : "invalid modifier: neg is not supported");
5317
5318 return false;
5319}
5320
5321bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
5322 const OperandVector &Operands) {
5323 if (!isGFX11Plus())
5324 return true;
5325
5326 unsigned Opc = Inst.getOpcode();
5327 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5328 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5329 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5330 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5331 return true;
5332
5333 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
5334 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
5335 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
5336 if (Reg == AMDGPU::SGPR_NULL)
5337 return true;
5338
5339 Error(getOperandLoc(Operands, Src0Idx), "src0 must be null");
5340 return false;
5341}
5342
5343bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
5344 const OperandVector &Operands) {
5345 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5346 if ((TSFlags & SIInstrFlags::DS) == 0)
5347 return true;
5348 if (TSFlags & SIInstrFlags::GWS)
5349 return validateGWS(Inst, Operands);
5350 // Only validate GDS for non-GWS instructions.
5351 if (hasGDS())
5352 return true;
5353 int GDSIdx =
5354 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
5355 if (GDSIdx < 0)
5356 return true;
5357 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
5358 if (GDS) {
5359 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5360 Error(S, "gds modifier is not supported on this GPU");
5361 return false;
5362 }
5363 return true;
5364}
5365
5366// gfx90a has an undocumented limitation:
5367// DS_GWS opcodes must use even aligned registers.
5368bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5369 const OperandVector &Operands) {
5370 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5371 return true;
5372
5373 int Opc = Inst.getOpcode();
5374 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5375 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5376 return true;
5377
5378 const MCRegisterInfo *MRI = getMRI();
5379 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5380 int Data0Pos =
5381 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
5382 assert(Data0Pos != -1);
5383 auto Reg = Inst.getOperand(Data0Pos).getReg();
5384 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5385 if (RegIdx & 1) {
5386 Error(getOperandLoc(Operands, Data0Pos), "vgpr must be even aligned");
5387 return false;
5388 }
5389
5390 return true;
5391}
5392
// Validate the cache-policy (cpol) operand: per-target availability of the
// individual bits and per-encoding restrictions.
bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                            const OperandVector &Operands,
                                            SMLoc IDLoc) {
  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                           AMDGPU::OpName::cpol);
  if (CPolPos == -1)
    return true;

  unsigned CPol = Inst.getOperand(CPolPos).getImm();

  // SCAL and NV are GFX1250+ only. Note these report an error but fall
  // through so further problems are also diagnosed.
  if (!isGFX1250Plus()) {
    if (CPol & CPol::SCAL) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
      Error(S, "scale_offset is not supported on this GPU");
    }
    if (CPol & CPol::NV) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
      Error(S, "nv is not supported on this GPU");
    }
  }

  // scale_offset is only meaningful for a subset of instructions.
  if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Inst.getOpcode())) {
    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
    StringRef CStr(S.getPointer());
    S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
    Error(S, "scale_offset is not supported for this instruction");
  }

  // GFX12+ replaced glc/slc/dlc with th/scope; delegate.
  if (isGFX12Plus())
    return validateTHAndScopeBits(Inst, Operands, CPol);

  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if (TSFlags & SIInstrFlags::SMRD) {
    if (CPol && (isSI() || isCI())) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      Error(S, "cache policy is not supported for SMRD instructions");
      return false;
    }
    if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
      Error(IDLoc, "invalid cache policy for SMEM instruction");
      return false;
    }
  }

  if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
    // NOTE(review): this initializer appears truncated in this copy (the
    // remainder of the '|' chain is missing); verify against upstream.
    const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
    if (!(TSFlags & AllowSCCModifier)) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
      Error(S,
            "scc modifier is not supported for this instruction on this GPU");
      return false;
    }
  }

  // NOTE(review): the unconditional return below makes the atomic-ret glc
  // checks that follow unreachable; a guard condition line appears to be
  // missing from this copy. Verify against upstream.
  return true;

  if (TSFlags & SIInstrFlags::IsAtomicRet) {
    // Returning atomics must request the old value via glc (sc0 on gfx940).
    if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
      Error(IDLoc, isGFX940() ? "instruction must use sc0"
                              : "instruction must use glc");
      return false;
    }
  } else {
    if (CPol & CPol::GLC) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      // NOTE(review): a line of this statement appears to be missing in this
      // copy; verify against upstream.
          &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
      Error(S, isGFX940() ? "instruction must not use sc0"
                          : "instruction must not use glc");
      return false;
    }
  }

  return true;
}
5478
// Validate the GFX12+ temporal-hint (th) and scope fields of the cpol
// operand.
bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
                                             const OperandVector &Operands,
                                             const unsigned CPol) {
  const unsigned TH = CPol & AMDGPU::CPol::TH;
  const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;

  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &TID = MII.get(Opcode);

  // Report Msg at the cpol operand's location and signal failure.
  auto PrintError = [&](StringRef Msg) {
    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
    Error(S, Msg);
    return false;
  };

  // NOTE(review): the two conditions below appear truncated in this copy
  // (the second operand of each '&&' is missing); verify against upstream.
  if ((TH & AMDGPU::CPol::TH_ATOMIC_RETURN) &&
    return PrintError("th:TH_ATOMIC_RETURN requires a destination operand");

  if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
    return PrintError("instruction must use th:TH_ATOMIC_RETURN");

  // th == 0 (default) is always valid.
  if (TH == 0)
    return true;

  if ((TID.TSFlags & SIInstrFlags::SMRD) &&
      ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
       (TH == AMDGPU::CPol::TH_NT_HT)))
    return PrintError("invalid th value for SMEM instruction");

  if (TH == AMDGPU::CPol::TH_BYPASS) {
    // NOTE(review): this condition appears truncated in this copy; verify
    // against upstream.
    if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
        (Scope == AMDGPU::CPol::SCOPE_SYS &&
      return PrintError("scope and th combination is not valid");
  }

  // The th value must belong to the hint class (atomic/store/load) of the
  // instruction.
  unsigned THType = AMDGPU::getTemporalHintType(TID);
  if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
    if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
      return PrintError("invalid th value for atomic instructions");
  } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
    if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
      return PrintError("invalid th value for store instructions");
  } else {
    if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
      return PrintError("invalid th value for load instructions");
  }

  return true;
}
5533
// The tfe modifier adds an extra result register, which is meaningless for
// stores; reject an explicitly written tfe on store instructions.
bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
                                  const OperandVector &Operands) {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  // NOTE(review): the condition below appears truncated in this copy (the
  // right-hand side of the '&&' is missing); verify against upstream.
  if (Desc.mayStore() &&
    SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
    // getImmLoc falls back to the instruction location when the modifier was
    // not written, so only error for an explicit tfe.
    if (Loc != getInstLoc(Operands)) {
      Error(Loc, "TFE modifier has no meaning for store instructions");
      return false;
    }
  }

  return true;
}
5548
// For WMMA instructions with matrix format modifiers, check that the source
// register tuple size matches the selected matrix format.
bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
                                   const OperandVector &Operands) {
  unsigned Opc = Inst.getOpcode();
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  const MCInstrDesc &Desc = MII.get(Opc);

  // Check one (format operand, source operand) pair; true if valid or the
  // format operand is absent.
  auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) -> bool {
    int FmtIdx = AMDGPU::getNamedOperandIdx(Opc, FmtOp);
    if (FmtIdx == -1)
      return true;
    unsigned Fmt = Inst.getOperand(FmtIdx).getImm();
    int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp);
    unsigned RegSize =
        TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[SrcIdx], HwMode))
            .getSizeInBits();

    // NOTE(review): the condition guarding this early return appears to be
    // missing in this copy (an 'if' comparing RegSize against the size the
    // format requires); verify against upstream.
      return true;

    Error(getOperandLoc(Operands, SrcIdx),
          "wrong register tuple size for " +
              Twine(WMMAMods::ModMatrixFmt[Fmt]));
    return false;
  };

  return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
         validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
}
5577
5578bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, SMLoc IDLoc,
5579 const OperandVector &Operands) {
5580 if (!validateLdsDirect(Inst, Operands))
5581 return false;
5582 if (!validateTrue16OpSel(Inst)) {
5583 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5584 "op_sel operand conflicts with 16-bit operand suffix");
5585 return false;
5586 }
5587 if (!validateSOPLiteral(Inst, Operands))
5588 return false;
5589 if (!validateVOPLiteral(Inst, Operands)) {
5590 return false;
5591 }
5592 if (!validateConstantBusLimitations(Inst, Operands)) {
5593 return false;
5594 }
5595 if (!validateVOPD(Inst, Operands)) {
5596 return false;
5597 }
5598 if (!validateIntClampSupported(Inst)) {
5599 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5600 "integer clamping is not supported on this GPU");
5601 return false;
5602 }
5603 if (!validateOpSel(Inst)) {
5604 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5605 "invalid op_sel operand");
5606 return false;
5607 }
5608 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5609 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5610 "invalid neg_lo operand");
5611 return false;
5612 }
5613 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5614 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5615 "invalid neg_hi operand");
5616 return false;
5617 }
5618 if (!validateDPP(Inst, Operands)) {
5619 return false;
5620 }
5621 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5622 if (!validateMIMGD16(Inst)) {
5623 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5624 "d16 modifier is not supported on this GPU");
5625 return false;
5626 }
5627 if (!validateMIMGDim(Inst, Operands)) {
5628 Error(IDLoc, "missing dim operand");
5629 return false;
5630 }
5631 if (!validateTensorR128(Inst)) {
5632 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5633 "instruction must set modifier r128=0");
5634 return false;
5635 }
5636 if (!validateMIMGMSAA(Inst)) {
5637 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5638 "invalid dim; must be MSAA type");
5639 return false;
5640 }
5641 if (!validateMIMGDataSize(Inst, IDLoc)) {
5642 return false;
5643 }
5644 if (!validateMIMGAddrSize(Inst, IDLoc))
5645 return false;
5646 if (!validateMIMGAtomicDMask(Inst)) {
5647 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5648 "invalid atomic image dmask");
5649 return false;
5650 }
5651 if (!validateMIMGGatherDMask(Inst)) {
5652 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5653 "invalid image_gather dmask: only one bit must be set");
5654 return false;
5655 }
5656 if (!validateMovrels(Inst, Operands)) {
5657 return false;
5658 }
5659 if (!validateOffset(Inst, Operands)) {
5660 return false;
5661 }
5662 if (!validateMAIAccWrite(Inst, Operands)) {
5663 return false;
5664 }
5665 if (!validateMAISrc2(Inst, Operands)) {
5666 return false;
5667 }
5668 if (!validateMFMA(Inst, Operands)) {
5669 return false;
5670 }
5671 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5672 return false;
5673 }
5674
5675 if (!validateAGPRLdSt(Inst)) {
5676 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5677 ? "invalid register class: data and dst should be all VGPR or AGPR"
5678 : "invalid register class: agpr loads and stores not supported on this GPU"
5679 );
5680 return false;
5681 }
5682 if (!validateVGPRAlign(Inst)) {
5683 Error(IDLoc,
5684 "invalid register class: vgpr tuples must be 64 bit aligned");
5685 return false;
5686 }
5687 if (!validateDS(Inst, Operands)) {
5688 return false;
5689 }
5690
5691 if (!validateBLGP(Inst, Operands)) {
5692 return false;
5693 }
5694
5695 if (!validateDivScale(Inst)) {
5696 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5697 return false;
5698 }
5699 if (!validateWaitCnt(Inst, Operands)) {
5700 return false;
5701 }
5702 if (!validateTFE(Inst, Operands)) {
5703 return false;
5704 }
5705 if (!validateWMMA(Inst, Operands)) {
5706 return false;
5707 }
5708
5709 return true;
5710}
5711
5713 const FeatureBitset &FBS,
5714 unsigned VariantID = 0);
5715
5716static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5717 const FeatureBitset &AvailableFeatures,
5718 unsigned VariantID);
5719
// Convenience overload: check the mnemonic against every assembler variant.
bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS) {
  return isSupportedMnemo(Mnemo, FBS, getAllVariants());
}
5724
5725bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5726 const FeatureBitset &FBS,
5727 ArrayRef<unsigned> Variants) {
5728 for (auto Variant : Variants) {
5729 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5730 return true;
5731 }
5732
5733 return false;
5734}
5735
// Diagnose an unmatched mnemonic with the most helpful message available:
// wrong variant, wrong wavesize, wrong GPU, or a probable typo. Returns
// false if the mnemonic is actually supported (no error emitted), true
// otherwise (error emitted).
bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
                                                  SMLoc IDLoc) {
  FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());

  // Check if requested instruction variant is supported.
  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
    return false;

  // This instruction is not supported.
  // Clear any other pending errors because they are no longer relevant.
  getParser().clearPendingErrors();

  // Requested instruction variant is not supported.
  // Check if any other variants are supported.
  StringRef VariantName = getMatchedVariantName();
  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
    return Error(IDLoc,
                 Twine(VariantName,
                       " variant of this instruction is not supported"));
  }

  // Check if this instruction may be used with a different wavesize.
  if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
      !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
    // FIXME: Use getAvailableFeatures, and do not manually recompute
    FeatureBitset FeaturesWS32 = getFeatureBits();
    FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
        .flip(AMDGPU::FeatureWavefrontSize32);
    FeatureBitset AvailableFeaturesWS32 =
        ComputeAvailableFeatures(FeaturesWS32);

    if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
      return Error(IDLoc, "instruction requires wavesize=32");
  }

  // Finally check if this instruction is supported on any other GPU.
  if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
    return Error(IDLoc, "instruction not supported on this GPU (" +
                            getSTI().getCPU() + ")" + ": " + Mnemo);
  }

  // Instruction not supported on any GPU. Probably a typo.
  std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
  return Error(IDLoc, "invalid instruction" + Suggestion);
}
5781
5782static bool isInvalidVOPDY(const OperandVector &Operands,
5783 uint64_t InvalidOprIdx) {
5784 assert(InvalidOprIdx < Operands.size());
5785 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5786 if (Op.isToken() && InvalidOprIdx > 1) {
5787 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5788 return PrevOp.isToken() && PrevOp.getToken() == "::";
5789 }
5790 return false;
5791}
5792
// Top-level match callback: try every matched assembler variant, keep the
// most specific failure status, validate a successful match, and emit it.
// Returns true on error (already reported), false on successful emission.
bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  Inst.setLoc(IDLoc);
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // We order match statuses from least to most specific. We use most
    // specific status as resulting:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
    if (R == Match_Success || R == Match_MissingFeature ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
        (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
         Result != Match_MissingFeature)) {
      Result = R;
      ErrorInfo = EI;
    }
    if (R == Match_Success)
      break;
  }

  if (Result == Match_Success) {
    // Run target-specific semantic checks before emitting.
    if (!validateInstruction(Inst, IDLoc, Operands)) {
      return true;
    }
    Out.emitInstruction(Inst, getSTI());
    return false;
  }

  // No variant matched; see if the mnemonic itself is unsupported (this
  // emits its own, more specific diagnostic).
  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
    return true;
  }

  switch (Result) {
  default: break;
  case Match_MissingFeature:
    // It has been verified that the specified instruction
    // mnemonic is valid. A match was found but it requires
    // features which are not supported on this GPU.
    return Error(IDLoc, "operands are not valid for this GPU or mode");

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;

      // Point at the VOPDY half when the offending operand follows "::".
      if (isInvalidVOPDY(Operands, ErrorInfo))
        return Error(ErrorLoc, "invalid VOPDY instruction");
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_MnemonicFail:
    llvm_unreachable("Invalid instructions should have been handled already");
  }
  llvm_unreachable("Implement any new match types added!");
}
5861
5862bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5863 int64_t Tmp = -1;
5864 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5865 return true;
5866 }
5867 if (getParser().parseAbsoluteExpression(Tmp)) {
5868 return true;
5869 }
5870 Ret = static_cast<uint32_t>(Tmp);
5871 return false;
5872}
5873
5874bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5875 if (!getSTI().getTargetTriple().isAMDGCN())
5876 return TokError("directive only supported for amdgcn architecture");
5877
5878 std::string TargetIDDirective;
5879 SMLoc TargetStart = getTok().getLoc();
5880 if (getParser().parseEscapedString(TargetIDDirective))
5881 return true;
5882
5883 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5884 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5885 return getParser().Error(TargetRange.Start,
5886 (Twine(".amdgcn_target directive's target id ") +
5887 Twine(TargetIDDirective) +
5888 Twine(" does not match the specified target id ") +
5889 Twine(getTargetStreamer().getTargetID()->toString())).str());
5890
5891 return false;
5892}
5893
// Emit a "value out of range" error covering the given source range.
// Always returns true (the error-reporting convention of this parser).
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return Error(Range.Start, "value out of range", Range);
}
5897
// Compute the VGPR/SGPR "blocks" kernel-descriptor fields as MCExprs from
// the kernel's register usage. Returns true on error (value out of range).
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, const MCExpr *VCCUsed,
    const MCExpr *FlatScrUsed, bool XNACKUsed,
    std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
    SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
    const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
  // TODO(scott.linder): These calculations are duplicated from
  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
  IsaVersion Version = getIsaVersion(getSTI().getCPU());
  MCContext &Ctx = getContext();

  const MCExpr *NumSGPRs = NextFreeSGPR;
  int64_t EvaluatedSGPRs;

  // NOTE(review): the then-branch of this 'if' and the initializer of
  // MaxAddressableNumSGPRs below appear truncated in this copy; verify
  // against upstream.
  if (Version.Major >= 10)
  else {
    unsigned MaxAddressableNumSGPRs =

    // Range-check the explicitly-declared SGPR count where it is evaluable.
    if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
        !Features.test(FeatureSGPRInitBug) &&
        static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    // Account for VCC/FLAT_SCRATCH/XNACK_MASK, which consume SGPRs beyond
    // the explicitly used ones.
    const MCExpr *ExtraSGPRs =
        AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
    NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);

    // Re-check the range including the extra SGPRs on targets where they
    // count against the addressable limit.
    if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
        (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
        static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    // NOTE(review): the right-hand side of this assignment appears truncated
    // in this copy; verify against upstream.
    if (Features.test(FeatureSGPRInitBug))
      NumSGPRs =
  }

  // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
  // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
  auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
                                unsigned Granule) -> const MCExpr * {
    const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
    const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
    const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
    const MCExpr *AlignToGPR =
        AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
    const MCExpr *DivGPR =
        MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
    const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
    return SubGPR;
  };

  VGPRBlocks = GetNumGPRBlocks(
      NextFreeVGPR,
      IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
  SGPRBlocks =
      GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));

  return false;
}
5960
// Parses a `.amdhsa_kernel <name>` ... `.end_amdhsa_kernel` block: one
// `.amdhsa_*` directive per statement, each with an MCExpr value. Directives
// may not repeat; unresolved expressions are allowed only where the value is
// not needed immediately during parsing. On success the accumulated
// MCKernelDescriptor is emitted via the target streamer.
// NOTE(review): this listing elides some continuation lines (gaps in the
// embedded source numbering), e.g. the first argument of several
// PARSE_BITS_ENTRY uses is not visible here — consult the full file.
5961bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
 5962 if (!getSTI().getTargetTriple().isAMDGCN())
 5963 return TokError("directive only supported for amdgcn architecture");
 5964
 5965 if (!isHsaAbi(getSTI()))
 5966 return TokError("directive only supported for amdhsa OS");
 5967
 5968 StringRef KernelName;
 5969 if (getParser().parseIdentifier(KernelName))
 5970 return true;
 5971
 5972 AMDGPU::MCKernelDescriptor KD =
 5974 &getSTI(), getContext());
 5975
 // Directives already seen; used both for duplicate detection and for the
 // "required directive" checks after the loop.
 5976 StringSet<> Seen;
 5977
 5978 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
 5979
 5980 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
 5981 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
 5982
 5983 SMRange VGPRRange;
 5984 const MCExpr *NextFreeVGPR = ZeroExpr;
 5985 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
 5986 const MCExpr *NamedBarCnt = ZeroExpr;
 5987 uint64_t SharedVGPRCount = 0;
 5988 uint64_t PreloadLength = 0;
 5989 uint64_t PreloadOffset = 0;
 5990 SMRange SGPRRange;
 5991 const MCExpr *NextFreeSGPR = ZeroExpr;
 5992
 5993 // Count the number of user SGPRs implied from the enabled feature bits.
 5994 unsigned ImpliedUserSGPRCount = 0;
 5995
 5996 // Track if the asm explicitly contains the directive for the user SGPR
 5997 // count.
 5998 std::optional<unsigned> ExplicitUserSGPRCount;
 5999 const MCExpr *ReserveVCC = OneExpr;
 6000 const MCExpr *ReserveFlatScr = OneExpr;
 6001 std::optional<bool> EnableWavefrontSize32;
 6002
 // Main directive loop: one iteration per `.amdhsa_*` directive until
 // `.end_amdhsa_kernel`.
 6003 while (true) {
 // Lexing a comment leaves the token at EndOfStatement, so this may skip
 // several in a row.
 6004 while (trySkipToken(AsmToken::EndOfStatement));
 6005
 6006 StringRef ID;
 6007 SMRange IDRange = getTok().getLocRange();
 6008 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
 6009 return true;
 6010
 6011 if (ID == ".end_amdhsa_kernel")
 6012 break;
 6013
 6014 if (!Seen.insert(ID).second)
 6015 return TokError(".amdhsa_ directives cannot be repeated");
 6016
 6017 SMLoc ValStart = getLoc();
 6018 const MCExpr *ExprVal;
 6019 if (getParser().parseExpression(ExprVal))
 6020 return true;
 6021 SMLoc ValEnd = getLoc();
 6022 SMRange ValRange = SMRange(ValStart, ValEnd);
 6023
 // If the expression folds to a constant now, range-check it eagerly;
 // otherwise Val stays 0 and range checks are deferred/skipped.
 6024 int64_t IVal = 0;
 6025 uint64_t Val = IVal;
 6026 bool EvaluatableExpr;
 6027 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
 6028 if (IVal < 0)
 6029 return OutOfRangeError(ValRange);
 6030 Val = IVal;
 6031 }
 6032
 6033#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
 6034 if (!isUInt<ENTRY##_WIDTH>(Val)) \
 6035 return OutOfRangeError(RANGE); \
 6036 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
 6037 getContext());
 6038
 6039// Some fields use the parsed value immediately which requires the expression to
 6040// be solvable.
 6041#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
 6042 if (!(RESOLVED)) \
 6043 return Error(IDRange.Start, "directive should have resolvable expression", \
 6044 IDRange);
 6045
 6046 if (ID == ".amdhsa_group_segment_fixed_size") {
 6048 CHAR_BIT>(Val))
 6049 return OutOfRangeError(ValRange);
 6050 KD.group_segment_fixed_size = ExprVal;
 6051 } else if (ID == ".amdhsa_private_segment_fixed_size") {
 6053 CHAR_BIT>(Val))
 6054 return OutOfRangeError(ValRange);
 6055 KD.private_segment_fixed_size = ExprVal;
 6056 } else if (ID == ".amdhsa_kernarg_size") {
 6057 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
 6058 return OutOfRangeError(ValRange);
 6059 KD.kernarg_size = ExprVal;
 6060 } else if (ID == ".amdhsa_user_sgpr_count") {
 6061 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
 6062 ExplicitUserSGPRCount = Val;
 6063 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
 6064 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
 6066 return Error(IDRange.Start,
 6067 "directive is not supported with architected flat scratch",
 6068 IDRange);
 6070 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
 6071 ExprVal, ValRange);
 // Private segment buffer occupies 4 user SGPRs when enabled.
 6072 if (Val)
 6073 ImpliedUserSGPRCount += 4;
 6074 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
 6075 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
 6076 if (!hasKernargPreload())
 6077 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
 6078
 6079 if (Val > getMaxNumUserSGPRs())
 6080 return OutOfRangeError(ValRange);
 6081 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
 6082 ValRange);
 // Each preloaded kernarg dword consumes one user SGPR.
 6083 if (Val) {
 6084 ImpliedUserSGPRCount += Val;
 6085 PreloadLength = Val;
 6086 }
 6087 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
 6088 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
 6089 if (!hasKernargPreload())
 6090 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
 6091
 6092 if (Val >= 1024)
 6093 return OutOfRangeError(ValRange);
 6094 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
 6095 ValRange);
 6096 if (Val)
 6097 PreloadOffset = Val;
 6098 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
 6099 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
 6101 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
 6102 ValRange);
 6103 if (Val)
 6104 ImpliedUserSGPRCount += 2;
 6105 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
 6106 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
 6108 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
 6109 ValRange);
 6110 if (Val)
 6111 ImpliedUserSGPRCount += 2;
 6112 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
 6113 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
 6115 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
 6116 ExprVal, ValRange);
 6117 if (Val)
 6118 ImpliedUserSGPRCount += 2;
 6119 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
 6120 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
 6122 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
 6123 ValRange);
 6124 if (Val)
 6125 ImpliedUserSGPRCount += 2;
 6126 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
 6128 return Error(IDRange.Start,
 6129 "directive is not supported with architected flat scratch",
 6130 IDRange);
 6131 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
 6133 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
 6134 ExprVal, ValRange);
 6135 if (Val)
 6136 ImpliedUserSGPRCount += 2;
 6137 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
 6138 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
 6140 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
 6141 ExprVal, ValRange);
 6142 if (Val)
 6143 ImpliedUserSGPRCount += 1;
 6144 } else if (ID == ".amdhsa_wavefront_size32") {
 6145 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
 6146 if (IVersion.Major < 10)
 6147 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
 // Remembered for the later shared-VGPR and GPR-block computations.
 6148 EnableWavefrontSize32 = Val;
 6150 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
 6151 ValRange);
 6152 } else if (ID == ".amdhsa_uses_dynamic_stack") {
 6154 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
 6155 ValRange);
 6156 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
 6158 return Error(IDRange.Start,
 6159 "directive is not supported with architected flat scratch",
 6160 IDRange);
 6162 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
 6163 ValRange);
 6164 } else if (ID == ".amdhsa_enable_private_segment") {
 6166 return Error(
 6167 IDRange.Start,
 6168 "directive is not supported without architected flat scratch",
 6169 IDRange);
 6171 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
 6172 ValRange);
 6173 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
 6175 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
 6176 ValRange);
 6177 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
 6179 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
 6180 ValRange);
 6181 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
 6183 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
 6184 ValRange);
 6185 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
 6187 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
 6188 ValRange);
 6189 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
 6191 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
 6192 ValRange);
 6193 } else if (ID == ".amdhsa_next_free_vgpr") {
 // Value is kept as an expression; the range is saved for later
 // diagnostics from calculateGPRBlocks.
 6194 VGPRRange = ValRange;
 6195 NextFreeVGPR = ExprVal;
 6196 } else if (ID == ".amdhsa_next_free_sgpr") {
 6197 SGPRRange = ValRange;
 6198 NextFreeSGPR = ExprVal;
 6199 } else if (ID == ".amdhsa_accum_offset") {
 6200 if (!isGFX90A())
 6201 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
 6202 AccumOffset = ExprVal;
 6203 } else if (ID == ".amdhsa_named_barrier_count") {
 6204 if (!isGFX1250Plus())
 6205 return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
 6206 NamedBarCnt = ExprVal;
 6207 } else if (ID == ".amdhsa_reserve_vcc") {
 6208 if (EvaluatableExpr && !isUInt<1>(Val))
 6209 return OutOfRangeError(ValRange);
 6210 ReserveVCC = ExprVal;
 6211 } else if (ID == ".amdhsa_reserve_flat_scratch") {
 6212 if (IVersion.Major < 7)
 6213 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
 6215 return Error(IDRange.Start,
 6216 "directive is not supported with architected flat scratch",
 6217 IDRange);
 6218 if (EvaluatableExpr && !isUInt<1>(Val))
 6219 return OutOfRangeError(ValRange);
 6220 ReserveFlatScr = ExprVal;
 6221 } else if (ID == ".amdhsa_reserve_xnack_mask") {
 6222 if (IVersion.Major < 8)
 6223 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
 6224 if (!isUInt<1>(Val))
 6225 return OutOfRangeError(ValRange);
 // The directive is only accepted when it agrees with the target-id's
 // XNACK setting; nothing is stored.
 6226 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
 6227 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
 6228 IDRange);
 6229 } else if (ID == ".amdhsa_float_round_mode_32") {
 6231 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
 6232 ValRange);
 6233 } else if (ID == ".amdhsa_float_round_mode_16_64") {
 6235 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
 6236 ValRange);
 6237 } else if (ID == ".amdhsa_float_denorm_mode_32") {
 6239 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
 6240 ValRange);
 6241 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
 6243 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
 6244 ValRange);
 6245 } else if (ID == ".amdhsa_dx10_clamp") {
 6246 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
 6247 return Error(IDRange.Start, "directive unsupported on gfx1170+",
 6248 IDRange);
 6250 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
 6251 ValRange);
 6252 } else if (ID == ".amdhsa_ieee_mode") {
 6253 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
 6254 return Error(IDRange.Start, "directive unsupported on gfx1170+",
 6255 IDRange);
 6257 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
 6258 ValRange);
 6259 } else if (ID == ".amdhsa_fp16_overflow") {
 6260 if (IVersion.Major < 9)
 6261 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
 6263 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
 6264 ValRange);
 6265 } else if (ID == ".amdhsa_tg_split") {
 6266 if (!isGFX90A())
 6267 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
 6268 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
 6269 ExprVal, ValRange);
 6270 } else if (ID == ".amdhsa_workgroup_processor_mode") {
 6271 if (!supportsWGP(getSTI()))
 6272 return Error(IDRange.Start,
 6273 "directive unsupported on " + getSTI().getCPU(), IDRange);
 6275 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
 6276 ValRange);
 6277 } else if (ID == ".amdhsa_memory_ordered") {
 6278 if (IVersion.Major < 10)
 6279 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
 6281 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
 6282 ValRange);
 6283 } else if (ID == ".amdhsa_forward_progress") {
 6284 if (IVersion.Major < 10)
 6285 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
 6287 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
 6288 ValRange);
 6289 } else if (ID == ".amdhsa_shared_vgpr_count") {
 6290 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
 6291 if (IVersion.Major < 10 || IVersion.Major >= 12)
 6292 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
 6293 IDRange);
 // Saved for the wave32 / VGPR-block consistency checks after the loop.
 6294 SharedVGPRCount = Val;
 6296 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
 6297 ValRange);
 6298 } else if (ID == ".amdhsa_inst_pref_size") {
 6299 if (IVersion.Major < 11)
 6300 return Error(IDRange.Start, "directive requires gfx11+", IDRange);
 // Field moved between rsrc3 layouts: gfx11 vs gfx12+.
 6301 if (IVersion.Major == 11) {
 6303 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
 6304 ValRange);
 6305 } else {
 6307 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
 6308 ValRange);
 6309 }
 6310 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
 6313 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
 6314 ExprVal, ValRange);
 6315 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
 6317 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
 6318 ExprVal, ValRange);
 6319 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
 6322 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
 6323 ExprVal, ValRange);
 6324 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
 6326 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
 6327 ExprVal, ValRange);
 6328 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
 6330 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
 6331 ExprVal, ValRange);
 6332 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
 6334 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
 6335 ExprVal, ValRange);
 6336 } else if (ID == ".amdhsa_exception_int_div_zero") {
 6338 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
 6339 ExprVal, ValRange);
 6340 } else if (ID == ".amdhsa_round_robin_scheduling") {
 6341 if (IVersion.Major < 12)
 6342 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
 6344 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
 6345 ValRange);
 6346 } else {
 6347 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
 6348 }
 6349
 6350#undef PARSE_BITS_ENTRY
 6351 }
 6352
 // Post-loop validation: the two register-count directives are mandatory.
 6353 if (!Seen.contains(".amdhsa_next_free_vgpr"))
 6354 return TokError(".amdhsa_next_free_vgpr directive is required");
 6355
 6356 if (!Seen.contains(".amdhsa_next_free_sgpr"))
 6357 return TokError(".amdhsa_next_free_sgpr directive is required");
 6358
 6359 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
 6360
 6361 // Consider the case where the total number of UserSGPRs with trailing
 6362 // allocated preload SGPRs, is greater than the number of explicitly
 6363 // referenced SGPRs.
 6364 if (PreloadLength) {
 6365 MCContext &Ctx = getContext();
 6366 NextFreeSGPR = AMDGPUMCExpr::createMax(
 6367 {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
 6368 }
 6369
 // Fold register counts into the granulated block encodings; range
 // diagnostics use the saved VGPR/SGPR source ranges.
 6370 const MCExpr *VGPRBlocks;
 6371 const MCExpr *SGPRBlocks;
 6372 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
 6373 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
 6374 EnableWavefrontSize32, NextFreeVGPR,
 6375 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
 6376 SGPRBlocks))
 6377 return true;
 6378
 // Range checks only fire when the expressions fold to constants here;
 // relocatable expressions are checked later, if at all.
 6379 int64_t EvaluatedVGPRBlocks;
 6380 bool VGPRBlocksEvaluatable =
 6381 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
 6382 if (VGPRBlocksEvaluatable &&
 6384 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
 6385 return OutOfRangeError(VGPRRange);
 6386 }
 6388 KD.compute_pgm_rsrc1, VGPRBlocks,
 6389 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
 6390 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
 6391
 6392 int64_t EvaluatedSGPRBlocks;
 6393 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
 6395 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
 6396 return OutOfRangeError(SGPRRange);
 6398 KD.compute_pgm_rsrc1, SGPRBlocks,
 6399 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
 6400 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
 6401
 6402 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
 6403 return TokError("amdgpu_user_sgpr_count smaller than than implied by "
 6404 "enabled user SGPRs");
 6405
 // The USER_SGPR_COUNT field lives in different rsrc2 bit positions on
 // gfx1250+ vs older targets.
 6406 if (isGFX1250Plus()) {
 6408 return TokError("too many user SGPRs enabled");
 6411 MCConstantExpr::create(UserSGPRCount, getContext()),
 6412 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
 6413 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
 6414 } else {
 6416 UserSGPRCount))
 6417 return TokError("too many user SGPRs enabled");
 6420 MCConstantExpr::create(UserSGPRCount, getContext()),
 6421 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
 6422 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());
 6423 }
 6424
 // Preload region (4 bytes per preloaded SGPR) must fit in the kernarg
 // segment.
 6425 int64_t IVal = 0;
 6426 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
 6427 return TokError("Kernarg size should be resolvable");
 6428 uint64_t kernarg_size = IVal;
 6429 if (PreloadLength && kernarg_size &&
 6430 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
 6431 return TokError("Kernarg preload length + offset is larger than the "
 6432 "kernarg segment size");
 6433
 6434 if (isGFX90A()) {
 6435 if (!Seen.contains(".amdhsa_accum_offset"))
 6436 return TokError(".amdhsa_accum_offset directive is required");
 6437 int64_t EvaluatedAccum;
 6438 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
 6439 uint64_t UEvaluatedAccum = EvaluatedAccum;
 6440 if (AccumEvaluatable &&
 6441 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
 6442 return TokError("accum_offset should be in range [4..256] in "
 6443 "increments of 4");
 6444
 6445 int64_t EvaluatedNumVGPR;
 6446 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
 6447 AccumEvaluatable &&
 6448 UEvaluatedAccum >
 6449 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
 6450 return TokError("accum_offset exceeds total VGPR allocation");
 // The descriptor encodes (accum_offset / 4) - 1.
 6451 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
 6453 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
 6456 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
 6457 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
 6458 getContext());
 6459 }
 6460
 6461 if (isGFX1250Plus())
 6463 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
 6464 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
 6465 getContext());
 6466
 6467 if (IVersion.Major >= 10 && IVersion.Major < 12) {
 6468 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
 6469 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
 6470 return TokError("shared_vgpr_count directive not valid on "
 6471 "wavefront size 32");
 6472 }
 6473
 6474 if (VGPRBlocksEvaluatable &&
 6475 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
 6476 63)) {
 6477 return TokError("shared_vgpr_count*2 + "
 6478 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
 6479 "exceed 63\n");
 6480 }
 6481 }
 6482
 6483 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
 6484 NextFreeVGPR, NextFreeSGPR,
 6485 ReserveVCC, ReserveFlatScr);
 6486 return false;
 6487}
6488
6489bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6490 uint32_t Version;
6491 if (ParseAsAbsoluteExpression(Version))
6492 return true;
6493
6494 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
6495 return false;
6496}
6497
6498bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6499 AMDGPUMCKernelCodeT &C) {
6500 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6501 // assembly for backwards compatibility.
6502 if (ID == "max_scratch_backing_memory_byte_size") {
6503 Parser.eatToEndOfStatement();
6504 return false;
6505 }
6506
6507 SmallString<40> ErrStr;
6508 raw_svector_ostream Err(ErrStr);
6509 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
6510 return TokError(Err.str());
6511 }
6512 Lex();
6513
6514 if (ID == "enable_wavefront_size32") {
6515 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6516 if (!isGFX10Plus())
6517 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
6518 if (!isWave32())
6519 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
6520 } else {
6521 if (!isWave64())
6522 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
6523 }
6524 }
6525
6526 if (ID == "wavefront_size") {
6527 if (C.wavefront_size == 5) {
6528 if (!isGFX10Plus())
6529 return TokError("wavefront_size=5 is only allowed on GFX10+");
6530 if (!isWave32())
6531 return TokError("wavefront_size=5 requires +WavefrontSize32");
6532 } else if (C.wavefront_size == 6) {
6533 if (!isWave64())
6534 return TokError("wavefront_size=6 requires +WavefrontSize64");
6535 }
6536 }
6537
6538 return false;
6539}
6540
6541bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6542 AMDGPUMCKernelCodeT KernelCode;
6543 KernelCode.initDefault(&getSTI(), getContext());
6544
6545 while (true) {
6546 // Lex EndOfStatement. This is in a while loop, because lexing a comment
6547 // will set the current token to EndOfStatement.
6548 while(trySkipToken(AsmToken::EndOfStatement));
6549
6550 StringRef ID;
6551 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
6552 return true;
6553
6554 if (ID == ".end_amd_kernel_code_t")
6555 break;
6556
6557 if (ParseAMDKernelCodeTValue(ID, KernelCode))
6558 return true;
6559 }
6560
6561 KernelCode.validate(&getSTI(), getContext());
6562 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6563
6564 return false;
6565}
6566
// Parse `.amdgpu_hsa_kernel <name>`: mark the symbol as an HSA kernel via the
// target streamer and (re)initialize per-kernel scope state.
// NOTE(review): one continuation line of the EmitAMDGPUSymbolType call (the
// symbol-type argument) is elided in this listing — confirm against the full
// file.
6567bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
 6568 StringRef KernelName;
 6569 if (!parseId(KernelName, "expected symbol name"))
 6570 return true;
 6571
 6572 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
 6574
 6575 KernelScope.initialize(getContext());
 6576 return false;
 6577}
6578
6579bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6580 if (!getSTI().getTargetTriple().isAMDGCN()) {
6581 return Error(getLoc(),
6582 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6583 "architectures");
6584 }
6585
6586 auto TargetIDDirective = getLexer().getTok().getStringContents();
6587 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6588 return Error(getParser().getTok().getLoc(), "target id must match options");
6589
6590 getTargetStreamer().EmitISAVersion();
6591 Lex();
6592
6593 return false;
6594}
6595
6596bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6597 assert(isHsaAbi(getSTI()));
6598
6599 std::string HSAMetadataString;
6600 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6601 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6602 return true;
6603
6604 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6605 return Error(getLoc(), "invalid HSA metadata");
6606
6607 return false;
6608}
6609
6610/// Common code to parse out a block of text (typically YAML) between start and
6611/// end directives.
6612bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6613 const char *AssemblerDirectiveEnd,
6614 std::string &CollectString) {
6615
6616 raw_string_ostream CollectStream(CollectString);
6617
6618 getLexer().setSkipSpace(false);
6619
6620 bool FoundEnd = false;
6621 while (!isToken(AsmToken::Eof)) {
6622 while (isToken(AsmToken::Space)) {
6623 CollectStream << getTokenStr();
6624 Lex();
6625 }
6626
6627 if (trySkipId(AssemblerDirectiveEnd)) {
6628 FoundEnd = true;
6629 break;
6630 }
6631
6632 CollectStream << Parser.parseStringToEndOfStatement()
6633 << getContext().getAsmInfo()->getSeparatorString();
6634
6635 Parser.eatToEndOfStatement();
6636 }
6637
6638 getLexer().setSkipSpace(true);
6639
6640 if (isToken(AsmToken::Eof) && !FoundEnd) {
6641 return TokError(Twine("expected directive ") +
6642 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6643 }
6644
6645 return false;
6646}
6647
6648/// Parse the assembler directive for new MsgPack-format PAL metadata.
// Collects everything up to the PAL metadata end directive and hands the text
// to the PAL metadata object for MsgPack parsing.
// NOTE(review): the continuation line of the ParseToEndDirective call (the end
// directive and output-string arguments) is elided in this listing.
6649bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
 6650 std::string String;
 6651 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
 6653 return true;
 6654
 6655 auto *PALMetadata = getTargetStreamer().getPALMetadata();
 6656 if (!PALMetadata->setFromString(String))
 6657 return Error(getLoc(), "invalid PAL metadata");
 6658 return false;
 6659}
6660
6661/// Parse the assembler directive for old linear-format PAL metadata.
// The legacy format is a comma-separated list of key/value register pairs; an
// odd number of values is an error.
// NOTE(review): the second Twine operand of each TokError below is elided in
// this listing (gaps in the source numbering).
6662bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
 6663 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
 6664 return Error(getLoc(),
 6665 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
 6666 "not available on non-amdpal OSes")).str());
 6667 }
 6668
 6669 auto *PALMetadata = getTargetStreamer().getPALMetadata();
 // Switch the metadata object into legacy (linear register) mode.
 6670 PALMetadata->setLegacy();
 6671 for (;;) {
 6672 uint32_t Key, Value;
 6673 if (ParseAsAbsoluteExpression(Key)) {
 6674 return TokError(Twine("invalid value in ") +
 6676 }
 6677 if (!trySkipToken(AsmToken::Comma)) {
 6678 return TokError(Twine("expected an even number of values in ") +
 6680 }
 6681 if (ParseAsAbsoluteExpression(Value)) {
 6682 return TokError(Twine("invalid value in ") +
 6684 }
 6685 PALMetadata->setRegister(Key, Value);
 // A trailing comma continues the list; otherwise we are done.
 6686 if (!trySkipToken(AsmToken::Comma))
 6687 break;
 6688 }
 6689 return false;
 6690}
6691
6692/// ParseDirectiveAMDGPULDS
6693/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
6694bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6695 if (getParser().checkForValidSection())
6696 return true;
6697
6698 StringRef Name;
6699 SMLoc NameLoc = getLoc();
6700 if (getParser().parseIdentifier(Name))
6701 return TokError("expected identifier in directive");
6702
6703 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6704 if (getParser().parseComma())
6705 return true;
6706
6707 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6708
6709 int64_t Size;
6710 SMLoc SizeLoc = getLoc();
6711 if (getParser().parseAbsoluteExpression(Size))
6712 return true;
6713 if (Size < 0)
6714 return Error(SizeLoc, "size must be non-negative");
6715 if (Size > LocalMemorySize)
6716 return Error(SizeLoc, "size is too large");
6717
6718 int64_t Alignment = 4;
6719 if (trySkipToken(AsmToken::Comma)) {
6720 SMLoc AlignLoc = getLoc();
6721 if (getParser().parseAbsoluteExpression(Alignment))
6722 return true;
6723 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6724 return Error(AlignLoc, "alignment must be a power of two");
6725
6726 // Alignment larger than the size of LDS is possible in theory, as long
6727 // as the linker manages to place to symbol at address 0, but we do want
6728 // to make sure the alignment fits nicely into a 32-bit integer.
6729 if (Alignment >= 1u << 31)
6730 return Error(AlignLoc, "alignment is too large");
6731 }
6732
6733 if (parseEOL())
6734 return true;
6735
6736 Symbol->redefineIfPossible();
6737 if (!Symbol->isUndefined())
6738 return Error(NameLoc, "invalid symbol redefinition");
6739
6740 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6741 return false;
6742}
6743
// Top-level directive dispatcher. Returns false when the directive was
// recognized and handled (possibly with a reported error), true when it is
// unknown so the generic parser can try it.
// NOTE(review): two `if (IDVal == ...)` guard lines are elided in this
// listing (before the HSA-metadata branches in both arms).
6744bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
 6745 StringRef IDVal = DirectiveID.getString();
 6746
 // Directives available only under the HSA ABI.
 6747 if (isHsaAbi(getSTI())) {
 6748 if (IDVal == ".amdhsa_kernel")
 6749 return ParseDirectiveAMDHSAKernel();
 6750
 6751 if (IDVal == ".amdhsa_code_object_version")
 6752 return ParseDirectiveAMDHSACodeObjectVersion();
 6753
 6754 // TODO: Restructure/combine with PAL metadata directive.
 6756 return ParseDirectiveHSAMetadata();
 6757 } else {
 // Legacy (non-HSA-ABI) directives.
 6758 if (IDVal == ".amd_kernel_code_t")
 6759 return ParseDirectiveAMDKernelCodeT();
 6760
 6761 if (IDVal == ".amdgpu_hsa_kernel")
 6762 return ParseDirectiveAMDGPUHsaKernel();
 6763
 6764 if (IDVal == ".amd_amdgpu_isa")
 6765 return ParseDirectiveISAVersion();
 6766
 6768 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
 6769 Twine(" directive is "
 6770 "not available on non-amdhsa OSes"))
 6771 .str());
 6772 }
 6773 }
 6774
 // Directives valid regardless of OS/ABI.
 6775 if (IDVal == ".amdgcn_target")
 6776 return ParseDirectiveAMDGCNTarget();
 6777
 6778 if (IDVal == ".amdgpu_lds")
 6779 return ParseDirectiveAMDGPULDS();
 6780
 6781 if (IDVal == PALMD::AssemblerDirectiveBegin)
 6782 return ParseDirectivePALMetadataBegin();
 6783
 6784 if (IDVal == PALMD::AssemblerDirective)
 6785 return ParseDirectivePALMetadata();
 6786
 // Not ours; let the generic assembler parser handle it.
 6787 return true;
 6788}
6789
// Returns true if \p Reg exists on the current subtarget. Checks run from
// most-specific (register-range overlaps, named special registers) to
// generation-wide defaults; order matters because later checks assume the
// earlier ones did not match.
6790bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
 6791 MCRegister Reg) {
 // ttmp12-ttmp15 only exist from GFX9 onwards.
 6792 if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
 6793 return isGFX9Plus();
 6794
 6795 // GFX10+ has 2 more SGPRs 104 and 105.
 6796 if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
 6797 return hasSGPR104_SGPR105();
 6798
 // Named special registers with per-generation availability.
 6799 switch (Reg.id()) {
 6800 case SRC_SHARED_BASE_LO:
 6801 case SRC_SHARED_BASE:
 6802 case SRC_SHARED_LIMIT_LO:
 6803 case SRC_SHARED_LIMIT:
 6804 case SRC_PRIVATE_BASE_LO:
 6805 case SRC_PRIVATE_BASE:
 6806 case SRC_PRIVATE_LIMIT_LO:
 6807 case SRC_PRIVATE_LIMIT:
 6808 return isGFX9Plus();
 6809 case SRC_FLAT_SCRATCH_BASE_LO:
 6810 case SRC_FLAT_SCRATCH_BASE_HI:
 6811 return hasGloballyAddressableScratch();
 6812 case SRC_POPS_EXITING_WAVE_ID:
 // Present on GFX9/GFX10 only.
 6813 return isGFX9Plus() && !isGFX11Plus();
 6814 case TBA:
 6815 case TBA_LO:
 6816 case TBA_HI:
 6817 case TMA:
 6818 case TMA_LO:
 6819 case TMA_HI:
 // Trap base/memory address registers were removed in GFX9.
 6820 return !isGFX9Plus();
 6821 case XNACK_MASK:
 6822 case XNACK_MASK_LO:
 6823 case XNACK_MASK_HI:
 // Only on VI/GFX9, and only when the target-id supports XNACK.
 6824 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
 6825 case SGPR_NULL:
 6826 return isGFX10Plus();
 6827 case SRC_EXECZ:
 6828 case SRC_VCCZ:
 6829 return !isGFX11Plus();
 6830 default:
 6831 break;
 6832 }
 6833
 6834 if (isCI())
 6835 return true;
 6836
 6837 if (isSI() || isGFX10Plus()) {
 6838 // No flat_scr on SI.
 6839 // On GFX10Plus flat scratch is not a valid register operand and can only be
 6840 // accessed with s_setreg/s_getreg.
 6841 switch (Reg.id()) {
 6842 case FLAT_SCR:
 6843 case FLAT_SCR_LO:
 6844 case FLAT_SCR_HI:
 6845 return false;
 6846 default:
 6847 return true;
 6848 }
 6849 }
 6850
 6851 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
 6852 // SI/CI have.
 6853 if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
 6854 return hasSGPR102_SGPR103();
 6855
 6856 return true;
 6857}
6858
// Parse one instruction operand. Tries, in order: VOPD special syntax, the
// tablegen'd custom operand parsers, NSA register lists in square brackets
// (MIMG on GFX10+), and finally the generic register-or-immediate parser.
6859ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
 6860 StringRef Mnemonic,
 6861 OperandMode Mode) {
 6862 ParseStatus Res = parseVOPD(Operands);
 6863 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
 6864 return Res;
 6865
 6866 // Try to parse with a custom parser
 6867 Res = MatchOperandParserImpl(Operands, Mnemonic);
 6868
 6869 // If we successfully parsed the operand or if there as an error parsing,
 6870 // we are done.
 6871 //
 6872 // If we are parsing after we reach EndOfStatement then this means we
 6873 // are appending default values to the Operands list. This is only done
 6874 // by custom parser, so we shouldn't continue on to the generic parsing.
 6875 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
 6876 return Res;
 6877
 // NSA (non-sequential address) mode: a bracketed, comma-separated list of
 // registers, e.g. [v0, v2, v5].
 6878 SMLoc RBraceLoc;
 6879 SMLoc LBraceLoc = getLoc();
 6880 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
 // Remember where the list starts so the bracket tokens can be inserted
 // around it afterwards.
 6881 unsigned Prefix = Operands.size();
 6882
 6883 for (;;) {
 6884 auto Loc = getLoc();
 6885 Res = parseReg(Operands);
 6886 if (Res.isNoMatch())
 6887 Error(Loc, "expected a register");
 6888 if (!Res.isSuccess())
 6889 return ParseStatus::Failure;
 6890
 6891 RBraceLoc = getLoc();
 6892 if (trySkipToken(AsmToken::RBrac))
 6893 break;
 6894
 6895 if (!skipToken(AsmToken::Comma,
 6896 "expected a comma or a closing square bracket"))
 6897 return ParseStatus::Failure;
 6898 }
 6899
 // Only keep the bracket tokens when more than one register was listed; a
 // single register is treated as a plain operand.
 6900 if (Operands.size() - Prefix > 1) {
 6901 Operands.insert(Operands.begin() + Prefix,
 6902 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
 6903 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
 6904 }
 6905
 6906 return ParseStatus::Success;
 6907 }
 6908
 6909 return parseRegOrImm(Operands);
 6910}
6911
6912StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6913 // Clear any forced encodings from the previous instruction.
6914 setForcedEncodingSize(0);
6915 setForcedDPP(false);
6916 setForcedSDWA(false);
6917
6918 if (Name.consume_back("_e64_dpp")) {
6919 setForcedDPP(true);
6920 setForcedEncodingSize(64);
6921 return Name;
6922 }
6923 if (Name.consume_back("_e64")) {
6924 setForcedEncodingSize(64);
6925 return Name;
6926 }
6927 if (Name.consume_back("_e32")) {
6928 setForcedEncodingSize(32);
6929 return Name;
6930 }
6931 if (Name.consume_back("_dpp")) {
6932 setForcedDPP(true);
6933 return Name;
6934 }
6935 if (Name.consume_back("_sdwa")) {
6936 setForcedSDWA(true);
6937 return Name;
6938 }
6939 return Name;
6940}
6941
6942static void applyMnemonicAliases(StringRef &Mnemonic,
6943 const FeatureBitset &Features,
6944 unsigned VariantID);
6945
// Parse a full instruction: strip the encoding suffix from the mnemonic,
// apply mnemonic aliases, then parse operands until end of statement. On an
// operand error, reports a diagnostic and consumes the rest of the statement
// so parsing can resume at the next one.
6946bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
 6947 StringRef Name, SMLoc NameLoc,
 6948 OperandVector &Operands) {
 6949 // Add the instruction mnemonic
 6950 Name = parseMnemonicSuffix(Name);
 6951
 6952 // If the target architecture uses MnemonicAlias, call it here to parse
 6953 // operands correctly.
 6954 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
 6955
 6956 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
 6957
 6958 bool IsMIMG = Name.starts_with("image_");
 6959
 6960 while (!trySkipToken(AsmToken::EndOfStatement)) {
 6961 OperandMode Mode = OperandMode_Default;
 // MIMG on GFX10+ may use NSA register lists for the second operand.
 6962 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
 6963 Mode = OperandMode_NSA;
 6964 ParseStatus Res = parseOperand(Operands, Name, Mode);
 6965
 6966 if (!Res.isSuccess()) {
 6967 checkUnsupportedInstruction(Name, NameLoc);
 6968 if (!Parser.hasPendingError()) {
 6969 // FIXME: use real operand location rather than the current location.
 6970 StringRef Msg = Res.isFailure() ? "failed parsing operand."
 6971 : "not a valid operand.";
 6972 Error(getLoc(), Msg);
 6973 }
 // Error recovery: discard the remainder of the statement.
 6974 while (!trySkipToken(AsmToken::EndOfStatement)) {
 6975 lex();
 6976 }
 6977 return true;
 6978 }
 6979
 6980 // Eat the comma or space if there is one.
 6981 trySkipToken(AsmToken::Comma);
 6982 }
 6983
 6984 return false;
 6985}
6986
6987//===----------------------------------------------------------------------===//
6988// Utility functions
6989//===----------------------------------------------------------------------===//
6990
6991ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6992 OperandVector &Operands) {
6993 SMLoc S = getLoc();
6994 if (!trySkipId(Name))
6995 return ParseStatus::NoMatch;
6996
6997 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6998 return ParseStatus::Success;
6999}
7000
7001ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
7002 int64_t &IntVal) {
7003
7004 if (!trySkipId(Prefix, AsmToken::Colon))
7005 return ParseStatus::NoMatch;
7006
7008}
7009
// Parse "<Prefix>:<value>" and record it as an immediate operand of type
// \p ImmTy. \p ConvertResult, when provided, may rewrite the parsed value in
// place; if it rejects the value an error is reported, but the operand is
// still pushed so parsing can continue.
ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
    const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
    std::function<bool(int64_t &)> ConvertResult) {
  SMLoc S = getLoc();
  int64_t Value = 0;

  ParseStatus Res = parseIntWithPrefix(Prefix, Value);
  if (!Res.isSuccess())
    return Res;

  // Deliberately no early return: the error is recorded and the (possibly
  // rewritten) value is still added below.
  if (ConvertResult && !ConvertResult(Value)) {
    Error(S, "invalid " + StringRef(Prefix) + " value.");
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
  return ParseStatus::Success;
}
7027
// Parse "<Prefix>:[v0,v1,...]" where every element must be 0 or 1. The
// elements are packed into a bitmask (element I becomes bit I) that is
// recorded as an immediate operand of type \p ImmTy. At most 4 elements are
// accepted. \p ConvertResult is unused here; it exists for signature
// symmetry with the other prefix parsers.
ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
    const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
    bool (*ConvertResult)(int64_t &)) {
  SMLoc S = getLoc();
  if (!trySkipId(Prefix, AsmToken::Colon))
    return ParseStatus::NoMatch;

  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
    return ParseStatus::Failure;

  unsigned Val = 0;
  const unsigned MaxSize = 4;

  // FIXME: How to verify the number of elements matches the number of src
  // operands?
  for (int I = 0; ; ++I) {
    int64_t Op;
    SMLoc Loc = getLoc();
    if (!parseExpr(Op))
      return ParseStatus::Failure;

    if (Op != 0 && Op != 1)
      return Error(Loc, "invalid " + StringRef(Prefix) + " value.");

    // Pack element I into bit I.
    Val |= (Op << I);

    if (trySkipToken(AsmToken::RBrac))
      break;

    if (I + 1 == MaxSize)
      return Error(getLoc(), "expected a closing square bracket");

    if (!skipToken(AsmToken::Comma, "expected a comma"))
      return ParseStatus::Failure;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
  return ParseStatus::Success;
}
7067
// Parse a named single-bit modifier: "<Name>" sets the bit, "no<Name>"
// clears it. When \p IgnoreNegative is set, the "no" form is consumed but no
// operand is added. Also enforces per-target availability of specific bits.
ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
                                           OperandVector &Operands,
                                           AMDGPUOperand::ImmTy ImmTy,
                                           bool IgnoreNegative) {
  int64_t Bit;
  SMLoc S = getLoc();

  if (trySkipId(Name)) {
    Bit = 1;
  } else if (trySkipId("no", Name)) {
    if (IgnoreNegative)
      return ParseStatus::Success;
    Bit = 0;
  } else {
    return ParseStatus::NoMatch;
  }

  if (Name == "r128" && !hasMIMG_R128())
    return Error(S, "r128 modifier is not supported on this GPU");
  if (Name == "a16" && !hasA16())
    return Error(S, "a16 modifier is not supported on this GPU");

  // The "nogds" form is rejected for ds_gws_* mnemonics.
  if (Bit == 0 && Name == "gds") {
    StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
    if (Mnemo.starts_with("ds_gws"))
      return Error(S, "nogds is not allowed");
  }

  // On GFX9, represent a16 via the combined R128A16 operand type.
  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
    ImmTy = AMDGPUOperand::ImmTyR128A16;

  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
  return ParseStatus::Success;
}
7102
7103unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7104 bool &Disabling) const {
7105 Disabling = Id.consume_front("no");
7106
7107 if (isGFX940() && !Mnemo.starts_with("s_")) {
7108 return StringSwitch<unsigned>(Id)
7109 .Case("nt", AMDGPU::CPol::NT)
7110 .Case("sc0", AMDGPU::CPol::SC0)
7111 .Case("sc1", AMDGPU::CPol::SC1)
7112 .Default(0);
7113 }
7114
7115 return StringSwitch<unsigned>(Id)
7116 .Case("dlc", AMDGPU::CPol::DLC)
7117 .Case("glc", AMDGPU::CPol::GLC)
7118 .Case("scc", AMDGPU::CPol::SCC)
7119 .Case("slc", AMDGPU::CPol::SLC)
7120 .Default(0);
7121}
7122
// Parse the cache-policy modifiers of a memory instruction into a single
// ImmTyCPol immediate operand.
//
// GFX12+ accepts, in any order and each at most once: th:..., scope:...,
// [no]nv, and [no]scale_offset. Earlier targets accept a sequence of
// [no]dlc/glc/scc/slc style flags (see getCPolKind).
ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
  if (isGFX12Plus()) {
    SMLoc StringLoc = getLoc();

    int64_t CPolVal = 0;
    // Each Res* tracks whether its modifier group has been seen, so every
    // group is accepted at most once.
    ParseStatus ResTH = ParseStatus::NoMatch;
    ParseStatus ResScope = ParseStatus::NoMatch;
    ParseStatus ResNV = ParseStatus::NoMatch;
    ParseStatus ResScal = ParseStatus::NoMatch;

    for (;;) {
      if (ResTH.isNoMatch()) {
        int64_t TH;
        ResTH = parseTH(Operands, TH);
        if (ResTH.isFailure())
          return ResTH;
        if (ResTH.isSuccess()) {
          CPolVal |= TH;
          continue;
        }
      }

      if (ResScope.isNoMatch()) {
        int64_t Scope;
        ResScope = parseScope(Operands, Scope);
        if (ResScope.isFailure())
          return ResScope;
        if (ResScope.isSuccess()) {
          CPolVal |= Scope;
          continue;
        }
      }

      // NV bit exists on GFX12+, but does something starting from GFX1250.
      // Allow parsing on all GFX12 and fail on validation for better
      // diagnostics.
      if (ResNV.isNoMatch()) {
        if (trySkipId("nv")) {
          ResNV = ParseStatus::Success;
          CPolVal |= CPol::NV;
          continue;
        } else if (trySkipId("no", "nv")) {
          ResNV = ParseStatus::Success;
          continue;
        }
      }

      if (ResScal.isNoMatch()) {
        if (trySkipId("scale_offset")) {
          ResScal = ParseStatus::Success;
          CPolVal |= CPol::SCAL;
          continue;
        } else if (trySkipId("no", "scale_offset")) {
          ResScal = ParseStatus::Success;
          continue;
        }
      }

      break;
    }

    // Nothing matched at all: report NoMatch so other parsers may run.
    if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
        ResScal.isNoMatch())
      return ParseStatus::NoMatch;

    Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
                                                AMDGPUOperand::ImmTyCPol));
    return ParseStatus::Success;
  }

  // Pre-GFX12 path: consume flag-style modifiers one by one.
  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  SMLoc OpLoc = getLoc();
  unsigned Enabled = 0, Seen = 0;
  for (;;) {
    SMLoc S = getLoc();
    bool Disabling;
    unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
    if (!CPol)
      break;

    lex();

    if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
      return Error(S, "dlc modifier is not supported on this GPU");

    if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
      return Error(S, "scc modifier is not supported on this GPU");

    if (Seen & CPol)
      return Error(S, "duplicate cache policy modifier");

    // The "no" form is consumed and counted for duplicate detection but
    // does not set the bit.
    if (!Disabling)
      Enabled |= CPol;

    Seen |= CPol;
  }

  if (!Seen)
    return ParseStatus::NoMatch;

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
  return ParseStatus::Success;
}
7227
7228ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
7229 int64_t &Scope) {
7230 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
7232
7233 ParseStatus Res = parseStringOrIntWithPrefix(
7234 Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
7235 Scope);
7236
7237 if (Res.isSuccess())
7238 Scope = Scopes[Scope];
7239
7240 return Res;
7241}
7242
7243ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
7244 TH = AMDGPU::CPol::TH_RT; // default
7245
7246 StringRef Value;
7247 SMLoc StringLoc;
7248 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
7249 if (!Res.isSuccess())
7250 return Res;
7251
7252 if (Value == "TH_DEFAULT")
7254 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
7255 Value == "TH_LOAD_NT_WB") {
7256 return Error(StringLoc, "invalid th value");
7257 } else if (Value.consume_front("TH_ATOMIC_")) {
7259 } else if (Value.consume_front("TH_LOAD_")) {
7261 } else if (Value.consume_front("TH_STORE_")) {
7263 } else {
7264 return Error(StringLoc, "invalid th value");
7265 }
7266
7267 if (Value == "BYPASS")
7269
7270 if (TH != 0) {
7272 TH |= StringSwitch<int64_t>(Value)
7273 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7274 .Case("RT", AMDGPU::CPol::TH_RT)
7275 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7276 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
7277 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
7279 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
7280 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
7282 .Default(0xffffffff);
7283 else
7284 TH |= StringSwitch<int64_t>(Value)
7285 .Case("RT", AMDGPU::CPol::TH_RT)
7286 .Case("NT", AMDGPU::CPol::TH_NT)
7287 .Case("HT", AMDGPU::CPol::TH_HT)
7288 .Case("LU", AMDGPU::CPol::TH_LU)
7289 .Case("WB", AMDGPU::CPol::TH_WB)
7290 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
7291 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
7292 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
7293 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
7294 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
7295 .Default(0xffffffff);
7296 }
7297
7298 if (TH == 0xffffffff)
7299 return Error(StringLoc, "invalid th value");
7300
7301 return ParseStatus::Success;
7302}
7303
7304static void
7306 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7307 AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
7308 std::optional<unsigned> InsertAt = std::nullopt) {
7309 auto i = OptionalIdx.find(ImmT);
7310 if (i != OptionalIdx.end()) {
7311 unsigned Idx = i->second;
7312 const AMDGPUOperand &Op =
7313 static_cast<const AMDGPUOperand &>(*Operands[Idx]);
7314 if (InsertAt)
7315 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
7316 else
7317 Op.addImmOperands(Inst, 1);
7318 } else {
7319 if (InsertAt.has_value())
7320 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
7321 else
7323 }
7324}
7325
7326ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7327 StringRef &Value,
7328 SMLoc &StringLoc) {
7329 if (!trySkipId(Prefix, AsmToken::Colon))
7330 return ParseStatus::NoMatch;
7331
7332 StringLoc = getLoc();
7333 return parseId(Value, "expected an identifier") ? ParseStatus::Success
7335}
7336
7337ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7338 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7339 int64_t &IntVal) {
7340 if (!trySkipId(Name, AsmToken::Colon))
7341 return ParseStatus::NoMatch;
7342
7343 SMLoc StringLoc = getLoc();
7344
7345 StringRef Value;
7346 if (isToken(AsmToken::Identifier)) {
7347 Value = getTokenStr();
7348 lex();
7349
7350 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
7351 if (Value == Ids[IntVal])
7352 break;
7353 } else if (!parseExpr(IntVal))
7354 return ParseStatus::Failure;
7355
7356 if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
7357 return Error(StringLoc, "invalid " + Twine(Name) + " value");
7358
7359 return ParseStatus::Success;
7360}
7361
7362ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7363 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7364 AMDGPUOperand::ImmTy Type) {
7365 SMLoc S = getLoc();
7366 int64_t IntVal;
7367
7368 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7369 if (Res.isSuccess())
7370 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
7371
7372 return Res;
7373}
7374
7375//===----------------------------------------------------------------------===//
7376// MTBUF format
7377//===----------------------------------------------------------------------===//
7378
// Try to parse "<Pref>:<value>" and range-check the value against
// [0, MaxVal]. Returns false only on a hard parse or range error; when the
// prefix is simply absent, \p Fmt is left untouched and true is returned.
bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
                                  int64_t MaxVal,
                                  int64_t &Fmt) {
  int64_t Val;
  SMLoc Loc = getLoc();

  auto Res = parseIntWithPrefix(Pref, Val);
  if (Res.isFailure())
    return false;
  if (Res.isNoMatch())
    return true;

  if (Val < 0 || Val > MaxVal) {
    Error(Loc, Twine("out of range ", StringRef(Pref)));
    return false;
  }

  Fmt = Val;
  return true;
}
7399
7400ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7401 AMDGPUOperand::ImmTy ImmTy) {
7402 const char *Pref = "index_key";
7403 int64_t ImmVal = 0;
7404 SMLoc Loc = getLoc();
7405 auto Res = parseIntWithPrefix(Pref, ImmVal);
7406 if (!Res.isSuccess())
7407 return Res;
7408
7409 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7410 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7411 (ImmVal < 0 || ImmVal > 1))
7412 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7413
7414 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7415 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7416
7417 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
7418 return ParseStatus::Success;
7419}
7420
// Thin wrappers binding tryParseIndexKey to the width-specific operand
// types.
ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
}

ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
}

ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
}
7432
// Parse a WMMA "matrix_*_fmt" modifier against the shared symbolic name
// table.
ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
                                               StringRef Name,
                                               AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixFmt,
                                    Type);
}

// Per-operand wrappers for the A/B matrix format modifiers.
ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
  return tryParseMatrixFMT(Operands, "matrix_a_fmt",
                           AMDGPUOperand::ImmTyMatrixAFMT);
}

ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
  return tryParseMatrixFMT(Operands, "matrix_b_fmt",
                           AMDGPUOperand::ImmTyMatrixBFMT);
}

// Parse a WMMA "matrix_*_scale" modifier against the shared symbolic name
// table.
ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
                                                 StringRef Name,
                                                 AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixScale,
                                    Type);
}

ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
  return tryParseMatrixScale(Operands, "matrix_a_scale",
                             AMDGPUOperand::ImmTyMatrixAScale);
}

ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
  return tryParseMatrixScale(Operands, "matrix_b_scale",
                             AMDGPUOperand::ImmTyMatrixBScale);
}

// Parse a WMMA "matrix_*_scale_fmt" modifier against the shared symbolic
// name table.
ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
                                                    StringRef Name,
                                                    AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixScaleFmt,
                                    Type);
}

ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
  return tryParseMatrixScaleFmt(Operands, "matrix_a_scale_fmt",
                                AMDGPUOperand::ImmTyMatrixAScaleFmt);
}

ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
  return tryParseMatrixScaleFmt(Operands, "matrix_b_scale_fmt",
                                AMDGPUOperand::ImmTyMatrixBScaleFmt);
}
7483
// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
// values to live in a joint format operand in the MCInst encoding.
// Each of "dfmt:<n>" / "nfmt:<n>" is optional, they may appear in either
// order and may be separated by a single comma. NoMatch when neither is
// present; otherwise the omitted half takes its default encoding.
ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;

  // dfmt and nfmt can appear in either order, and each is optional.
  for (int I = 0; I < 2; ++I) {
    if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
      return ParseStatus::Failure;

    if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
      return ParseStatus::Failure;

    // Skip optional comma between dfmt/nfmt
    // but guard against 2 commas following each other.
    if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
        !peekToken().is(AsmToken::Comma)) {
      trySkipToken(AsmToken::Comma);
    }
  }

  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
    return ParseStatus::NoMatch;

  // Substitute the default for whichever half was omitted.
  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  Format = encodeDfmtNfmt(Dfmt, Nfmt);
  return ParseStatus::Success;
}
7517
7518ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7519 using namespace llvm::AMDGPU::MTBUFFormat;
7520
7521 int64_t Fmt = UFMT_UNDEF;
7522
7523 if (!tryParseFmt("format", UFMT_MAX, Fmt))
7524 return ParseStatus::Failure;
7525
7526 if (Fmt == UFMT_UNDEF)
7527 return ParseStatus::NoMatch;
7528
7529 Format = Fmt;
7530 return ParseStatus::Success;
7531}
7532
7533bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7534 int64_t &Nfmt,
7535 StringRef FormatStr,
7536 SMLoc Loc) {
7537 using namespace llvm::AMDGPU::MTBUFFormat;
7538 int64_t Format;
7539
7540 Format = getDfmt(FormatStr);
7541 if (Format != DFMT_UNDEF) {
7542 Dfmt = Format;
7543 return true;
7544 }
7545
7546 Format = getNfmt(FormatStr, getSTI());
7547 if (Format != NFMT_UNDEF) {
7548 Nfmt = Format;
7549 return true;
7550 }
7551
7552 Error(Loc, "unsupported format");
7553 return false;
7554}
7555
// Parse a split symbolic format: one or two comma-separated names, each a
// dfmt or an nfmt. On GFX10+ the resulting pair is converted into the
// equivalent unified format.
ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
                                                      SMLoc FormatLoc,
                                                      int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;
  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
    return ParseStatus::Failure;

  if (trySkipToken(AsmToken::Comma)) {
    StringRef Str;
    SMLoc Loc = getLoc();
    if (!parseId(Str, "expected a format string") ||
        !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
      return ParseStatus::Failure;
    // If one half is still undefined after two successful matches, both
    // names fell into the same category.
    if (Dfmt == DFMT_UNDEF)
      return Error(Loc, "duplicate numeric format");
    if (Nfmt == NFMT_UNDEF)
      return Error(Loc, "duplicate data format");
  }

  // Substitute defaults for whichever half was not specified.
  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  if (isGFX10Plus()) {
    auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
    if (Ufmt == UFMT_UNDEF)
      return Error(FormatLoc, "unsupported format");
    Format = Ufmt;
  } else {
    Format = encodeDfmtNfmt(Dfmt, Nfmt);
  }

  return ParseStatus::Success;
}
7592
7593ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7594 SMLoc Loc,
7595 int64_t &Format) {
7596 using namespace llvm::AMDGPU::MTBUFFormat;
7597
7598 auto Id = getUnifiedFormat(FormatStr, getSTI());
7599 if (Id == UFMT_UNDEF)
7600 return ParseStatus::NoMatch;
7601
7602 if (!isGFX10Plus())
7603 return Error(Loc, "unified format is not supported on this GPU");
7604
7605 Format = Id;
7606 return ParseStatus::Success;
7607}
7608
7609ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7610 using namespace llvm::AMDGPU::MTBUFFormat;
7611 SMLoc Loc = getLoc();
7612
7613 if (!parseExpr(Format))
7614 return ParseStatus::Failure;
7615 if (!isValidFormatEncoding(Format, getSTI()))
7616 return Error(Loc, "out of range format");
7617
7618 return ParseStatus::Success;
7619}
7620
// Parse "format:" followed by either a symbolic name list in square
// brackets or a numeric expression.
ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  if (!trySkipId("format", AsmToken::Colon))
    return ParseStatus::NoMatch;

  if (trySkipToken(AsmToken::LBrac)) {
    StringRef FormatStr;
    SMLoc Loc = getLoc();
    if (!parseId(FormatStr, "expected a format string"))
      return ParseStatus::Failure;

    // Prefer the unified (GFX10+) name; fall back to split dfmt/nfmt names.
    auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
    if (Res.isNoMatch())
      Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
    if (!Res.isSuccess())
      return Res;

    if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
      return ParseStatus::Failure;

    return ParseStatus::Success;
  }

  return parseNumericFormat(Format);
}
7647
// Parse the format operand of an MTBUF instruction together with the
// following soffset operand. The format may be written before soffset (the
// legacy position) or after it; in the latter case the already-pushed
// ImmTyFORMAT operand is back-patched.
ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Format = getDefaultFormatEncoding(getSTI());
  ParseStatus Res;
  SMLoc Loc = getLoc();

  // Parse legacy format syntax.
  Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
  if (Res.isFailure())
    return Res;

  bool FormatFound = Res.isSuccess();

  // Push the format operand now (default-valued if absent); it may be
  // updated below should the format appear after soffset.
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));

  if (FormatFound)
    trySkipToken(AsmToken::Comma);

  if (isToken(AsmToken::EndOfStatement)) {
    // We are expecting an soffset operand,
    // but let matcher handle the error.
    return ParseStatus::Success;
  }

  // Parse soffset.
  Res = parseRegOrImm(Operands);
  if (!Res.isSuccess())
    return Res;

  trySkipToken(AsmToken::Comma);

  if (!FormatFound) {
    Res = parseSymbolicOrNumericFormat(Format);
    if (Res.isFailure())
      return Res;
    if (Res.isSuccess()) {
      // Back-patch the format operand pushed above; it sits immediately
      // before the soffset operand just parsed.
      auto Size = Operands.size();
      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
      assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
      Op.setImm(Format);
    }
    return ParseStatus::Success;
  }

  if (isId("format") && peekToken().is(AsmToken::Colon))
    return Error(getLoc(), "duplicate format");
  return ParseStatus::Success;
}
7698
7699ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7700 ParseStatus Res =
7701 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7702 if (Res.isNoMatch()) {
7703 Res = parseIntWithPrefix("inst_offset", Operands,
7704 AMDGPUOperand::ImmTyInstOffset);
7705 }
7706 return Res;
7707}
7708
7709ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7710 ParseStatus Res =
7711 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7712 if (Res.isNoMatch())
7713 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7714 return Res;
7715}
7716
7717ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7718 ParseStatus Res =
7719 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7720 if (Res.isNoMatch()) {
7721 Res =
7722 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7723 }
7724 return Res;
7725}
7726
7727//===----------------------------------------------------------------------===//
7728// Exp
7729//===----------------------------------------------------------------------===//
7730
// Convert parsed export-instruction operands into an MCInst. The four
// source slots are tracked by MCInst operand index so the "en" enable mask
// can be computed afterwards: one bit per written register, or a pair of
// bits per register in compressed mode.
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned OperandIdx[4]; // MCInst operand index of each of the 4 sources.
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    // "off" placeholders become null registers in the same source slot.
    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(MCRegister()));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // "done"/"row_en" tokens contribute no MCInst operand of their own.
    if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    // Compressed exports use only sources 0-1: move src2 into slot 1 and
    // clear the upper two slots.
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
    Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
  }

  // Set an enable bit (or bit pair, when compressed) for every non-null
  // source register.
  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg()) {
      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}
7791
7792//===----------------------------------------------------------------------===//
7793// s_waitcnt
7794//===----------------------------------------------------------------------===//
7795
7796static bool
7798 const AMDGPU::IsaVersion ISA,
7799 int64_t &IntVal,
7800 int64_t CntVal,
7801 bool Saturate,
7802 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7803 unsigned (*decode)(const IsaVersion &Version, unsigned))
7804{
7805 bool Failed = false;
7806
7807 IntVal = encode(ISA, IntVal, CntVal);
7808 if (CntVal != decode(ISA, IntVal)) {
7809 if (Saturate) {
7810 IntVal = encode(ISA, IntVal, -1);
7811 } else {
7812 Failed = true;
7813 }
7814 }
7815 return Failed;
7816}
7817
// Parse one "name(value)" counter specifier of an s_waitcnt operand and
// fold it into \p IntVal. The "_sat" name suffix saturates an oversized
// value to the field maximum instead of reporting an error. A trailing '&'
// or ',' separator is consumed. Returns false on error.
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {

  SMLoc CntLoc = getLoc();
  StringRef CntName = getTokenStr();

  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  int64_t CntVal;
  SMLoc ValLoc = getLoc();
  if (!parseExpr(CntVal))
    return false;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());

  bool Failed = true;
  bool Sat = CntName.ends_with("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  } else {
    Error(CntLoc, "invalid counter name " + CntName);
    return false;
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return false;
  }

  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return false;

  // A separator, when present, must be followed by another specifier.
  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
    if (isToken(AsmToken::EndOfStatement)) {
      Error(getLoc(), "expected a counter name");
      return false;
    }
  }

  return true;
}
7865
7866ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7867 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7868 int64_t Waitcnt = getWaitcntBitMask(ISA);
7869 SMLoc S = getLoc();
7870
7871 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7872 while (!isToken(AsmToken::EndOfStatement)) {
7873 if (!parseCnt(Waitcnt))
7874 return ParseStatus::Failure;
7875 }
7876 } else {
7877 if (!parseExpr(Waitcnt))
7878 return ParseStatus::Failure;
7879 }
7880
7881 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7882 return ParseStatus::Success;
7883}
7884
// Parse one "field(VALUE_NAME)" term of an s_delay_alu operand and OR the
// encoded value into \p Delay at the field's bit position. Returns false on
// error.
bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
  SMLoc FieldLoc = getLoc();
  StringRef FieldName = getTokenStr();
  if (!skipToken(AsmToken::Identifier, "expected a field name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  SMLoc ValueLoc = getLoc();
  StringRef ValueName = getTokenStr();
  if (!skipToken(AsmToken::Identifier, "expected a value name") ||
      !skipToken(AsmToken::RParen, "expected a right parenthesis"))
    return false;

  // Bit position of the field inside the delay encoding.
  unsigned Shift;
  if (FieldName == "instid0") {
    Shift = 0;
  } else if (FieldName == "instskip") {
    Shift = 4;
  } else if (FieldName == "instid1") {
    Shift = 7;
  } else {
    Error(FieldLoc, "invalid field name " + FieldName);
    return false;
  }

  int Value;
  if (Shift == 4) {
    // Parse values for instskip.
    Value = StringSwitch<int>(ValueName)
      .Case("SAME", 0)
      .Case("NEXT", 1)
      .Case("SKIP_1", 2)
      .Case("SKIP_2", 3)
      .Case("SKIP_3", 4)
      .Case("SKIP_4", 5)
      .Default(-1);
  } else {
    // Parse values for instid0 and instid1.
    Value = StringSwitch<int>(ValueName)
      .Case("NO_DEP", 0)
      .Case("VALU_DEP_1", 1)
      .Case("VALU_DEP_2", 2)
      .Case("VALU_DEP_3", 3)
      .Case("VALU_DEP_4", 4)
      .Case("TRANS32_DEP_1", 5)
      .Case("TRANS32_DEP_2", 6)
      .Case("TRANS32_DEP_3", 7)
      .Case("FMA_ACCUM_CYCLE_1", 8)
      .Case("SALU_CYCLE_1", 9)
      .Case("SALU_CYCLE_2", 10)
      .Case("SALU_CYCLE_3", 11)
      .Default(-1);
  }
  if (Value < 0) {
    Error(ValueLoc, "invalid value name " + ValueName);
    return false;
  }

  Delay |= Value << Shift;
  return true;
}
7946
7947ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7948 int64_t Delay = 0;
7949 SMLoc S = getLoc();
7950
7951 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7952 do {
7953 if (!parseDelay(Delay))
7954 return ParseStatus::Failure;
7955 } while (trySkipToken(AsmToken::Pipe));
7956 } else {
7957 if (!parseExpr(Delay))
7958 return ParseStatus::Failure;
7959 }
7960
7961 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7962 return ParseStatus::Success;
7963}
7964
// Any immediate may serve as an s_waitcnt operand.
bool
AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}

// Likewise, any immediate may serve as an s_delay_alu operand.
bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7971
7972//===----------------------------------------------------------------------===//
7973// DepCtr
7974//===----------------------------------------------------------------------===//
7975
// Emit the diagnostic matching one of the negative error codes returned by
// encodeDepCtr for the named depctr field.
void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
                                  StringRef DepCtrName) {
  switch (ErrorId) {
  case OPR_ID_UNKNOWN:
    Error(Loc, Twine("invalid counter name ", DepCtrName));
    return;
  case OPR_ID_UNSUPPORTED:
    Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
    return;
  case OPR_ID_DUPLICATE:
    Error(Loc, Twine("duplicate counter name ", DepCtrName));
    return;
  case OPR_VAL_INVALID:
    Error(Loc, Twine("invalid value for ", DepCtrName));
    return;
  default:
    assert(false); // Unhandled error id.
  }
}
7995
// Parse one "name(value)" field of a depctr operand and splice it into
// \p DepCtr. \p UsedOprMask accumulates the bits already written (updated
// by encodeDepCtr) so only this field's bits are replaced here. Returns
// false on error.
bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {

  using namespace llvm::AMDGPU::DepCtr;

  SMLoc DepCtrLoc = getLoc();
  StringRef DepCtrName = getTokenStr();

  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  int64_t ExprVal;
  if (!parseExpr(ExprVal))
    return false;

  unsigned PrevOprMask = UsedOprMask;
  int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());

  // Negative values are error ids; translate them into diagnostics.
  if (CntVal < 0) {
    depCtrError(DepCtrLoc, CntVal, DepCtrName);
    return false;
  }

  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return false;

  // A separator, when present, must be followed by another specifier.
  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
    if (isToken(AsmToken::EndOfStatement)) {
      Error(getLoc(), "expected a counter name");
      return false;
    }
  }

  // The bits newly claimed in UsedOprMask are exactly this field's bits.
  unsigned CntValMask = PrevOprMask ^ UsedOprMask;
  DepCtr = (DepCtr & ~CntValMask) | CntVal;
  return true;
}
8033
// Parses the s_waitcnt_depctr operand: either a sequence of symbolic
// "counter(value)" terms or a plain absolute expression.
ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
  using namespace llvm::AMDGPU::DepCtr;

  // Start from the target's default encoding; each parsed counter overwrites
  // only its own bit range.
  int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
  SMLoc Loc = getLoc();

  if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
    unsigned UsedOprMask = 0;
    while (!isToken(AsmToken::EndOfStatement)) {
      if (!parseDepCtr(DepCtr, UsedOprMask))
        return ParseStatus::Failure;
    }
  } else {
    if (!parseExpr(DepCtr))
      return ParseStatus::Failure;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
  return ParseStatus::Success;
}
8054
8055bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
8056
8057//===----------------------------------------------------------------------===//
8058// hwreg
8059//===----------------------------------------------------------------------===//
8060
// Parses the body of "hwreg(reg [, offset, width])".
// The register may be given symbolically or as a numeric code; offset and
// width are optional but must appear together. Returns NoMatch if the input
// does not start with "hwreg(".
ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
                                            OperandInfoTy &Offset,
                                            OperandInfoTy &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  if (!trySkipId("hwreg", AsmToken::LParen))
    return ParseStatus::NoMatch;

  // The register may be specified by name or using a numeric code
  HwReg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) &&
      (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
    HwReg.IsSymbolic = true;
    lex(); // skip register name
  } else if (!parseExpr(HwReg.Val, "a register name")) {
    return ParseStatus::Failure;
  }

  // A closing parenthesis here means offset/width were omitted and keep
  // their defaults.
  if (trySkipToken(AsmToken::RParen))
    return ParseStatus::Success;

  // parse optional params
  if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
    return ParseStatus::Failure;

  Offset.Loc = getLoc();
  if (!parseExpr(Offset.Val))
    return ParseStatus::Failure;

  if (!skipToken(AsmToken::Comma, "expected a comma"))
    return ParseStatus::Failure;

  Width.Loc = getLoc();
  if (!parseExpr(Width.Val) ||
      !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return ParseStatus::Failure;

  return ParseStatus::Success;
}
8100
8101ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
8102 using namespace llvm::AMDGPU::Hwreg;
8103
8104 int64_t ImmVal = 0;
8105 SMLoc Loc = getLoc();
8106
8107 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
8108 HwregId::Default);
8109 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
8110 HwregOffset::Default);
8111 struct : StructuredOpField {
8112 using StructuredOpField::StructuredOpField;
8113 bool validate(AMDGPUAsmParser &Parser) const override {
8114 if (!isUIntN(Width, Val - 1))
8115 return Error(Parser, "only values from 1 to 32 are legal");
8116 return true;
8117 }
8118 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
8119 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
8120
8121 if (Res.isNoMatch())
8122 Res = parseHwregFunc(HwReg, Offset, Width);
8123
8124 if (Res.isSuccess()) {
8125 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
8126 return ParseStatus::Failure;
8127 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
8128 }
8129
8130 if (Res.isNoMatch() &&
8131 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
8133
8134 if (!Res.isSuccess())
8135 return ParseStatus::Failure;
8136
8137 if (!isUInt<16>(ImmVal))
8138 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8139 Operands.push_back(
8140 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8141 return ParseStatus::Success;
8142}
8143
8144bool AMDGPUOperand::isHwreg() const {
8145 return isImmTy(ImmTyHwreg);
8146}
8147
8148//===----------------------------------------------------------------------===//
8149// sendmsg
8150//===----------------------------------------------------------------------===//
8151
8152bool
8153AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8154 OperandInfoTy &Op,
8155 OperandInfoTy &Stream) {
8156 using namespace llvm::AMDGPU::SendMsg;
8157
8158 Msg.Loc = getLoc();
8159 if (isToken(AsmToken::Identifier) &&
8160 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8161 Msg.IsSymbolic = true;
8162 lex(); // skip message name
8163 } else if (!parseExpr(Msg.Val, "a message name")) {
8164 return false;
8165 }
8166
8167 if (trySkipToken(AsmToken::Comma)) {
8168 Op.IsDefined = true;
8169 Op.Loc = getLoc();
8170 if (isToken(AsmToken::Identifier) &&
8171 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8173 lex(); // skip operation name
8174 } else if (!parseExpr(Op.Val, "an operation name")) {
8175 return false;
8176 }
8177
8178 if (trySkipToken(AsmToken::Comma)) {
8179 Stream.IsDefined = true;
8180 Stream.Loc = getLoc();
8181 if (!parseExpr(Stream.Val))
8182 return false;
8183 }
8184 }
8185
8186 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
8187}
8188
// Semantic validation of a parsed sendmsg(...) triple; reports a diagnostic
// and returns false on the first violation.
bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  // Validation strictness depends on whether message is specified
  // in a symbolic or in a numeric form. In the latter case
  // only encoding possibility is checked.
  bool Strict = Msg.IsSymbolic;

  if (Strict) {
    if (Msg.Val == OPR_ID_UNSUPPORTED) {
      Error(Msg.Loc, "specified message id is not supported on this GPU");
      return false;
    }
  } else {
    if (!isValidMsgId(Msg.Val, getSTI())) {
      Error(Msg.Loc, "invalid message id");
      return false;
    }
  }
  // In strict mode an operation must be present exactly when the message
  // requires one.
  if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
    if (Op.IsDefined) {
      Error(Op.Loc, "message does not support operations");
    } else {
      Error(Msg.Loc, "missing message operation");
    }
    return false;
  }
  if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
    if (Op.Val == OPR_ID_UNSUPPORTED)
      Error(Op.Loc, "specified operation id is not supported on this GPU");
    else
      Error(Op.Loc, "invalid operation id");
    return false;
  }
  // A stream id may only be given when the message/operation pair allows it.
  if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
      Stream.IsDefined) {
    Error(Stream.Loc, "message operation does not support streams");
    return false;
  }
  if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
    Error(Stream.Loc, "invalid message stream id");
    return false;
  }
  return true;
}
8237
8238ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
8239 using namespace llvm::AMDGPU::SendMsg;
8240
8241 int64_t ImmVal = 0;
8242 SMLoc Loc = getLoc();
8243
8244 if (trySkipId("sendmsg", AsmToken::LParen)) {
8245 OperandInfoTy Msg(OPR_ID_UNKNOWN);
8246 OperandInfoTy Op(OP_NONE_);
8247 OperandInfoTy Stream(STREAM_ID_NONE_);
8248 if (parseSendMsgBody(Msg, Op, Stream) &&
8249 validateSendMsg(Msg, Op, Stream)) {
8250 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
8251 } else {
8252 return ParseStatus::Failure;
8253 }
8254 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
8255 if (ImmVal < 0 || !isUInt<16>(ImmVal))
8256 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8257 } else {
8258 return ParseStatus::Failure;
8259 }
8260
8261 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
8262 return ParseStatus::Success;
8263}
8264
8265bool AMDGPUOperand::isSendMsg() const {
8266 return isImmTy(ImmTySendMsg);
8267}
8268
8269ParseStatus AMDGPUAsmParser::parseWaitEvent(OperandVector &Operands) {
8270 using namespace llvm::AMDGPU::WaitEvent;
8271
8272 SMLoc Loc = getLoc();
8273 int64_t ImmVal = 0;
8274
8275 StructuredOpField DontWaitExportReady("dont_wait_export_ready", "bit value",
8276 1, 0);
8277 StructuredOpField ExportReady("export_ready", "bit value", 1, 0);
8278
8279 StructuredOpField *TargetBitfield =
8280 isGFX11() ? &DontWaitExportReady : &ExportReady;
8281
8282 ParseStatus Res = parseStructuredOpFields({TargetBitfield});
8283 if (Res.isNoMatch() && parseExpr(ImmVal, "structured immediate"))
8285 else if (Res.isSuccess()) {
8286 if (!validateStructuredOpFields({TargetBitfield}))
8287 return ParseStatus::Failure;
8288 ImmVal = TargetBitfield->Val;
8289 }
8290
8291 if (!Res.isSuccess())
8292 return ParseStatus::Failure;
8293
8294 if (!isUInt<16>(ImmVal))
8295 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8296
8297 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc,
8298 AMDGPUOperand::ImmTyWaitEvent));
8299 return ParseStatus::Success;
8300}
8301
8302bool AMDGPUOperand::isWaitEvent() const { return isImmTy(ImmTyWaitEvent); }
8303
8304//===----------------------------------------------------------------------===//
8305// v_interp
8306//===----------------------------------------------------------------------===//
8307
8308ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
8309 StringRef Str;
8310 SMLoc S = getLoc();
8311
8312 if (!parseId(Str))
8313 return ParseStatus::NoMatch;
8314
8315 int Slot = StringSwitch<int>(Str)
8316 .Case("p10", 0)
8317 .Case("p20", 1)
8318 .Case("p0", 2)
8319 .Default(-1);
8320
8321 if (Slot == -1)
8322 return Error(S, "invalid interpolation slot");
8323
8324 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
8325 AMDGPUOperand::ImmTyInterpSlot));
8326 return ParseStatus::Success;
8327}
8328
// Parses a v_interp attribute of the form "attr<N>.<chan>" (e.g. "attr3.x"),
// pushing two immediate operands: the attribute number (0..32) and the
// channel (x/y/z/w -> 0..3).
ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return ParseStatus::NoMatch;

  if (!Str.starts_with("attr"))
    return Error(S, "invalid interpolation attribute");

  // The channel is the trailing two characters (".x"/".y"/".z"/".w").
  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
                     .Case(".x", 0)
                     .Case(".y", 1)
                     .Case(".z", 2)
                     .Case(".w", 3)
                     .Default(-1);
  if (AttrChan == -1)
    return Error(S, "invalid or missing interpolation attribute channel");

  // Strip the "attr" prefix and the channel suffix, leaving the number.
  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr))
    return Error(S, "invalid or missing interpolation attribute number");

  if (Attr > 32)
    return Error(S, "out of bounds interpolation attribute number");

  // Point the channel operand at the ".x" part of the token.
  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(
      this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
  return ParseStatus::Success;
}
8366
8367//===----------------------------------------------------------------------===//
8368// exp
8369//===----------------------------------------------------------------------===//
8370
8371ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
8372 using namespace llvm::AMDGPU::Exp;
8373
8374 StringRef Str;
8375 SMLoc S = getLoc();
8376
8377 if (!parseId(Str))
8378 return ParseStatus::NoMatch;
8379
8380 unsigned Id = getTgtId(Str);
8381 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
8382 return Error(S, (Id == ET_INVALID)
8383 ? "invalid exp target"
8384 : "exp target is not supported on this GPU");
8385
8386 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
8387 AMDGPUOperand::ImmTyExpTgt));
8388 return ParseStatus::Success;
8389}
8390
8391//===----------------------------------------------------------------------===//
8392// parser helpers
8393//===----------------------------------------------------------------------===//
8394
8395bool
8396AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8397 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
8398}
8399
8400bool
8401AMDGPUAsmParser::isId(const StringRef Id) const {
8402 return isId(getToken(), Id);
8403}
8404
8405bool
8406AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
8407 return getTokenKind() == Kind;
8408}
8409
8410StringRef AMDGPUAsmParser::getId() const {
8411 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
8412}
8413
8414bool
8415AMDGPUAsmParser::trySkipId(const StringRef Id) {
8416 if (isId(Id)) {
8417 lex();
8418 return true;
8419 }
8420 return false;
8421}
8422
8423bool
8424AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8425 if (isToken(AsmToken::Identifier)) {
8426 StringRef Tok = getTokenStr();
8427 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
8428 lex();
8429 return true;
8430 }
8431 }
8432 return false;
8433}
8434
8435bool
8436AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8437 if (isId(Id) && peekToken().is(Kind)) {
8438 lex();
8439 lex();
8440 return true;
8441 }
8442 return false;
8443}
8444
8445bool
8446AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8447 if (isToken(Kind)) {
8448 lex();
8449 return true;
8450 }
8451 return false;
8452}
8453
8454bool
8455AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8456 const StringRef ErrMsg) {
8457 if (!trySkipToken(Kind)) {
8458 Error(getLoc(), ErrMsg);
8459 return false;
8460 }
8461 return true;
8462}
8463
8464bool
8465AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
8466 SMLoc S = getLoc();
8467
8468 const MCExpr *Expr;
8469 if (Parser.parseExpression(Expr))
8470 return false;
8471
8472 if (Expr->evaluateAsAbsolute(Imm))
8473 return true;
8474
8475 if (Expected.empty()) {
8476 Error(S, "expected absolute expression");
8477 } else {
8478 Error(S, Twine("expected ", Expected) +
8479 Twine(" or an absolute expression"));
8480 }
8481 return false;
8482}
8483
8484bool
8485AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8486 SMLoc S = getLoc();
8487
8488 const MCExpr *Expr;
8489 if (Parser.parseExpression(Expr))
8490 return false;
8491
8492 int64_t IntVal;
8493 if (Expr->evaluateAsAbsolute(IntVal)) {
8494 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
8495 } else {
8496 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8497 }
8498 return true;
8499}
8500
8501bool
8502AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8503 if (isToken(AsmToken::String)) {
8504 Val = getToken().getStringContents();
8505 lex();
8506 return true;
8507 }
8508 Error(getLoc(), ErrMsg);
8509 return false;
8510}
8511
8512bool
8513AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8514 if (isToken(AsmToken::Identifier)) {
8515 Val = getTokenStr();
8516 lex();
8517 return true;
8518 }
8519 if (!ErrMsg.empty())
8520 Error(getLoc(), ErrMsg);
8521 return false;
8522}
8523
8524AsmToken
8525AMDGPUAsmParser::getToken() const {
8526 return Parser.getTok();
8527}
8528
8529AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8530 return isToken(AsmToken::EndOfStatement)
8531 ? getToken()
8532 : getLexer().peekTok(ShouldSkipSpace);
8533}
8534
// Fills Tokens with upcoming tokens without consuming them. If the lexer
// provides fewer tokens than requested, the remaining slots are padded with
// Error tokens.
void
AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
  auto TokCount = getLexer().peekTokens(Tokens);

  for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
    Tokens[Idx] = AsmToken(AsmToken::Error, "");
}
8542
8544AMDGPUAsmParser::getTokenKind() const {
8545 return getLexer().getKind();
8546}
8547
8548SMLoc
8549AMDGPUAsmParser::getLoc() const {
8550 return getToken().getLoc();
8551}
8552
8553StringRef
8554AMDGPUAsmParser::getTokenStr() const {
8555 return getToken().getString();
8556}
8557
8558void
8559AMDGPUAsmParser::lex() {
8560 Parser.Lex();
8561}
8562
8563SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
8564 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8565}
8566
8567// Returns one of the given locations that comes later in the source.
8568SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
8569 return a.getPointer() < b.getPointer() ? b : a;
8570}
8571
8572SMLoc AMDGPUAsmParser::getOperandLoc(const OperandVector &Operands,
8573 int MCOpIdx) const {
8574 for (const auto &Op : Operands) {
8575 const auto TargetOp = static_cast<AMDGPUOperand &>(*Op);
8576 if (TargetOp.getMCOpIdx() == MCOpIdx)
8577 return TargetOp.getStartLoc();
8578 }
8579 llvm_unreachable("No such MC operand!");
8580}
8581
// Returns the location of the last parsed operand satisfying Test, or the
// instruction (mnemonic) location if none matches. Scans backwards so the
// most recently parsed match wins; index 0 (the mnemonic) is skipped.
SMLoc
AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                               const OperandVector &Operands) const {
  for (unsigned i = Operands.size() - 1; i > 0; --i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Test(Op))
      return Op.getStartLoc();
  }
  return getInstLoc(Operands);
}
8592
8593SMLoc
8594AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8595 const OperandVector &Operands) const {
8596 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
8597 return getOperandLoc(Test, Operands);
8598}
8599
// Parses a structured operand of the form "{name: value, ...}".
// Returns NoMatch if the operand does not start with '{'. Each named field
// must be one of Fields and may appear at most once; its value and location
// are stored into the matching StructuredOpField.
ParseStatus
AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
  if (!trySkipToken(AsmToken::LCurly))
    return ParseStatus::NoMatch;

  bool First = true;
  while (!trySkipToken(AsmToken::RCurly)) {
    // Fields after the first must be comma-separated.
    if (!First &&
        !skipToken(AsmToken::Comma, "comma or closing brace expected"))
      return ParseStatus::Failure;

    StringRef Id = getTokenStr();
    SMLoc IdLoc = getLoc();
    if (!skipToken(AsmToken::Identifier, "field name expected") ||
        !skipToken(AsmToken::Colon, "colon expected"))
      return ParseStatus::Failure;

    const auto *I =
        find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
    if (I == Fields.end())
      return Error(IdLoc, "unknown field");
    if ((*I)->IsDefined)
      return Error(IdLoc, "duplicate field");

    // TODO: Support symbolic values.
    (*I)->Loc = getLoc();
    if (!parseExpr((*I)->Val))
      return ParseStatus::Failure;
    (*I)->IsDefined = true;

    First = false;
  }
  return ParseStatus::Success;
}
8634
8635bool AMDGPUAsmParser::validateStructuredOpFields(
8637 return all_of(Fields, [this](const StructuredOpField *F) {
8638 return F->validate(*this);
8639 });
8640}
8641
8642//===----------------------------------------------------------------------===//
8643// swizzle
8644//===----------------------------------------------------------------------===//
8645
8647static unsigned
8648encodeBitmaskPerm(const unsigned AndMask,
8649 const unsigned OrMask,
8650 const unsigned XorMask) {
8651 using namespace llvm::AMDGPU::Swizzle;
8652
8653 return BITMASK_PERM_ENC |
8654 (AndMask << BITMASK_AND_SHIFT) |
8655 (OrMask << BITMASK_OR_SHIFT) |
8656 (XorMask << BITMASK_XOR_SHIFT);
8657}
8658
// Parses ", <expr>" and checks the value is within [MinVal, MaxVal].
// Loc is set to the expression's location so callers can attach follow-up
// diagnostics to it.
bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
                                          const unsigned MaxVal,
                                          const Twine &ErrMsg, SMLoc &Loc) {
  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }
  Loc = getLoc();
  if (!parseExpr(Op)) {
    return false;
  }
  if (Op < MinVal || Op > MaxVal) {
    Error(Loc, ErrMsg);
    return false;
  }

  return true;
}
8676
8677bool
8678AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8679 const unsigned MinVal,
8680 const unsigned MaxVal,
8681 const StringRef ErrMsg) {
8682 SMLoc Loc;
8683 for (unsigned i = 0; i < OpNum; ++i) {
8684 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8685 return false;
8686 }
8687
8688 return true;
8689}
8690
8691bool
8692AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8693 using namespace llvm::AMDGPU::Swizzle;
8694
8695 int64_t Lane[LANE_NUM];
8696 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8697 "expected a 2-bit lane id")) {
8699 for (unsigned I = 0; I < LANE_NUM; ++I) {
8700 Imm |= Lane[I] << (LANE_SHIFT * I);
8701 }
8702 return true;
8703 }
8704 return false;
8705}
8706
// Parses "BROADCAST, <group size>, <lane id>" and encodes it as a bitmask
// permutation: the AND mask keeps the group base, the OR mask selects the
// broadcast lane within the group.
bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperand(GroupSize,
                           2, 32,
                           "group size must be in the interval [2,32]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }
  // The lane id is relative to the group, hence the [0, GroupSize-1] bound.
  if (parseSwizzleOperand(LaneIdx,
                          0, GroupSize - 1,
                          "lane id must be in the interval [0,group size - 1]",
                          Loc)) {
    // BITMASK_MAX - GroupSize + 1 is ~(GroupSize - 1) within the mask width.
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}
8734
8735bool
8736AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8737 using namespace llvm::AMDGPU::Swizzle;
8738
8739 SMLoc Loc;
8740 int64_t GroupSize;
8741
8742 if (!parseSwizzleOperand(GroupSize,
8743 2, 32,
8744 "group size must be in the interval [2,32]",
8745 Loc)) {
8746 return false;
8747 }
8748 if (!isPowerOf2_64(GroupSize)) {
8749 Error(Loc, "group size must be a power of two");
8750 return false;
8751 }
8752
8753 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8754 return true;
8755}
8756
8757bool
8758AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8759 using namespace llvm::AMDGPU::Swizzle;
8760
8761 SMLoc Loc;
8762 int64_t GroupSize;
8763
8764 if (!parseSwizzleOperand(GroupSize,
8765 1, 16,
8766 "group size must be in the interval [1,16]",
8767 Loc)) {
8768 return false;
8769 }
8770 if (!isPowerOf2_64(GroupSize)) {
8771 Error(Loc, "group size must be a power of two");
8772 return false;
8773 }
8774
8775 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8776 return true;
8777}
8778
// Parses 'BITMASK_PERM, "<mask>"' where <mask> is a 5-character control
// string, most significant bit first. Per character: '0' leaves all masks
// clear (bit forced to 0), '1' sets the OR mask (bit forced to 1), 'p' sets
// the AND mask (bit preserved), 'i' sets AND and XOR (bit inverted).
bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    // Character 0 of the string controls the most significant mask bit.
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}
8825
8826bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
8827 using namespace llvm::AMDGPU::Swizzle;
8828
8829 if (!AMDGPU::isGFX9Plus(getSTI())) {
8830 Error(getLoc(), "FFT mode swizzle not supported on this GPU");
8831 return false;
8832 }
8833
8834 int64_t Swizzle;
8835 SMLoc Loc;
8836 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
8837 "FFT swizzle must be in the interval [0," +
8838 Twine(FFT_SWIZZLE_MAX) + Twine(']'),
8839 Loc))
8840 return false;
8841
8842 Imm = FFT_MODE_ENC | Swizzle;
8843 return true;
8844}
8845
8846bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
8847 using namespace llvm::AMDGPU::Swizzle;
8848
8849 if (!AMDGPU::isGFX9Plus(getSTI())) {
8850 Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
8851 return false;
8852 }
8853
8854 SMLoc Loc;
8855 int64_t Direction;
8856
8857 if (!parseSwizzleOperand(Direction, 0, 1,
8858 "direction must be 0 (left) or 1 (right)", Loc))
8859 return false;
8860
8861 int64_t RotateSize;
8862 if (!parseSwizzleOperand(
8863 RotateSize, 0, ROTATE_MAX_SIZE,
8864 "number of threads to rotate must be in the interval [0," +
8865 Twine(ROTATE_MAX_SIZE) + Twine(']'),
8866 Loc))
8867 return false;
8868
8870 (RotateSize << ROTATE_SIZE_SHIFT);
8871 return true;
8872}
8873
8874bool
8875AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8876
8877 SMLoc OffsetLoc = getLoc();
8878
8879 if (!parseExpr(Imm, "a swizzle macro")) {
8880 return false;
8881 }
8882 if (!isUInt<16>(Imm)) {
8883 Error(OffsetLoc, "expected a 16-bit offset");
8884 return false;
8885 }
8886 return true;
8887}
8888
// Parses "(<mode>, ...)" after the "swizzle" keyword and dispatches to the
// mode-specific parser. Imm receives the encoded swizzle value.
bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parentheses")) {

    SMLoc ModeLoc = getLoc();
    bool Ok = false;

    // Mode names come from the shared IdSymbolic table.
    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else if (trySkipId(IdSymbolic[ID_FFT])) {
      Ok = parseSwizzleFFT(Imm);
    } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
      Ok = parseSwizzleRotate(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
  }

  return false;
}
8921
8922ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8923 SMLoc S = getLoc();
8924 int64_t Imm = 0;
8925
8926 if (trySkipId("offset")) {
8927
8928 bool Ok = false;
8929 if (skipToken(AsmToken::Colon, "expected a colon")) {
8930 if (trySkipId("swizzle")) {
8931 Ok = parseSwizzleMacro(Imm);
8932 } else {
8933 Ok = parseSwizzleOffset(Imm);
8934 }
8935 }
8936
8937 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8938
8940 }
8941 return ParseStatus::NoMatch;
8942}
8943
8944bool
8945AMDGPUOperand::isSwizzle() const {
8946 return isImmTy(ImmTySwizzle);
8947}
8948
8949//===----------------------------------------------------------------------===//
8950// VGPR Index Mode
8951//===----------------------------------------------------------------------===//
8952
// Parses the body of gpr_idx(...): a comma-separated list of VGPR index
// mode names with no duplicates, terminated by ')'. An empty list encodes
// as OFF. Returns UNDEF after reporting an error.
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = getLoc();

    // Try each symbolic mode name; mode ModeId occupies bit ModeId.
    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      // An empty list would still be at the first element, so mention the
      // closing parenthesis only in that case.
      Error(S, (Imm == 0)?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      return UNDEF;
    }

    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      return UNDEF;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      return UNDEF;
  }

  return Imm;
}
8996
// Parses the gpr_idx operand: either the symbolic gpr_idx(...) macro or a
// raw 4-bit immediate mode mask.
ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  int64_t Imm = 0;
  SMLoc S = getLoc();

  if (trySkipId("gpr_idx", AsmToken::LParen)) {
    Imm = parseGPRIdxMacro();
    // parseGPRIdxMacro has already reported the error in this case.
    if (Imm == UNDEF)
      return ParseStatus::Failure;
  } else {
    if (getParser().parseAbsoluteExpression(Imm))
      return ParseStatus::Failure;
    if (Imm < 0 || !isUInt<4>(Imm))
      return Error(S, "invalid immediate: only 4-bit values are legal");
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
  return ParseStatus::Success;
}
9019
9020bool AMDGPUOperand::isGPRIdxMode() const {
9021 return isImmTy(ImmTyGprIdxMode);
9022}
9023
9024//===----------------------------------------------------------------------===//
9025// sopp branch targets
9026//===----------------------------------------------------------------------===//
9027
// Parses a SOPP branch target: a label (symbol reference) or an absolute
// 16-bit signed offset. Diagnostics are reported here, but Success is still
// returned once an operand has been pushed.
ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {

  // Make sure we are not parsing something
  // that looks like a label or an expression but is not.
  // This will improve error messages.
  if (isRegister() || isModifier())
    return ParseStatus::NoMatch;

  if (!parseExpr(Operands))
    return ParseStatus::Failure;

  AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
  assert(Opr.isImm() || Opr.isExpr());
  SMLoc Loc = Opr.getStartLoc();

  // Currently we do not support arbitrary expressions as branch targets.
  // Only labels and absolute expressions are accepted.
  if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
    Error(Loc, "expected an absolute expression or a label");
  } else if (Opr.isImm() && !Opr.isS16Imm()) {
    Error(Loc, "expected a 16-bit signed jump offset");
  }

  return ParseStatus::Success;
}
9053
9054//===----------------------------------------------------------------------===//
9055// Boolean holding registers
9056//===----------------------------------------------------------------------===//
9057
// Parses a boolean-holding register operand by delegating to the generic
// register parser.
ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
  return parseReg(Operands);
}
9061
9062//===----------------------------------------------------------------------===//
9063// mubuf
9064//===----------------------------------------------------------------------===//
9065
9066void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
9067 const OperandVector &Operands,
9068 bool IsAtomic) {
9069 OptionalImmIndexMap OptionalIdx;
9070 unsigned FirstOperandIdx = 1;
9071 bool IsAtomicReturn = false;
9072
9073 if (IsAtomic) {
9074 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
9076 }
9077
9078 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
9079 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9080
9081 // Add the register arguments
9082 if (Op.isReg()) {
9083 Op.addRegOperands(Inst, 1);
9084 // Insert a tied src for atomic return dst.
9085 // This cannot be postponed as subsequent calls to
9086 // addImmOperands rely on correct number of MC operands.
9087 if (IsAtomicReturn && i == FirstOperandIdx)
9088 Op.addRegOperands(Inst, 1);
9089 continue;
9090 }
9091
9092 // Handle the case where soffset is an immediate
9093 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9094 Op.addImmOperands(Inst, 1);
9095 continue;
9096 }
9097
9098 // Handle tokens like 'offen' which are sometimes hard-coded into the
9099 // asm string. There are no MCInst operands for these.
9100 if (Op.isToken()) {
9101 continue;
9102 }
9103 assert(Op.isImm());
9104
9105 // Handle optional arguments
9106 OptionalIdx[Op.getImmTy()] = i;
9107 }
9108
9109 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
9110 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
9111 // Parse a dummy operand as a placeholder for the SWZ operand. This enforces
9112 // agreement between MCInstrDesc.getNumOperands and MCInst.getNumOperands.
9114}
9115
9116//===----------------------------------------------------------------------===//
9117// smrd
9118//===----------------------------------------------------------------------===//
9119
9120bool AMDGPUOperand::isSMRDOffset8() const {
9121 return isImmLiteral() && isUInt<8>(getImm());
9122}
9123
// Accept any literal immediate as an SMEM offset candidate; the actual
// encodable range depends on the subtarget.
bool AMDGPUOperand::isSMEMOffset() const {
  // Offset range is checked later by validator.
  return isImmLiteral();
}
9128
9129bool AMDGPUOperand::isSMRDLiteralOffset() const {
9130 // 32-bit literals are only supported on CI and we only want to use them
9131 // when the offset is > 8-bits.
9132 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
9133}
9134
9135//===----------------------------------------------------------------------===//
9136// vop3
9137//===----------------------------------------------------------------------===//
9138
9139static bool ConvertOmodMul(int64_t &Mul) {
9140 if (Mul != 1 && Mul != 2 && Mul != 4)
9141 return false;
9142
9143 Mul >>= 1;
9144 return true;
9145}
9146
9147static bool ConvertOmodDiv(int64_t &Div) {
9148 if (Div == 1) {
9149 Div = 0;
9150 return true;
9151 }
9152
9153 if (Div == 2) {
9154 Div = 3;
9155 return true;
9156 }
9157
9158 return false;
9159}
9160
9161// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
9162// This is intentional and ensures compatibility with sp3.
9163// See bug 35397 for details.
9164bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9165 if (BoundCtrl == 0 || BoundCtrl == 1) {
9166 if (!isGFX11Plus())
9167 BoundCtrl = 1;
9168 return true;
9169 }
9170 return false;
9171}
9172
9173void AMDGPUAsmParser::onBeginOfFile() {
9174 if (!getParser().getStreamer().getTargetStreamer() ||
9175 getSTI().getTargetTriple().getArch() == Triple::r600)
9176 return;
9177
9178 if (!getTargetStreamer().getTargetID())
9179 getTargetStreamer().initializeTargetID(getSTI(),
9180 getSTI().getFeatureString());
9181
9182 if (isHsaAbi(getSTI()))
9183 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9184}
9185
9186/// Parse AMDGPU specific expressions.
9187///
9188/// expr ::= or(expr, ...) |
9189/// max(expr, ...)
9190///
9191bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
9192 using AGVK = AMDGPUMCExpr::VariantKind;
9193
9194 if (isToken(AsmToken::Identifier)) {
9195 StringRef TokenId = getTokenStr();
9196 AGVK VK = StringSwitch<AGVK>(TokenId)
9197 .Case("max", AGVK::AGVK_Max)
9198 .Case("or", AGVK::AGVK_Or)
9199 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
9200 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9201 .Case("alignto", AGVK::AGVK_AlignTo)
9202 .Case("occupancy", AGVK::AGVK_Occupancy)
9203 .Default(AGVK::AGVK_None);
9204
9205 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
9207 uint64_t CommaCount = 0;
9208 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
9209 lex(); // Eat '('
9210 while (true) {
9211 if (trySkipToken(AsmToken::RParen)) {
9212 if (Exprs.empty()) {
9213 Error(getToken().getLoc(),
9214 "empty " + Twine(TokenId) + " expression");
9215 return true;
9216 }
9217 if (CommaCount + 1 != Exprs.size()) {
9218 Error(getToken().getLoc(),
9219 "mismatch of commas in " + Twine(TokenId) + " expression");
9220 return true;
9221 }
9222 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
9223 return false;
9224 }
9225 const MCExpr *Expr;
9226 if (getParser().parseExpression(Expr, EndLoc))
9227 return true;
9228 Exprs.push_back(Expr);
9229 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
9230 if (LastTokenWasComma)
9231 CommaCount++;
9232 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
9233 Error(getToken().getLoc(),
9234 "unexpected token in " + Twine(TokenId) + " expression");
9235 return true;
9236 }
9237 }
9238 }
9239 }
9240 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
9241}
9242
9243ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
9244 StringRef Name = getTokenStr();
9245 if (Name == "mul") {
9246 return parseIntWithPrefix("mul", Operands,
9247 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
9248 }
9249
9250 if (Name == "div") {
9251 return parseIntWithPrefix("div", Operands,
9252 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
9253 }
9254
9255 return ParseStatus::NoMatch;
9256}
9257
// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
// the number of src operands present, then copies that bit into src0_modifiers.
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
  int Opc = Inst.getOpcode();
  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  // Nothing to do for instructions without an op_sel operand.
  if (OpSelIdx == -1)
    return;

  // Count the src operands: the dst bit sits just above the last source bit
  // in op_sel.
  int SrcNum;
  const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2};
  for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
       ++SrcNum)
    ;
  assert(SrcNum > 0);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
  if (DstIdx == -1)
    return;

  const MCOperand &DstOp = Inst.getOperand(DstIdx);
  int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
  uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
  // For 16-bit VGPR destinations the hi/lo half is encoded in the register
  // itself; otherwise take bit SrcNum of the parsed op_sel immediate.
  if (DstOp.isReg() &&
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
    if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
      ModVal |= SISrcMods::DST_OP_SEL;
  } else {
    if ((OpSel & (1 << SrcNum)) != 0)
      ModVal |= SISrcMods::DST_OP_SEL;
  }
  Inst.getOperand(ModIdx).setImm(ModVal);
}
9293
// Convert a VOP3 instruction whose op_sel only carries the destination bit:
// run the common VOP3P conversion, then fold the dst op_sel bit into
// src0_modifiers.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
                                   const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);
  cvtVOP3DstOpSelOnly(Inst, *getMRI());
}
9299
// Overload taking a caller-provided optional-operand index map; otherwise
// identical to the two-argument form above.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
                                   OptionalImmIndexMap &OptionalIdx) {
  cvtVOP3P(Inst, Operands, OptionalIdx);
  cvtVOP3DstOpSelOnly(Inst, *getMRI());
}
9305
9306static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
9307 return
9308 // 1. This operand is input modifiers
9309 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
9310 // 2. This is not last operand
9311 && Desc.NumOperands > (OpNum + 1)
9312 // 3. Next operand is register class
9313 && Desc.operands()[OpNum + 1].RegClass != -1
9314 // 4. Next register is not tied to any other operand
9315 && Desc.getOperandConstraint(OpNum + 1,
9317}
9318
9319void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst, unsigned OpSel) {
9320 unsigned Opc = Inst.getOpcode();
9321 constexpr AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9322 AMDGPU::OpName::src2};
9323 constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9324 AMDGPU::OpName::src1_modifiers,
9325 AMDGPU::OpName::src2_modifiers};
9326 for (int J = 0; J < 3; ++J) {
9327 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9328 if (OpIdx == -1)
9329 // Some instructions, e.g. v_interp_p2_f16 in GFX9, have src0, src2, but
9330 // no src1. So continue instead of break.
9331 continue;
9332
9333 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9334 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9335
9336 if ((OpSel & (1 << J)) != 0)
9337 ModVal |= SISrcMods::OP_SEL_0;
9338 // op_sel[3] is encoded in src0_modifiers.
9339 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
9340 ModVal |= SISrcMods::DST_OP_SEL;
9341
9342 Inst.getOperand(ModIdx).setImm(ModVal);
9343 }
9344}
9345
9346void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
9347{
9348 OptionalImmIndexMap OptionalIdx;
9349 unsigned Opc = Inst.getOpcode();
9350
9351 unsigned I = 1;
9352 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9353 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9354 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9355 }
9356
9357 for (unsigned E = Operands.size(); I != E; ++I) {
9358 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9360 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9361 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
9362 Op.isInterpAttrChan()) {
9363 Inst.addOperand(MCOperand::createImm(Op.getImm()));
9364 } else if (Op.isImmModifier()) {
9365 OptionalIdx[Op.getImmTy()] = I;
9366 } else {
9367 llvm_unreachable("unhandled operand type");
9368 }
9369 }
9370
9371 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
9372 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9373 AMDGPUOperand::ImmTyHigh);
9374
9375 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9376 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9377 AMDGPUOperand::ImmTyClamp);
9378
9379 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9380 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9381 AMDGPUOperand::ImmTyOModSI);
9382
9383 // Some v_interp instructions use op_sel[3] for dst.
9384 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9385 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9386 AMDGPUOperand::ImmTyOpSel);
9387 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9388 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9389
9390 cvtOpSelHelper(Inst, OpSel);
9391 }
9392}
9393
9394void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
9395{
9396 OptionalImmIndexMap OptionalIdx;
9397 unsigned Opc = Inst.getOpcode();
9398
9399 unsigned I = 1;
9400 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9401 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9402 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9403 }
9404
9405 for (unsigned E = Operands.size(); I != E; ++I) {
9406 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9408 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9409 } else if (Op.isImmModifier()) {
9410 OptionalIdx[Op.getImmTy()] = I;
9411 } else {
9412 llvm_unreachable("unhandled operand type");
9413 }
9414 }
9415
9416 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
9417
9418 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9419 if (OpSelIdx != -1)
9420 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9421
9422 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
9423
9424 if (OpSelIdx == -1)
9425 return;
9426
9427 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9428 cvtOpSelHelper(Inst, OpSel);
9429}
9430
9431void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9432 const OperandVector &Operands) {
9433 OptionalImmIndexMap OptionalIdx;
9434 unsigned Opc = Inst.getOpcode();
9435 unsigned I = 1;
9436 int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
9437
9438 const MCInstrDesc &Desc = MII.get(Opc);
9439
9440 for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
9441 static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
9442
9443 for (unsigned E = Operands.size(); I != E; ++I) {
9444 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
9445 int NumOperands = Inst.getNumOperands();
9446 // The order of operands in MCInst and parsed operands are different.
9447 // Adding dummy cbsz and blgp operands at corresponding MCInst operand
9448 // indices for parsing scale values correctly.
9449 if (NumOperands == CbszOpIdx) {
9452 }
9453 if (isRegOrImmWithInputMods(Desc, NumOperands)) {
9454 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9455 } else if (Op.isImmModifier()) {
9456 OptionalIdx[Op.getImmTy()] = I;
9457 } else {
9458 Op.addRegOrImmOperands(Inst, 1);
9459 }
9460 }
9461
9462 // Insert CBSZ and BLGP operands for F8F6F4 variants
9463 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9464 if (CbszIdx != OptionalIdx.end()) {
9465 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
9466 Inst.getOperand(CbszOpIdx).setImm(CbszVal);
9467 }
9468
9469 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
9470 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9471 if (BlgpIdx != OptionalIdx.end()) {
9472 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
9473 Inst.getOperand(BlgpOpIdx).setImm(BlgpVal);
9474 }
9475
9476 // Add dummy src_modifiers
9479
9480 // Handle op_sel fields
9481
9482 unsigned OpSel = 0;
9483 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9484 if (OpselIdx != OptionalIdx.end()) {
9485 OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
9486 .getImm();
9487 }
9488
9489 unsigned OpSelHi = 0;
9490 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9491 if (OpselHiIdx != OptionalIdx.end()) {
9492 OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
9493 .getImm();
9494 }
9495 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9496 AMDGPU::OpName::src1_modifiers};
9497
9498 for (unsigned J = 0; J < 2; ++J) {
9499 unsigned ModVal = 0;
9500 if (OpSel & (1 << J))
9501 ModVal |= SISrcMods::OP_SEL_0;
9502 if (OpSelHi & (1 << J))
9503 ModVal |= SISrcMods::OP_SEL_1;
9504
9505 const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9506 Inst.getOperand(ModIdx).setImm(ModVal);
9507 }
9508}
9509
9510void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
9511 OptionalImmIndexMap &OptionalIdx) {
9512 unsigned Opc = Inst.getOpcode();
9513
9514 unsigned I = 1;
9515 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9516 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9517 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9518 }
9519
9520 for (unsigned E = Operands.size(); I != E; ++I) {
9521 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9523 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9524 } else if (Op.isImmModifier()) {
9525 OptionalIdx[Op.getImmTy()] = I;
9526 } else {
9527 Op.addRegOrImmOperands(Inst, 1);
9528 }
9529 }
9530
9531 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::scale_sel))
9532 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9533 AMDGPUOperand::ImmTyScaleSel);
9534
9535 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9536 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9537 AMDGPUOperand::ImmTyClamp);
9538
9539 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
9540 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
9541 Inst.addOperand(Inst.getOperand(0));
9542 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9543 AMDGPUOperand::ImmTyByteSel);
9544 }
9545
9546 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9547 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9548 AMDGPUOperand::ImmTyOModSI);
9549
9550 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
9551 // it has src2 register operand that is tied to dst operand
9552 // we don't allow modifiers for this operand in assembler so src2_modifiers
9553 // should be 0.
9554 if (isMAC(Opc)) {
9555 auto *it = Inst.begin();
9556 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
9557 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
9558 ++it;
9559 // Copy the operand to ensure it's not invalidated when Inst grows.
9560 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
9561 }
9562}
9563
// Entry point for VOP3 conversion with a locally-scoped optional-operand map.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}
9568
// Place the VOP3P-specific parsed immediates (op_sel, op_sel_hi, neg_lo,
// neg_hi, bitop3, matrix controls, ...) into the MCInst, then fold the
// per-source bits into the src*_modifiers operands.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                               OptionalImmIndexMap &OptIdx) {
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  // These conversions need a placeholder src2_modifiers plus a copy of the
  // dst operand appended here.
  if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
      Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx11 ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx11 ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
    Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
    Inst.addOperand(Inst.getOperand(0));
  }

  // Adding vdst_in operand is already covered for these DPP instructions in
  // cvtVOP3DPP.
  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
      !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_gfx1250)) {
    Inst.addOperand(Inst.getOperand(0));
  }

  int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
  if (BitOp3Idx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
  }

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // Packed instructions default op_sel_hi to all-ones (-1).
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int MatrixAFMTIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
  if (MatrixAFMTIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixAFMT, 0);
  }

  int MatrixBFMTIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
  if (MatrixBFMTIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixBFMT, 0);
  }

  int MatrixAScaleIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale);
  if (MatrixAScaleIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixAScale, 0);
  }

  int MatrixBScaleIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale);
  if (MatrixBScaleIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixBScale, 0);
  }

  int MatrixAScaleFmtIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
  if (MatrixAScaleFmtIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
  }

  int MatrixBScaleFmtIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
  if (MatrixBScaleFmtIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixAReuse, 0);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixBReuse, 0);

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1)
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);

  int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
  if (NegHiIdx != -1)
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);

  const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2};
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers,
                                   AMDGPU::OpName::src2_modifiers};

  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelIdx != -1)
    OpSel = Inst.getOperand(OpSelIdx).getImm();

  if (OpSelHiIdx != -1)
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();

  if (NegLoIdx != -1)
    NegLo = Inst.getOperand(NegLoIdx).getImm();

  if (NegHiIdx != -1)
    NegHi = Inst.getOperand(NegHiIdx).getImm();

  // Scatter bit J of each packed immediate into srcJ_modifiers.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    if (ModIdx == -1)
      continue;

    uint32_t ModVal = 0;

    const MCOperand &SrcOp = Inst.getOperand(OpIdx);
    // For 16-bit VGPR sources the lo/hi half is encoded in the register
    // itself rather than in the op_sel bit.
    if (SrcOp.isReg() && getMRI()
                             ->getRegClass(AMDGPU::VGPR_16RegClassID)
                             .contains(SrcOp.getReg())) {
      bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
      if (VGPRSuffixIsHi)
        ModVal |= SISrcMods::OP_SEL_0;
    } else {
      if ((OpSel & (1 << J)) != 0)
        ModVal |= SISrcMods::OP_SEL_0;
    }

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
9769
// Entry point for VOP3P conversion: run the generic VOP3 conversion first,
// then place the VOP3P-specific modifiers.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  cvtVOP3(Inst, Operands, OptIdx);
  cvtVOP3P(Inst, Operands, OptIdx);
}
9775
9777 unsigned i, unsigned Opc,
9778 AMDGPU::OpName OpName) {
9779 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
9780 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9781 else
9782 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
9783}
9784
9785void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
9786 unsigned Opc = Inst.getOpcode();
9787
9788 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9789 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
9790 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
9791 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
9792 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
9793
9794 OptionalImmIndexMap OptIdx;
9795 for (unsigned i = 5; i < Operands.size(); ++i) {
9796 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9797 OptIdx[Op.getImmTy()] = i;
9798 }
9799
9800 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
9801 addOptionalImmOperand(Inst, Operands, OptIdx,
9802 AMDGPUOperand::ImmTyIndexKey8bit);
9803
9804 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
9805 addOptionalImmOperand(Inst, Operands, OptIdx,
9806 AMDGPUOperand::ImmTyIndexKey16bit);
9807
9808 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
9809 addOptionalImmOperand(Inst, Operands, OptIdx,
9810 AMDGPUOperand::ImmTyIndexKey32bit);
9811
9812 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9813 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
9814
9815 cvtVOP3P(Inst, Operands, OptIdx);
9816}
9817
9818//===----------------------------------------------------------------------===//
9819// VOPD
9820//===----------------------------------------------------------------------===//
9821
9822ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
9823 if (!hasVOPD(getSTI()))
9824 return ParseStatus::NoMatch;
9825
9826 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
9827 SMLoc S = getLoc();
9828 lex();
9829 lex();
9830 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
9831 SMLoc OpYLoc = getLoc();
9832 StringRef OpYName;
9833 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
9834 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
9835 return ParseStatus::Success;
9836 }
9837 return Error(OpYLoc, "expected a VOPDY instruction after ::");
9838 }
9839 return ParseStatus::NoMatch;
9840}
9841
9842// Create VOPD MCInst operands using parsed assembler operands.
9843void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
9844 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9845
9846 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
9847 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
9849 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9850 return;
9851 }
9852 if (Op.isReg()) {
9853 Op.addRegOperands(Inst, 1);
9854 return;
9855 }
9856 if (Op.isImm()) {
9857 Op.addImmOperands(Inst, 1);
9858 return;
9859 }
9860 llvm_unreachable("Unhandled operand type in cvtVOPD");
9861 };
9862
9863 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
9864
9865 // MCInst operands are ordered as follows:
9866 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
9867
9868 for (auto CompIdx : VOPD::COMPONENTS) {
9869 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
9870 }
9871
9872 for (auto CompIdx : VOPD::COMPONENTS) {
9873 const auto &CInfo = InstInfo[CompIdx];
9874 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
9875 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
9876 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
9877 if (CInfo.hasSrc2Acc())
9878 addOp(CInfo.getIndexOfDstInParsedOperands());
9879 }
9880
9881 int BitOp3Idx =
9882 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
9883 if (BitOp3Idx != -1) {
9884 OptionalImmIndexMap OptIdx;
9885 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
9886 if (Op.isImm())
9887 OptIdx[Op.getImmTy()] = Operands.size() - 1;
9888
9889 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9890 }
9891}
9892
9893//===----------------------------------------------------------------------===//
9894// dpp
9895//===----------------------------------------------------------------------===//
9896
// True iff this operand is a parsed dpp8:[...] lane-selector immediate.
bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}
9900
9901bool AMDGPUOperand::isDPPCtrl() const {
9902 using namespace AMDGPU::DPP;
9903
9904 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
9905 if (result) {
9906 int64_t Imm = getImm();
9907 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
9908 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
9909 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
9910 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
9911 (Imm == DppCtrl::WAVE_SHL1) ||
9912 (Imm == DppCtrl::WAVE_ROL1) ||
9913 (Imm == DppCtrl::WAVE_SHR1) ||
9914 (Imm == DppCtrl::WAVE_ROR1) ||
9915 (Imm == DppCtrl::ROW_MIRROR) ||
9916 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
9917 (Imm == DppCtrl::BCAST15) ||
9918 (Imm == DppCtrl::BCAST31) ||
9919 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
9920 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
9921 }
9922 return false;
9923}
9924
9925//===----------------------------------------------------------------------===//
9926// mAI
9927//===----------------------------------------------------------------------===//
9928
// True iff this operand is a BLGP immediate that fits its 3-bit field.
bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}
9932
// True iff this is a literal that fits in 16 bits, signed or unsigned.
bool AMDGPUOperand::isS16Imm() const {
  return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}
9936
// True iff this is a literal that fits in 16 unsigned bits.
bool AMDGPUOperand::isU16Imm() const {
  return isImmLiteral() && isUInt<16>(getImm());
}
9940
9941//===----------------------------------------------------------------------===//
9942// dim
9943//===----------------------------------------------------------------------===//
9944
9945bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
9946 // We want to allow "dim:1D" etc.,
9947 // but the initial 1 is tokenized as an integer.
9948 std::string Token;
9949 if (isToken(AsmToken::Integer)) {
9950 SMLoc Loc = getToken().getEndLoc();
9951 Token = std::string(getTokenStr());
9952 lex();
9953 if (getLoc() != Loc)
9954 return false;
9955 }
9956
9957 StringRef Suffix;
9958 if (!parseId(Suffix))
9959 return false;
9960 Token += Suffix;
9961
9962 StringRef DimId = Token;
9963 DimId.consume_front("SQ_RSRC_IMG_");
9964
9965 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
9966 if (!DimInfo)
9967 return false;
9968
9969 Encoding = DimInfo->Encoding;
9970 return true;
9971}
9972
9973ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
9974 if (!isGFX10Plus())
9975 return ParseStatus::NoMatch;
9976
9977 SMLoc S = getLoc();
9978
9979 if (!trySkipId("dim", AsmToken::Colon))
9980 return ParseStatus::NoMatch;
9981
9982 unsigned Encoding;
9983 SMLoc Loc = getLoc();
9984 if (!parseDimId(Encoding))
9985 return Error(Loc, "invalid dim value");
9986
9987 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9988 AMDGPUOperand::ImmTyDim));
9989 return ParseStatus::Success;
9990}
9991
9992//===----------------------------------------------------------------------===//
9993// dpp
9994//===----------------------------------------------------------------------===//
9995
9996ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9997 SMLoc S = getLoc();
9998
9999 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
10000 return ParseStatus::NoMatch;
10001
10002 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
10003
10004 int64_t Sels[8];
10005
10006 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
10007 return ParseStatus::Failure;
10008
10009 for (size_t i = 0; i < 8; ++i) {
10010 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
10011 return ParseStatus::Failure;
10012
10013 SMLoc Loc = getLoc();
10014 if (getParser().parseAbsoluteExpression(Sels[i]))
10015 return ParseStatus::Failure;
10016 if (0 > Sels[i] || 7 < Sels[i])
10017 return Error(Loc, "expected a 3-bit value");
10018 }
10019
10020 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10021 return ParseStatus::Failure;
10022
10023 unsigned DPP8 = 0;
10024 for (size_t i = 0; i < 8; ++i)
10025 DPP8 |= (Sels[i] << (i * 3));
10026
10027 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
10028 return ParseStatus::Success;
10029}
10030
10031bool
10032AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
10033 const OperandVector &Operands) {
10034 if (Ctrl == "row_newbcast")
10035 return isGFX90A();
10036
10037 if (Ctrl == "row_share" ||
10038 Ctrl == "row_xmask")
10039 return isGFX10Plus();
10040
10041 if (Ctrl == "wave_shl" ||
10042 Ctrl == "wave_shr" ||
10043 Ctrl == "wave_rol" ||
10044 Ctrl == "wave_ror" ||
10045 Ctrl == "row_bcast")
10046 return isVI() || isGFX9();
10047
10048 return Ctrl == "row_mirror" ||
10049 Ctrl == "row_half_mirror" ||
10050 Ctrl == "quad_perm" ||
10051 Ctrl == "row_shl" ||
10052 Ctrl == "row_shr" ||
10053 Ctrl == "row_ror";
10054}
10055
10056int64_t
10057AMDGPUAsmParser::parseDPPCtrlPerm() {
10058 // quad_perm:[%d,%d,%d,%d]
10059
10060 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
10061 return -1;
10062
10063 int64_t Val = 0;
10064 for (int i = 0; i < 4; ++i) {
10065 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
10066 return -1;
10067
10068 int64_t Temp;
10069 SMLoc Loc = getLoc();
10070 if (getParser().parseAbsoluteExpression(Temp))
10071 return -1;
10072 if (Temp < 0 || Temp > 3) {
10073 Error(Loc, "expected a 2-bit value");
10074 return -1;
10075 }
10076
10077 Val += (Temp << i * 2);
10078 }
10079
10080 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10081 return -1;
10082
10083 return Val;
10084}
10085
10086int64_t
10087AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
10088 using namespace AMDGPU::DPP;
10089
10090 // sel:%d
10091
10092 int64_t Val;
10093 SMLoc Loc = getLoc();
10094
10095 if (getParser().parseAbsoluteExpression(Val))
10096 return -1;
10097
10098 struct DppCtrlCheck {
10099 int64_t Ctrl;
10100 int Lo;
10101 int Hi;
10102 };
10103
10104 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
10105 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
10106 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
10107 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
10108 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
10109 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
10110 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
10111 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
10112 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
10113 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
10114 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
10115 .Default({-1, 0, 0});
10116
10117 bool Valid;
10118 if (Check.Ctrl == -1) {
10119 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
10120 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
10121 } else {
10122 Valid = Check.Lo <= Val && Val <= Check.Hi;
10123 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
10124 }
10125
10126 if (!Valid) {
10127 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
10128 return -1;
10129 }
10130
10131 return Val;
10132}
10133
10134ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
10135 using namespace AMDGPU::DPP;
10136
10137 if (!isToken(AsmToken::Identifier) ||
10138 !isSupportedDPPCtrl(getTokenStr(), Operands))
10139 return ParseStatus::NoMatch;
10140
10141 SMLoc S = getLoc();
10142 int64_t Val = -1;
10143 StringRef Ctrl;
10144
10145 parseId(Ctrl);
10146
10147 if (Ctrl == "row_mirror") {
10148 Val = DppCtrl::ROW_MIRROR;
10149 } else if (Ctrl == "row_half_mirror") {
10150 Val = DppCtrl::ROW_HALF_MIRROR;
10151 } else {
10152 if (skipToken(AsmToken::Colon, "expected a colon")) {
10153 if (Ctrl == "quad_perm") {
10154 Val = parseDPPCtrlPerm();
10155 } else {
10156 Val = parseDPPCtrlSel(Ctrl);
10157 }
10158 }
10159 }
10160
10161 if (Val == -1)
10162 return ParseStatus::Failure;
10163
10164 Operands.push_back(
10165 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10166 return ParseStatus::Success;
10167}
10168
10169void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
10170 bool IsDPP8) {
10171 OptionalImmIndexMap OptionalIdx;
10172 unsigned Opc = Inst.getOpcode();
10173 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10174
10175 // MAC instructions are special because they have 'old'
10176 // operand which is not tied to dst (but assumed to be).
10177 // They also have dummy unused src2_modifiers.
10178 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
10179 int Src2ModIdx =
10180 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
10181 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10182 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
10183
10184 unsigned I = 1;
10185 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10186 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10187 }
10188
10189 int Fi = 0;
10190 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
10191 bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10192 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10193 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10194 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
10195
10196 for (unsigned E = Operands.size(); I != E; ++I) {
10197
10198 if (IsMAC) {
10199 int NumOperands = Inst.getNumOperands();
10200 if (OldIdx == NumOperands) {
10201 // Handle old operand
10202 constexpr int DST_IDX = 0;
10203 Inst.addOperand(Inst.getOperand(DST_IDX));
10204 } else if (Src2ModIdx == NumOperands) {
10205 // Add unused dummy src2_modifiers
10207 }
10208 }
10209
10210 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
10211 Inst.addOperand(Inst.getOperand(0));
10212 }
10213
10214 if (IsVOP3CvtSrDpp) {
10215 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
10217 Inst.addOperand(MCOperand::createReg(MCRegister()));
10218 }
10219 }
10220
10221 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10223 if (TiedTo != -1) {
10224 assert((unsigned)TiedTo < Inst.getNumOperands());
10225 // handle tied old or src2 for MAC instructions
10226 Inst.addOperand(Inst.getOperand(TiedTo));
10227 }
10228 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10229 // Add the register arguments
10230 if (IsDPP8 && Op.isDppFI()) {
10231 Fi = Op.getImm();
10232 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10233 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10234 } else if (Op.isReg()) {
10235 Op.addRegOperands(Inst, 1);
10236 } else if (Op.isImm() &&
10237 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
10238 Op.addImmOperands(Inst, 1);
10239 } else if (Op.isImm()) {
10240 OptionalIdx[Op.getImmTy()] = I;
10241 } else {
10242 llvm_unreachable("unhandled operand type");
10243 }
10244 }
10245
10246 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
10247 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10248 AMDGPUOperand::ImmTyClamp);
10249
10250 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
10251 if (VdstInIdx == static_cast<int>(Inst.getNumOperands()))
10252 Inst.addOperand(Inst.getOperand(0));
10253 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10254 AMDGPUOperand::ImmTyByteSel);
10255 }
10256
10257 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10258 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
10259
10260 if (Desc.TSFlags & SIInstrFlags::VOP3P)
10261 cvtVOP3P(Inst, Operands, OptionalIdx);
10262 else if (Desc.TSFlags & SIInstrFlags::VOP3)
10263 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
10264 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
10265 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
10266 }
10267
10268 if (IsDPP8) {
10269 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
10270 using namespace llvm::AMDGPU::DPP;
10271 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10272 } else {
10273 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
10274 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10275 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10276 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10277
10278 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
10279 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10280 AMDGPUOperand::ImmTyDppFI);
10281 }
10282}
10283
10284void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
10285 OptionalImmIndexMap OptionalIdx;
10286
10287 unsigned I = 1;
10288 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10289 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10290 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10291 }
10292
10293 int Fi = 0;
10294 for (unsigned E = Operands.size(); I != E; ++I) {
10295 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10297 if (TiedTo != -1) {
10298 assert((unsigned)TiedTo < Inst.getNumOperands());
10299 // handle tied old or src2 for MAC instructions
10300 Inst.addOperand(Inst.getOperand(TiedTo));
10301 }
10302 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10303 // Add the register arguments
10304 if (Op.isReg() && validateVccOperand(Op.getReg())) {
10305 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
10306 // Skip it.
10307 continue;
10308 }
10309
10310 if (IsDPP8) {
10311 if (Op.isDPP8()) {
10312 Op.addImmOperands(Inst, 1);
10313 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10314 Op.addRegWithFPInputModsOperands(Inst, 2);
10315 } else if (Op.isDppFI()) {
10316 Fi = Op.getImm();
10317 } else if (Op.isReg()) {
10318 Op.addRegOperands(Inst, 1);
10319 } else {
10320 llvm_unreachable("Invalid operand type");
10321 }
10322 } else {
10324 Op.addRegWithFPInputModsOperands(Inst, 2);
10325 } else if (Op.isReg()) {
10326 Op.addRegOperands(Inst, 1);
10327 } else if (Op.isDPPCtrl()) {
10328 Op.addImmOperands(Inst, 1);
10329 } else if (Op.isImm()) {
10330 // Handle optional arguments
10331 OptionalIdx[Op.getImmTy()] = I;
10332 } else {
10333 llvm_unreachable("Invalid operand type");
10334 }
10335 }
10336 }
10337
10338 if (IsDPP8) {
10339 using namespace llvm::AMDGPU::DPP;
10340 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10341 } else {
10342 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10343 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10344 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10345 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
10346 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10347 AMDGPUOperand::ImmTyDppFI);
10348 }
10349 }
10350}
10351
10352//===----------------------------------------------------------------------===//
10353// sdwa
10354//===----------------------------------------------------------------------===//
10355
// Parse an SDWA sub-dword selector of the form "<Prefix>:<name|integer>".
// The accepted names are listed below; their list index is the encoded value.
ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
                                          StringRef Prefix,
                                          AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(
      Operands, Prefix,
      {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
      Type);
}
10364
// Parse the SDWA "dst_unused:<name|integer>" modifier; the list index of the
// matched name is the encoded value.
ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  return parseStringOrIntWithPrefix(
      Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
      AMDGPUOperand::ImmTySDWADstUnused);
}
10370
// Convert parsed operands of a VOP1 SDWA instruction into an MCInst.
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}
10374
// Convert parsed operands of a VOP2 SDWA instruction into an MCInst.
void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}
10378
// Convert parsed operands of a VOP2b SDWA instruction into an MCInst,
// skipping the "vcc" tokens used as both destination and source
// (SkipDstVcc = true, SkipSrcVcc = true).
void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}
10382
// Convert parsed operands of a VOP2e SDWA instruction into an MCInst,
// skipping only the "vcc" source token (SkipDstVcc = false,
// SkipSrcVcc = true).
void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}
10386
// Convert parsed operands of a VOPC SDWA instruction into an MCInst; the
// "vcc" destination token is skipped only on VI targets.
void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
10390
10391void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
10392 uint64_t BasicInstType,
10393 bool SkipDstVcc,
10394 bool SkipSrcVcc) {
10395 using namespace llvm::AMDGPU::SDWA;
10396
10397 OptionalImmIndexMap OptionalIdx;
10398 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
10399 bool SkippedVcc = false;
10400
10401 unsigned I = 1;
10402 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10403 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10404 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10405 }
10406
10407 for (unsigned E = Operands.size(); I != E; ++I) {
10408 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10409 if (SkipVcc && !SkippedVcc && Op.isReg() &&
10410 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
10411 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
10412 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
10413 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
10414 // Skip VCC only if we didn't skip it on previous iteration.
10415 // Note that src0 and src1 occupy 2 slots each because of modifiers.
10416 if (BasicInstType == SIInstrFlags::VOP2 &&
10417 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
10418 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
10419 SkippedVcc = true;
10420 continue;
10421 }
10422 if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
10423 SkippedVcc = true;
10424 continue;
10425 }
10426 }
10428 Op.addRegOrImmWithInputModsOperands(Inst, 2);
10429 } else if (Op.isImm()) {
10430 // Handle optional arguments
10431 OptionalIdx[Op.getImmTy()] = I;
10432 } else {
10433 llvm_unreachable("Invalid operand type");
10434 }
10435 SkippedVcc = false;
10436 }
10437
10438 const unsigned Opc = Inst.getOpcode();
10439 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10440 Opc != AMDGPU::V_NOP_sdwa_vi) {
10441 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
10442 switch (BasicInstType) {
10443 case SIInstrFlags::VOP1:
10444 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
10445 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10446 AMDGPUOperand::ImmTyClamp, 0);
10447
10448 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10449 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10450 AMDGPUOperand::ImmTyOModSI, 0);
10451
10452 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
10453 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10454 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10455
10456 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
10457 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10458 AMDGPUOperand::ImmTySDWADstUnused,
10459 DstUnused::UNUSED_PRESERVE);
10460
10461 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10462 break;
10463
10464 case SIInstrFlags::VOP2:
10465 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10466 AMDGPUOperand::ImmTyClamp, 0);
10467
10468 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
10469 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
10470
10471 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10472 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
10473 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10474 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10475 break;
10476
10477 case SIInstrFlags::VOPC:
10478 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
10479 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10480 AMDGPUOperand::ImmTyClamp, 0);
10481 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10482 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10483 break;
10484
10485 default:
10486 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
10487 }
10488 }
10489
10490 // special case v_mac_{f16, f32}:
10491 // it has src2 register operand that is tied to dst operand
10492 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10493 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10494 auto *it = Inst.begin();
10495 std::advance(
10496 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
10497 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
10498 }
10499}
10500
10501/// Force static initialization.
10502extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
10507
10508#define GET_MATCHER_IMPLEMENTATION
10509#define GET_MNEMONIC_SPELL_CHECKER
10510#define GET_MNEMONIC_CHECKER
10511#include "AMDGPUGenAsmMatcher.inc"
10512
10513ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10514 unsigned MCK) {
10515 switch (MCK) {
10516 case MCK_addr64:
10517 return parseTokenOp("addr64", Operands);
10518 case MCK_done:
10519 return parseNamedBit("done", Operands, AMDGPUOperand::ImmTyDone, true);
10520 case MCK_idxen:
10521 return parseTokenOp("idxen", Operands);
10522 case MCK_lds:
10523 return parseTokenOp("lds", Operands);
10524 case MCK_offen:
10525 return parseTokenOp("offen", Operands);
10526 case MCK_off:
10527 return parseTokenOp("off", Operands);
10528 case MCK_row_95_en:
10529 return parseNamedBit("row_en", Operands, AMDGPUOperand::ImmTyRowEn, true);
10530 case MCK_gds:
10531 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
10532 case MCK_tfe:
10533 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
10534 }
10535 return tryCustomParseOperand(Operands, MCK);
10536}
10537
10538// This function should be defined after auto-generated include so that we have
10539// MatchClassKind enum defined
10540unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
10541 unsigned Kind) {
10542 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
10543 // But MatchInstructionImpl() expects to meet token and fails to validate
10544 // operand. This method checks if we are given immediate operand but expect to
10545 // get corresponding token.
10546 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
10547 switch (Kind) {
10548 case MCK_addr64:
10549 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10550 case MCK_gds:
10551 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10552 case MCK_lds:
10553 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10554 case MCK_idxen:
10555 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10556 case MCK_offen:
10557 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10558 case MCK_tfe:
10559 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10560 case MCK_done:
10561 return Operand.isDone() ? Match_Success : Match_InvalidOperand;
10562 case MCK_row_95_en:
10563 return Operand.isRowEn() ? Match_Success : Match_InvalidOperand;
10564 case MCK_SSrc_b32:
10565 // When operands have expression values, they will return true for isToken,
10566 // because it is not possible to distinguish between a token and an
10567 // expression at parse time. MatchInstructionImpl() will always try to
10568 // match an operand as a token, when isToken returns true, and when the
10569 // name of the expression is not a valid token, the match will fail,
10570 // so we need to handle it here.
10571 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10572 case MCK_SSrc_f32:
10573 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10574 case MCK_SOPPBrTarget:
10575 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10576 case MCK_VReg32OrOff:
10577 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10578 case MCK_InterpSlot:
10579 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10580 case MCK_InterpAttr:
10581 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10582 case MCK_InterpAttrChan:
10583 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10584 case MCK_SReg_64:
10585 case MCK_SReg_64_XEXEC:
10586 // Null is defined as a 32-bit register but
10587 // it should also be enabled with 64-bit operands or larger.
10588 // The following code enables it for SReg_64 and larger operands
10589 // used as source and destination. Remaining source
10590 // operands are handled in isInlinableImm.
10591 case MCK_SReg_96:
10592 case MCK_SReg_128:
10593 case MCK_SReg_256:
10594 case MCK_SReg_512:
10595 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10596 default:
10597 return Match_InvalidOperand;
10598 }
10599}
10600
10601//===----------------------------------------------------------------------===//
10602// endpgm
10603//===----------------------------------------------------------------------===//
10604
10605ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10606 SMLoc S = getLoc();
10607 int64_t Imm = 0;
10608
10609 if (!parseExpr(Imm)) {
10610 // The operand is optional, if not present default to 0
10611 Imm = 0;
10612 }
10613
10614 if (!isUInt<16>(Imm))
10615 return Error(S, "expected a 16-bit value");
10616
10617 Operands.push_back(
10618 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
10619 return ParseStatus::Success;
10620}
10621
// True if this operand is an s_endpgm immediate.
bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
10623
10624//===----------------------------------------------------------------------===//
10625// Split Barrier
10626//===----------------------------------------------------------------------===//
10627
// A split-barrier operand is any value expressible as a 32-bit inline constant.
bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
#define Success
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
SmallVector< int16_t, MAX_SRC_OPERANDS_NUM > OperandIndices
static bool checkWriteLane(const MCInst &Inst)
static bool getRegNum(StringRef Str, unsigned &Num)
static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, unsigned i, unsigned Opc, AMDGPU::OpName OpName)
static constexpr RegInfo RegularRegisters[]
static const RegInfo * getRegularRegInfo(StringRef Str)
static ArrayRef< unsigned > getAllVariants()
static OperandIndices getSrcOperandIndices(unsigned Opcode, bool AddMandatoryLiterals=false)
static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name, const MCRegisterInfo *MRI)
static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
static const fltSemantics * getFltSemantics(unsigned Size)
static bool isRegularReg(RegisterKind Kind)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser()
Force static initialization.
static bool ConvertOmodMul(int64_t &Mul)
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi)
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT)
constexpr uint64_t MIMGFlags
static bool AMDGPUCheckMnemonic(StringRef Mnemonic, const FeatureBitset &AvailableFeatures, unsigned VariantID)
static void applyMnemonicAliases(StringRef &Mnemonic, const FeatureBitset &Features, unsigned VariantID)
constexpr unsigned MAX_SRC_OPERANDS_NUM
#define EXPR_RESOLVE_OR_ERROR(RESOLVED)
static bool ConvertOmodDiv(int64_t &Div)
static bool IsRevOpcode(const unsigned Opcode)
static bool encodeCnt(const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal, bool Saturate, unsigned(*encode)(const IsaVersion &Version, unsigned, unsigned), unsigned(*decode)(const IsaVersion &Version, unsigned))
static MCRegister getSpecialRegForName(StringRef RegName)
static void addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, AMDGPUOperand::ImmTy ImmT, int64_t Default=0, std::optional< unsigned > InsertAt=std::nullopt)
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI)
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum)
static const fltSemantics * getOpFltSemantics(uint8_t OperandType)
static bool isInvalidVOPDY(const OperandVector &Operands, uint64_t InvalidOprIdx)
static std::string AMDGPUMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID=0)
static LLVM_READNONE unsigned encodeBitmaskPerm(const unsigned AndMask, const unsigned OrMask, const unsigned XorMask)
static bool isSafeTruncation(int64_t Val, unsigned Size)
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Provides AMDGPU specific target descriptions.
AMDGPU metadata definitions and in-memory representations.
AMDHSA kernel descriptor definitions.
static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value, raw_ostream &Err)
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file declares a class to represent arbitrary precision floating point values and provide a varie...
#define X(NUM, ENUM, NAME)
Definition ELF.h:849
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_READNONE
Definition Compiler.h:315
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
@ Default
#define Check(C,...)
static llvm::Expected< InlineInfo > decode(DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr)
Decode an InlineInfo in Data at the specified offset.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
Loop::LoopBounds::Direction Direction
Definition LoopInfo.cpp:253
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(PassOpts->AAPipeline)
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:487
This file implements the SmallBitVector class.
StringSet - A set-like wrapper for the StringMap.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
BinaryOperator * Mul
static const char * getRegisterName(MCRegister Reg)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & BFloat()
Definition APFloat.h:295
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5890
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition MCAsmMacro.h:103
bool is(TokenKind K) const
Definition MCAsmMacro.h:75
Register getReg() const
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
void printExpr(raw_ostream &, const MCExpr &) const
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:353
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:428
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getNumOperands() const
Definition MCInst.h:212
SMLoc getLoc() const
Definition MCInst.h:208
void setLoc(SMLoc loc)
Definition MCInst.h:207
unsigned getOpcode() const
Definition MCInst.h:202
iterator insert(iterator I, const MCOperand &Op)
Definition MCInst.h:232
void addOperand(const MCOperand Op)
Definition MCInst.h:215
iterator begin()
Definition MCInst.h:227
size_t size() const
Definition MCInst.h:226
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const
Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.
Definition MCInstrInfo.h:80
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
void setImm(int64_t Val)
Definition MCInst.h:89
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
bool isImm() const
Definition MCInst.h:66
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
const MCExpr * getExpr() const
Definition MCInst.h:118
bool isExpr() const
Definition MCInst.h:69
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
bool regsOverlap(MCRegister RegA, MCRegister RegB) const
Returns true if the two registers are equal or alias each other.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
constexpr bool isValid() const
Definition MCRegister.h:84
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Generic base class for all target subtargets.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
void setRedefinable(bool Value)
Mark this symbol as redefinable.
Definition MCSymbol.h:210
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
MCTargetAsmParser - Generic interface to target specific assembly parsers.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
constexpr unsigned id() const
Definition Register.h:100
Represents a location in source code.
Definition SMLoc.h:22
static SMLoc getFromPointer(const char *Ptr)
Definition SMLoc.h:35
constexpr const char * getPointer() const
Definition SMLoc.h:33
constexpr bool isValid() const
Definition SMLoc.h:28
SMLoc Start
Definition SMLoc.h:49
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
Definition StringRef.h:685
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:140
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:629
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:137
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition StringRef.h:270
bool consume_front(char Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:655
bool contains(StringRef key) const
Check if the set contains the given key.
Definition StringSet.h:60
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
static constexpr CustomOperand Operands[]
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
constexpr const char *const ModMatrixFmt[]
constexpr const char *const ModMatrixScaleFmt[]
constexpr const char *const ModMatrixScale[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isInlineValue(MCRegister Reg)
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY bool isLitExpr(const MCExpr *Expr)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX13(const MCSubtargetInfo &STI)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool hasMAIInsts(const MCSubtargetInfo &STI)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
LLVM_READONLY int64_t getLitValue(const MCExpr *Expr)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:234
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:204
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:211
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:225
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:222
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:227
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:213
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:208
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:203
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:210
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:209
@ OPERAND_REG_IMM_V2FP16_SPLAT
Definition SIDefines.h:212
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:221
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:219
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:214
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:207
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:228
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:239
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:240
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:215
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:206
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:224
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:220
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:226
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:216
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:241
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:223
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:205
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:231
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
bool isGFX1250Plus(const MCSubtargetInfo &STI)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1429
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ OPERAND_IMMEDIATE
Definition MCInstrDesc.h:61
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
@ Valid
The data is already valid.
Context & getContext() const
Definition BasicBlock.h:99
bool isNull(StringRef S)
Definition YAMLTraits.h:570
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
Definition Error.h:1113
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
StringMapEntry< Value * > ValueName
Definition Value.h:56
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
Definition Alignment.h:206
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
static bool isMem(const MachineInstr &MI, unsigned Op)
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
void PrintError(const Twine &Msg)
Definition Error.cpp:104
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
Definition InstrProf.h:299
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:360
Op::Description Desc
Target & getTheR600Target()
The target for R600 GPUs.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:334
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
Definition DWP.h:27
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
#define N
RegisterKind Kind
StringLiteral Name
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx, bool InitMCExpr=true)
Instruction set architecture version.
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...