//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSymbol.h"
#include <optional>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

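// Kinds of register operands the parser can produce; IS_TTMP denotes the
// trap-handler temporary registers (ttmp*), and IS_SPECIAL presumably covers
// named registers such as VCC, EXEC, or M0 that are not plain indexed GPRs.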
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;
    LitModifier Lit = LitModifier::None;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
    bool isForcedLit() const { return Lit == LitModifier::Lit; }
    bool isForcedLit64() const { return Lit == LitModifier::Lit64; }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers()) &&
             "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers())
        return getFPModifiersOperand();
      if (hasIntModifiers())
        return getIntModifiersOperand();
      return 0;
    }

    friend raw_ostream &operator<<(raw_ostream &OS,
                                   AMDGPUOperand::Modifiers Mods);
  };
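  // Illustrative example (not from the original source): for a source operand
  // written as "-|v0|" the parser sets Abs = Neg = true, so
  // getModifiersOperand() yields SISrcMods::ABS | SISrcMods::NEG, the
  // immediate emitted ahead of the source operand by
  // addRegOrImmWithInputModsOperands() below.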

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTySMEMOffsetMod,
    ImmTyCPol,
    ImmTyTFE,
    ImmTyIsAsync,
    ImmTyD16,
    ImmTyClamp,
    ImmTyOModSI,
    ImmTySDWADstSel,
    ImmTySDWASrc0Sel,
    ImmTySDWASrc1Sel,
    ImmTySDWADstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyDone,
    ImmTyRowEn,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyWaitEvent,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyInterpAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyIndexKey8bit,
    ImmTyIndexKey16bit,
    ImmTyIndexKey32bit,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFI,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
    ImmTyWaitVAVDst,
    ImmTyWaitVMVSrc,
    ImmTyBitOp3,
    ImmTyMatrixAFMT,
    ImmTyMatrixBFMT,
    ImmTyMatrixAScale,
    ImmTyMatrixBScale,
    ImmTyMatrixAScaleFmt,
    ImmTyMatrixBScaleFmt,
    ImmTyMatrixAReuse,
    ImmTyMatrixBReuse,
    ImmTyScaleSel,
    ImmTyByteSel,
  };
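  // ImmTyNone marks a plain literal immediate; every other ImmTy value tags a
  // named modifier or special operand parsed from the assembly (offset:, gds,
  // clamp, ...), which the converters later look up by type via isImmTy().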
196
197private:
198 struct TokOp {
199 const char *Data;
200 unsigned Length;
201 };
202
203 struct ImmOp {
204 int64_t Val;
205 ImmTy Type;
206 bool IsFPImm;
207 Modifiers Mods;
208 };
209
210 struct RegOp {
211 MCRegister RegNo;
212 Modifiers Mods;
213 };
214
215 union {
216 TokOp Tok;
217 ImmOp Imm;
218 RegOp Reg;
219 const MCExpr *Expr;
220 };
221
222 // The index of the associated MCInst operand.
223 mutable int MCOpIdx = -1;
224
225public:
226 bool isToken() const override { return Kind == Token; }
227
228 bool isSymbolRefExpr() const {
229 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230 }
231
232 bool isImm() const override {
233 return Kind == Immediate;
234 }
235
236 bool isInlinableImm(MVT type) const;
237 bool isLiteralImm(MVT type) const;
238
239 bool isRegKind() const {
240 return Kind == Register;
241 }
242
243 bool isReg() const override {
244 return isRegKind() && !hasModifiers();
245 }
246
247 bool isRegOrInline(unsigned RCID, MVT type) const {
248 return isRegClass(RCID) || isInlinableImm(type);
249 }
250
251 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
252 return isRegOrInline(RCID, type) || isLiteralImm(type);
253 }
254
255 bool isRegOrImmWithInt16InputMods() const {
256 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
257 }
258
  template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
    return isRegOrImmWithInputMods(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
    return isRegOrImmWithInputMods(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrInlineImmWithFP64InputMods() const {
    return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }

  bool isVRegWithFP32InputMods() const {
    return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
  }

  bool isVRegWithFP64InputMods() const {
    return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
  }

  bool isPackedFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
  }

  bool isPackedVGPRFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isAV_LdSt_32_Align2_RegOp() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::AGPR_32RegClassID);
  }

  bool isVRegWithInputMods() const;
  template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
  template <bool IsFake16> bool isT16VRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }

  bool isImmLiteral() const { return isImmTy(ImmTyNone); }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
  bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
  bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
  bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
  bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
  bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
  bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
  bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
  bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
  bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
  bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isDppFI() const { return isImmTy(ImmTyDppFI); }
  bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
  bool isDone() const { return isImmTy(ImmTyDone); }
  bool isRowEn() const { return isImmTy(ImmTyRowEn); }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrc_b32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b16();
  }

  bool isSSrc_b64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
           (((const MCTargetAsmParser *)AsmParser)
                ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
            isExpr());
  }

  bool isSSrc_f32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }

  bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }

  bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrc_b32();
  }

  bool isSSrcOrLds_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrc_b32_Lo256() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
  }

  bool isVCSrc_b64_Lo256() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
  }

  bool isVCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcT_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isVCSrcTB16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrcFake16B16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrc_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrc_v2b16() const { return isVCSrc_b16(); }

  bool isVCSrc_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrc_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcTBF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
  }

  bool isVCSrcT_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcT_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
  }

  bool isVCSrcTBF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcTF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrcFake16BF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcFake16F16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrc_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
  }

  bool isVCSrc_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }

  bool isVCSrc_v2f16() const { return isVCSrc_f16(); }

  bool isVSrc_b32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }

  bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }

  bool isVSrcT_b16_Lo128() const {
    return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrcFake16_b16_Lo128() const {
    return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }

  bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }

  bool isVCSrcV2FP32() const { return isVCSrc_f64(); }

  bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }

  bool isVCSrc_v2b32() const { return isVCSrc_b64(); }

  bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }

  bool isVSrc_f32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }

  bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }

  bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }

  bool isVSrcT_bf16_Lo128() const {
    return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcT_f16_Lo128() const {
    return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrcFake16_bf16_Lo128() const {
    return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcFake16_f16_Lo128() const {
    return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }

  bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }

  bool isVSrc_v2bf16() const {
    return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
  }

  bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }

  bool isVSrc_v2f16_splat() const { return isVSrc_v2f16(); }

  bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
  }

  bool isVISrc_64_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
  }

  bool isVISrc_64_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_256_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_512_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512_b32();
  }

  bool isVISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024_b32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128_b32();
  }

  bool isVISrc_128_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
  }

  bool isVISrc_128_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128_f16() || isVISrc_128_b32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512_b32();
  }

  bool isAISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024_b32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isKImmFP64() const { return isLiteralImm(MVT::f64); }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSOPPBrTarget() const { return isExpr() || isImm(); }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayALU() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isWaitEvent() const;
  bool isSplitBarrier() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

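  // Binds this operand into a nullary predicate. A typical (illustrative,
  // assumed) use: Op.getPredicate([](const AMDGPUOperand &O) {
  //   return O.isImm(); }) returns a callable that tests the captured operand.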
  auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
    return [this, P]() { return P(*this); };
  }

  StringRef getToken() const {
    assert(isToken());
    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  MCRegister getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  int getMCOpIdx() const { return MCOpIdx; }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  bool isForcedLit() const {
    return isImmLiteral() && getModifiers().isForcedLit();
  }

  bool isForcedLit64() const {
    return isImmLiteral() && getModifiers().isForcedLit64();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N,
                      bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val,
                            bool ApplyModifiers) const;

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  static void printImmTy(raw_ostream &OS, ImmTy Type) {
    // clang-format off
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTyIndexKey8bit: OS << "index_key"; break;
    case ImmTyIndexKey16bit: OS << "index_key"; break;
    case ImmTyIndexKey32bit: OS << "index_key"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyIsAsync: OS << "IsAsync"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClamp: OS << "Clamp"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFI: OS << "DppFI"; break;
    case ImmTySDWADstSel: OS << "SDWADstSel"; break;
    case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
    case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
    case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyDone: OS << "Done"; break;
    case ImmTyRowEn: OS << "RowEn"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyWaitEvent: OS << "WaitEvent"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
    case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
    case ImmTyBitOp3: OS << "BitOp3"; break;
    case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
    case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
    case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
    case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
    case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
    case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
    case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
    case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
    case ImmTyScaleSel: OS << "ScaleSel"; break;
    case ImmTyByteSel: OS << "ByteSel"; break;
    }
    // clang-format on
  }

  void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
         << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr ";
      MAI.printExpr(OS, *Expr);
      OS << '>';
      break;
    }
  }

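  // Factory helpers used throughout the parser to build operands. A typical
  // (illustrative) call site:
  //   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
  //                                               AMDGPUOperand::ImmTyOffset));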
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      MCRegister Reg, SMLoc S, SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = Reg;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator<<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// TODO: define GET_SUBTARGET_FEATURE_NAME
#define GET_REGISTER_MATCHER
#include "AMDGPUGenAsmMatcher.inc"
#undef GET_REGISTER_MATCHER
#undef GET_SUBTARGET_FEATURE_NAME

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol *const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol *const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol *const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol *const vSym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

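  // Worked example (editorial): a reference to s[10:11] arrives as
  // RegKind = IS_SGPR, DwordRegIndex = 10, RegWidth = 64, so the call below is
  // usesSgprAt(10 + divideCeil(64, 32) - 1) == usesSgprAt(11), which raises
  // .kernel.sgpr_count to 12.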
  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  const unsigned HwMode;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

  /// Get size of register operand
  unsigned getRegOperandSize(const MCInstrDesc &Desc, unsigned OpNo) const {
    assert(OpNo < Desc.NumOperands);
    int16_t RCID = MII.getOpRegClassID(Desc.operands()[OpNo], HwMode);
    return getRegBitWidth(RCID) / 8;
  }

  std::optional<AMDGPU::InfoSectionData> InfoData;

private:
  void createConstantSymbol(StringRef Id, int64_t Val);

  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
                          const MCExpr *FlatScrUsed, bool XNACKUsed,
                          std::optional<bool> EnableWavefrontSize32,
                          const MCExpr *NextFreeVGPR, SMRange VGPRRange,
                          const MCExpr *NextFreeSGPR, SMRange SGPRRange,
                          const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSACodeObjectVersion();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();
  bool ParseDirectiveAMDGPUInfo();

  /// Common code to parse out a block of text (typically YAML) between start
  /// and end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
                             RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                             unsigned &RegWidth,
                             SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                             unsigned &RegWidth,
                             SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                          unsigned &RegWidth,
                          SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
  MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
                           unsigned SubReg, unsigned RegWidth, SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic);

public:
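  // OperandMode_NSA selects parsing of MIMG non-sequential-address (NSA)
  // operand lists, where the address VGPRs are written individually (e.g.
  // [v0, v2, v5]) instead of as one contiguous range; OperandMode_Default
  // covers everything else.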
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII)
      : MCTargetAsmParser(STI, MII), Parser(_Parser),
        HwMode(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
      createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
      createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
    } else {
      createConstantSymbol(".option.machine_version_major", ISA.Major);
      createConstantSymbol(".option.machine_version_minor", ISA.Minor);
      createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
    }
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      initializeGprCountSymbol(IS_VGPR);
      initializeGprCountSymbol(IS_SGPR);
    } else
      KernelScope.initialize(getContext());

    for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
      createConstantSymbol(Symbol, Code);

    createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
    createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
    createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasA16() const { return AMDGPU::hasA16(getSTI()); }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean it.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX11() const {
    return AMDGPU::isGFX11(getSTI());
  }

  bool isGFX11Plus() const {
    return AMDGPU::isGFX11Plus(getSTI());
  }

  bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }

  bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }

  bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }

  bool isGFX1250Plus() const { return AMDGPU::isGFX1250Plus(getSTI()); }

  bool isGFX13() const { return AMDGPU::isGFX13(getSTI()); }

  bool isGFX13Plus() const { return AMDGPU::isGFX13Plus(getSTI()); }

  bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }

  bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool has64BitLiterals() const {
    return getFeatureBits()[AMDGPU::Feature64BitLiterals];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasTrue16Insts() const {
    return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  bool hasPartialNSAEncoding() const {
    return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
  }

  bool hasGloballyAddressableScratch() const {
    return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
  }

  unsigned getNSAMaxSize(bool HasSampler = false) const {
    return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
  }

  unsigned getMaxNumUserSGPRs() const {
    return AMDGPU::getMaxNumUserSGPRs(getSTI());
  }

  bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  MCContext &getContext() const {
    // We need this const_cast because for some reason getContext() is not
    // const in MCAsmParser.
    return const_cast<AMDGPUAsmParser *>(this)->MCTargetAsmParser::getContext();
  }

  const MCRegisterInfo *getMRI() const {
    return getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  // FIXME: This should not be used. Instead, should use queries derived from
  // getAvailableFeatures().
  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
  ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
                               SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  void onEndOfFile() override;
  ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
                           OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);

  ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);

  ParseStatus
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     std::function<bool(int64_t &)> ConvertResult = nullptr);

  ParseStatus parseOperandArrayWithPrefix(
      const char *Prefix, OperandVector &Operands,
      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
      bool (*ConvertResult)(int64_t &) = nullptr);

  ParseStatus
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                bool IgnoreNegative = false);
  unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
  ParseStatus parseCPol(OperandVector &Operands);
  ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
  ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
  ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
                                    SMLoc &StringLoc);
  ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
                                         StringRef Name,
                                         ArrayRef<const char *> Ids,
                                         int64_t &IntVal);
  ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
                                         StringRef Name,
                                         ArrayRef<const char *> Ids,
                                         AMDGPUOperand::ImmTy Type);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token,
                         const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token,
                              const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token,
                              const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token,
                               const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
                       LitModifier Lit = LitModifier::None);
  ParseStatus parseReg(OperandVector &Operands);
  ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
                            LitModifier Lit = LitModifier::None);
  ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                           bool AllowImm = true);
  ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                            bool AllowImm = true);
  ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
  ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
  ParseStatus parseVReg32OrOff(OperandVector &Operands);
  ParseStatus tryParseIndexKey(OperandVector &Operands,
                               AMDGPUOperand::ImmTy ImmTy);
  ParseStatus parseIndexKey8bit(OperandVector &Operands);
  ParseStatus parseIndexKey16bit(OperandVector &Operands);
  ParseStatus parseIndexKey32bit(OperandVector &Operands);
  ParseStatus tryParseMatrixFMT(OperandVector &Operands, StringRef Name,
                                AMDGPUOperand::ImmTy Type);
  ParseStatus parseMatrixAFMT(OperandVector &Operands);
  ParseStatus parseMatrixBFMT(OperandVector &Operands);
  ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
                                  AMDGPUOperand::ImmTy Type);
  ParseStatus parseMatrixAScale(OperandVector &Operands);
  ParseStatus parseMatrixBScale(OperandVector &Operands);
  ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
                                     AMDGPUOperand::ImmTy Type);
  ParseStatus parseMatrixAScaleFmt(OperandVector &Operands);
  ParseStatus parseMatrixBScaleFmt(OperandVector &Operands);

  ParseStatus parseDfmtNfmt(int64_t &Format);
  ParseStatus parseUfmt(int64_t &Format);
  ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
                                       int64_t &Format);
  ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
                                         int64_t &Format);
  ParseStatus parseFORMAT(OperandVector &Operands);
  ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
  ParseStatus parseNumericFormat(int64_t &Format);
  ParseStatus parseFlatOffset(OperandVector &Operands);
  ParseStatus parseR128A16(OperandVector &Operands);
  ParseStatus parseBLGP(OperandVector &Operands);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr,
                     SMLoc Loc);

  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  ParseStatus parseSWaitCnt(OperandVector &Operands);

  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
  ParseStatus parseDepCtr(OperandVector &Operands);

  bool parseDelay(int64_t &Delay);
  ParseStatus parseSDelayALU(OperandVector &Operands);

  ParseStatus parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Val;
    bool IsSymbolic = false;
    bool IsDefined = false;

    constexpr OperandInfoTy(int64_t Val) : Val(Val) {}
  };

  struct StructuredOpField : OperandInfoTy {
    StringLiteral Id;
    StringLiteral Desc;
    unsigned Width;
    bool IsDefined = false;

    constexpr StructuredOpField(StringLiteral Id, StringLiteral Desc,
                                unsigned Width, int64_t Default)
        : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
    virtual ~StructuredOpField() = default;

    bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
      Parser.Error(Loc, "invalid " + Desc + ": " + Err);
      return false;
    }

    virtual bool validate(AMDGPUAsmParser &Parser) const {
      if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
        return Error(Parser, "not supported on this GPU");
      if (!isUIntN(Width, Val))
        return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
      return true;
    }
  };

  ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
  bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op,
                        OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
                             OperandInfoTy &Width);

  const AMDGPUOperand &findMCOperand(const OperandVector &Operands,
                                     int MCOpIdx) const;

  static SMLoc getLaterLoc(SMLoc a, SMLoc b);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
  SMLoc getBLGPLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(const OperandVector &Operands, int MCOpIdx) const;
  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand &)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type,
                  const OperandVector &Operands) const;
  SMLoc getInstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, SMLoc IDLoc,
                           const OperandVector &Operands);
  bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateConstantBusLimitations(const MCInst &Inst,
                                      const OperandVector &Operands);
  std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
                                                      bool AsVOPD3);
  bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
  bool tryVOPD(const MCInst &Inst);
  bool tryVOPD3(const MCInst &Inst);
  bool tryAnotherVOPDEncoding(const MCInst &Inst);

  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1879 bool validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc);
1880 bool validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc);
1881 bool validateMIMGD16(const MCInst &Inst);
1882 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1883 bool validateTensorR128(const MCInst &Inst);
1884 bool validateMIMGMSAA(const MCInst &Inst);
1885 bool validateOpSel(const MCInst &Inst);
1886 bool validateTrue16OpSel(const MCInst &Inst);
1887 bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
1888 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1889 bool validateVccOperand(MCRegister Reg) const;
1890 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1891 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1892 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1893 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1894 bool validateAGPRLdSt(const MCInst &Inst) const;
1895 bool validateVGPRAlign(const MCInst &Inst) const;
1896 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1897 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1898 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1899 bool validateDivScale(const MCInst &Inst);
1900 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1901 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1902 SMLoc IDLoc);
1903 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1904 const unsigned CPol);
1905 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1906 bool validateLdsDirect(const MCInst &Inst, const OperandVector &Operands);
1907 bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
1908 unsigned getConstantBusLimit(unsigned Opcode) const;
1909 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1910 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1911 MCRegister findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1912
1913 bool isSupportedMnemo(StringRef Mnemo,
1914 const FeatureBitset &FBS);
1915 bool isSupportedMnemo(StringRef Mnemo,
1916 const FeatureBitset &FBS,
1917 ArrayRef<unsigned> Variants);
1918 bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);
1919
1920 bool isId(const StringRef Id) const;
1921 bool isId(const AsmToken &Token, const StringRef Id) const;
1922 bool isToken(const AsmToken::TokenKind Kind) const;
1923 StringRef getId() const;
1924 bool trySkipId(const StringRef Id);
1925 bool trySkipId(const StringRef Pref, const StringRef Id);
1926 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1927 bool trySkipToken(const AsmToken::TokenKind Kind);
1928 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1929 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1930 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1931
1932 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1933 AsmToken::TokenKind getTokenKind() const;
1934 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1935 bool parseExpr(OperandVector &Operands);
1936 StringRef getTokenStr() const;
1937 AsmToken peekToken(bool ShouldSkipSpace = true);
1938 AsmToken getToken() const;
1939 SMLoc getLoc() const;
1940 void lex();
1941
1942public:
1943 void onBeginOfFile() override;
1944 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1945
1946 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1947
1948 ParseStatus parseExpTgt(OperandVector &Operands);
1949 ParseStatus parseSendMsg(OperandVector &Operands);
1950 ParseStatus parseWaitEvent(OperandVector &Operands);
1951 ParseStatus parseInterpSlot(OperandVector &Operands);
1952 ParseStatus parseInterpAttr(OperandVector &Operands);
1953 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1954 ParseStatus parseBoolReg(OperandVector &Operands);
1955
1956 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1957 const unsigned MaxVal, const Twine &ErrMsg,
1958 SMLoc &Loc);
1959 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1960 const unsigned MinVal,
1961 const unsigned MaxVal,
1962 const StringRef ErrMsg);
1963 ParseStatus parseSwizzle(OperandVector &Operands);
1964 bool parseSwizzleOffset(int64_t &Imm);
1965 bool parseSwizzleMacro(int64_t &Imm);
1966 bool parseSwizzleQuadPerm(int64_t &Imm);
1967 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1968 bool parseSwizzleBroadcast(int64_t &Imm);
1969 bool parseSwizzleSwap(int64_t &Imm);
1970 bool parseSwizzleReverse(int64_t &Imm);
1971 bool parseSwizzleFFT(int64_t &Imm);
1972 bool parseSwizzleRotate(int64_t &Imm);
1973
1974 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1975 int64_t parseGPRIdxMacro();
1976
1977 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1978 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1979
1980 ParseStatus parseOModSI(OperandVector &Operands);
1981
1982 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1983 OptionalImmIndexMap &OptionalIdx);
1984 void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
1985 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1986 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1987 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1988 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1989
1990 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1991 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1992 OptionalImmIndexMap &OptionalIdx);
1993 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1994 OptionalImmIndexMap &OptionalIdx);
1995
1996 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1997 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1998 void cvtOpSelHelper(MCInst &Inst, unsigned OpSel);
1999
2000 bool parseDimId(unsigned &Encoding);
2001 ParseStatus parseDim(OperandVector &Operands);
2002 bool convertDppBoundCtrl(int64_t &BoundCtrl);
2003 ParseStatus parseDPP8(OperandVector &Operands);
2004 ParseStatus parseDPPCtrl(OperandVector &Operands);
2005 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
2006 int64_t parseDPPCtrlSel(StringRef Ctrl);
2007 int64_t parseDPPCtrlPerm();
2008 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
2009 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
2010 cvtDPP(Inst, Operands, true);
2011 }
2012 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
2013 bool IsDPP8 = false);
2014 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
2015 cvtVOP3DPP(Inst, Operands, true);
2016 }
2017
2018 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
2019 AMDGPUOperand::ImmTy Type);
2020 ParseStatus parseSDWADstUnused(OperandVector &Operands);
2021 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
2022 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
2023 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
2024 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
2025 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
2026 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
2027 uint64_t BasicInstType,
2028 bool SkipDstVcc = false,
2029 bool SkipSrcVcc = false);
2030
2031 ParseStatus parseEndpgm(OperandVector &Operands);
2032
2033 ParseStatus parseVOPD(OperandVector &Operands);
2034};
2035
2036} // end anonymous namespace
2037
2038 // May be called with an integer type of equivalent bitwidth.
2039static const fltSemantics *getFltSemantics(unsigned Size) {
2040 switch (Size) {
2041 case 4:
2042 return &APFloat::IEEEsingle();
2043 case 8:
2044 return &APFloat::IEEEdouble();
2045 case 2:
2046 return &APFloat::IEEEhalf();
2047 default:
2048 llvm_unreachable("unsupported fp type");
2049 }
2050}
2051
2052 static const fltSemantics *getFltSemantics(MVT VT) {
2053 return getFltSemantics(VT.getSizeInBits() / 8);
2054}
2055
2056 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
2057 switch (OperandType) {
2058 // When a floating-point immediate is used as an operand of type i16, the 32-bit
2059 // representation of the constant truncated to the 16 LSBs should be used.
2074 return &APFloat::IEEEsingle();
2081 return &APFloat::IEEEdouble();
2089 return &APFloat::IEEEhalf();
2094 return &APFloat::BFloat();
2095 default:
2096 llvm_unreachable("unsupported fp type");
2097 }
2098}
2099
2100//===----------------------------------------------------------------------===//
2101// Operand
2102//===----------------------------------------------------------------------===//
2103
2104static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
2105 bool Lost;
2106
2107 // Convert literal to the target floating-point semantics
2108 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
2109 APFloat::rmNearestTiesToEven,
2110 &Lost);
2111 // We allow precision loss but not overflow or underflow
2112 if (Status != APFloat::opOK &&
2113 Lost &&
2114 ((Status & APFloat::opOverflow) != 0 ||
2115 (Status & APFloat::opUnderflow) != 0)) {
2116 return false;
2117 }
2118
2119 return true;
2120}
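A minimal plain-C++ sketch of the acceptance rule above. APFloat's underflow detection is subtler than this, and convertsAcceptablyToF32 is an invented name for illustration only: narrowing a double literal may lose precision, but overflow and underflow are rejected.

#include <cassert>
#include <cmath>

// Sketch: accept inexact narrowing to f32, reject overflow and underflow.
static bool convertsAcceptablyToF32(double D) {
  float F = static_cast<float>(D);
  if (std::isinf(F) && !std::isinf(D))
    return false; // overflow
  if (F == 0.0f && D != 0.0)
    return false; // underflow
  return true;    // precision loss alone is fine
}

int main() {
  assert(convertsAcceptablyToF32(0.1));    // inexact in f32, still accepted
  assert(!convertsAcceptablyToF32(1e50));  // overflows to +inf
  assert(!convertsAcceptablyToF32(1e-60)); // underflows to zero
}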
2121
2122static bool isSafeTruncation(int64_t Val, unsigned Size) {
2123 return isUIntN(Size, Val) || isIntN(Size, Val);
2124}
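A standalone sketch of why isSafeTruncation() checks both ranges; fitsUnsigned and fitsSigned are local stand-ins for llvm::isUIntN and llvm::isIntN and assume Size < 64. A value truncates safely if it is representable as either a signed or an unsigned Size-bit integer.

#include <cassert>
#include <cstdint>

static bool fitsUnsigned(int64_t Val, unsigned Size) {
  return static_cast<uint64_t>(Val) < (1ULL << Size); // isUIntN
}

static bool fitsSigned(int64_t Val, unsigned Size) {
  const int64_t Min = -(INT64_C(1) << (Size - 1));
  const int64_t Max = (INT64_C(1) << (Size - 1)) - 1;
  return Val >= Min && Val <= Max; // isIntN
}

int main() {
  // 0xFFFF is only a valid unsigned 16-bit value, -1 only a signed one;
  // either property makes the truncation safe.
  assert(fitsUnsigned(0xFFFF, 16) && !fitsSigned(0xFFFF, 16));
  assert(!fitsUnsigned(-1, 16) && fitsSigned(-1, 16));
  assert(!fitsUnsigned(0x10000, 16) && !fitsSigned(0x10000, 16));
}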
2125
2126static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2127 if (VT.getScalarType() == MVT::i16)
2128 return isInlinableLiteral32(Val, HasInv2Pi);
2129
2130 if (VT.getScalarType() == MVT::f16)
2131 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2132
2133 assert(VT.getScalarType() == MVT::bf16);
2134
2135 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2136}
2137
2138bool AMDGPUOperand::isInlinableImm(MVT type) const {
2139
2140 // This is a hack to enable named inline values like
2141 // shared_base with both 32-bit and 64-bit operands.
2142 // Note that these values are defined as
2143 // 32-bit operands only.
2144 if (isInlineValue()) {
2145 return true;
2146 }
2147
2148 if (!isImmTy(ImmTyNone)) {
2149 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2150 return false;
2151 }
2152
2153 if (getModifiers().Lit != LitModifier::None)
2154 return false;
2155
2156 // TODO: We should avoid using host float here. It would be better to
2157 // check the float bit values which is what a few other places do.
2158 // We've had bot failures before due to weird NaN support on mips hosts.
2159
2160 APInt Literal(64, Imm.Val);
2161
2162 if (Imm.IsFPImm) { // We got fp literal token
2163 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2164 return AMDGPU::isInlinableLiteral64(Imm.Val,
2165 AsmParser->hasInv2PiInlineImm());
2166 }
2167
2168 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2169 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2170 return false;
2171
2172 if (type.getScalarSizeInBits() == 16) {
2173 bool Lost = false;
2174 switch (type.getScalarType().SimpleTy) {
2175 default:
2176 llvm_unreachable("unknown 16-bit type");
2177 case MVT::bf16:
2178 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2179 &Lost);
2180 break;
2181 case MVT::f16:
2182 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2183 &Lost);
2184 break;
2185 case MVT::i16:
2186 FPLiteral.convert(APFloatBase::IEEEsingle(),
2187 APFloat::rmNearestTiesToEven, &Lost);
2188 break;
2189 }
2190 // We need to use the 32-bit representation here because when a floating-point
2191 // inline constant is used as an i16 operand, its 32-bit representation
2192 // will be used. We will need the 32-bit value to check if
2193 // it is an FP inline constant.
2194 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2195 return isInlineableLiteralOp16(ImmVal, type,
2196 AsmParser->hasInv2PiInlineImm());
2197 }
2198
2199 // Check if single precision literal is inlinable
2200 return AMDGPU::isInlinableLiteral32(
2201 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2202 AsmParser->hasInv2PiInlineImm());
2203 }
2204
2205 // We got int literal token.
2206 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2207 return AMDGPU::isInlinableLiteral64(Imm.Val,
2208 AsmParser->hasInv2PiInlineImm());
2209 }
2210
2211 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2212 return false;
2213 }
2214
2215 if (type.getScalarSizeInBits() == 16) {
2216 return isInlineableLiteralOp16(
2217 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2218 type, AsmParser->hasInv2PiInlineImm());
2219 }
2220
2221 return AMDGPU::isInlinableLiteral32(
2222 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2223 AsmParser->hasInv2PiInlineImm());
2224}
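For reference, a self-contained approximation of the 32-bit inline-constant test that the checks above delegate to. isInlinableLiteral32Sketch is a local stand-in, not the LLVM implementation: integers in [-16, 64], a handful of fp bit patterns, and 1/(2*pi) gated on hardware support.

#include <cassert>
#include <cstdint>

static bool isInlinableLiteral32Sketch(int32_t V, bool HasInv2Pi) {
  if (V >= -16 && V <= 64)
    return true; // small integers are always inline
  switch (static_cast<uint32_t>(V)) {
  case 0x3F000000u: // 0.5f
  case 0xBF000000u: // -0.5f
  case 0x3F800000u: // 1.0f
  case 0xBF800000u: // -1.0f
  case 0x40000000u: // 2.0f
  case 0xC0000000u: // -2.0f
  case 0x40800000u: // 4.0f
  case 0xC0800000u: // -4.0f
    return true;
  case 0x3E22F983u: // 1/(2*pi)
    return HasInv2Pi;
  default:
    return false;
  }
}

int main() {
  assert(isInlinableLiteral32Sketch(64, false));
  assert(!isInlinableLiteral32Sketch(65, false));
  assert(isInlinableLiteral32Sketch(0x3F800000, false)); // 1.0f
  assert(isInlinableLiteral32Sketch(0x3E22F983, true));  // needs inv2pi
}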
2225
2226bool AMDGPUOperand::isLiteralImm(MVT type) const {
2227 // Check that this immediate can be added as a literal
2228 if (!isImmTy(ImmTyNone)) {
2229 return false;
2230 }
2231
2232 bool Allow64Bit =
2233 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2234
2235 if (!Imm.IsFPImm) {
2236 // We got int literal token.
2237
2238 if (type == MVT::f64 && hasFPModifiers()) {
2239 // FP modifiers cannot be applied to int literals while preserving the same
2240 // semantics for VOP1/2/C and VOP3, because of integer truncation. To avoid
2241 // ambiguity, disable these cases.
2242 return false;
2243 }
2244
2245 unsigned Size = type.getSizeInBits();
2246 if (Size == 64) {
2247 if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false))
2248 return true;
2249 Size = 32;
2250 }
2251
2252 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2253 // types.
2254 return isSafeTruncation(Imm.Val, Size);
2255 }
2256
2257 // We got fp literal token
2258 if (type == MVT::f64) { // Expected 64-bit fp operand
2259 // We would set the low 64 bits of the literal to zeroes, but we accept such literals.
2260 return true;
2261 }
2262
2263 if (type == MVT::i64) { // Expected 64-bit int operand
2264 // We don't allow fp literals in 64-bit integer instructions. It is
2265 // unclear how we should encode them.
2266 return false;
2267 }
2268
2269 // We allow fp literals with f16x2 operands assuming that the specified
2270 // literal goes into the lower half and the upper half is zero. We also
2271 // require that the literal may be losslessly converted to f16.
2272 //
2273 // For i16x2 operands, we assume that the specified literal is encoded as a
2274 // single-precision float. This is pretty odd, but it matches SP3 and what
2275 // happens in hardware.
2276 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2277 : (type == MVT::v2i16) ? MVT::f32
2278 : (type == MVT::v2f32) ? MVT::f32
2279 : type;
2280
2281 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2282 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2283}
2284
2285bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2286 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2287}
2288
2289bool AMDGPUOperand::isVRegWithInputMods() const {
2290 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2291 // GFX90A allows DPP on 64-bit operands.
2292 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2293 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2294}
2295
2296template <bool IsFake16>
2297bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2298 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2299 : AMDGPU::VGPR_16_Lo128RegClassID);
2300}
2301
2302template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2303 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2304 : AMDGPU::VGPR_16RegClassID);
2305}
2306
2307bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2308 if (AsmParser->isVI())
2309 return isVReg32();
2310 if (AsmParser->isGFX9Plus())
2311 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2312 return false;
2313}
2314
2315bool AMDGPUOperand::isSDWAFP16Operand() const {
2316 return isSDWAOperand(MVT::f16);
2317}
2318
2319bool AMDGPUOperand::isSDWAFP32Operand() const {
2320 return isSDWAOperand(MVT::f32);
2321}
2322
2323bool AMDGPUOperand::isSDWAInt16Operand() const {
2324 return isSDWAOperand(MVT::i16);
2325}
2326
2327bool AMDGPUOperand::isSDWAInt32Operand() const {
2328 return isSDWAOperand(MVT::i32);
2329}
2330
2331bool AMDGPUOperand::isBoolReg() const {
2332 return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
2333 (AsmParser->isWave32() && isSCSrc_b32()));
2334}
2335
2336uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2337{
2338 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2339 assert(Size == 2 || Size == 4 || Size == 8);
2340
2341 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2342
2343 if (Imm.Mods.Abs) {
2344 Val &= ~FpSignMask;
2345 }
2346 if (Imm.Mods.Neg) {
2347 Val ^= FpSignMask;
2348 }
2349
2350 return Val;
2351}
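A tiny runnable illustration of the bit manipulation above for a 32-bit operand (Size == 4): abs clears the sign bit and neg toggles it, with no floating-point arithmetic involved.

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t FpSignMask = 1ULL << 31;  // Size * 8 - 1 for Size == 4
  const uint64_t NegOneBits = 0xBF800000u; // -1.0f as raw bits
  assert((NegOneBits & ~FpSignMask) == 0x3F800000u); // abs: +1.0f
  assert((NegOneBits ^ FpSignMask) == 0x3F800000u);  // neg: flips back to +1.0f
}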
2352
2353void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2354 MCOpIdx = Inst.getNumOperands();
2355
2356 if (isExpr()) {
2357 Inst.addOperand(MCOperand::createExpr(Expr));
2358 return;
2359 }
2360
2361 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2362 Inst.getNumOperands())) {
2363 addLiteralImmOperand(Inst, Imm.Val,
2364 ApplyModifiers &
2365 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2366 } else {
2367 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2368 Inst.addOperand(MCOperand::createImm(Imm.Val));
2369 }
2370}
2371
2372void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2373 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2374 auto OpNum = Inst.getNumOperands();
2375 // Check that this operand accepts literals
2376 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2377
2378 if (ApplyModifiers) {
2379 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2380 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2381 Val = applyInputFPModifiers(Val, Size);
2382 }
2383
2384 APInt Literal(64, Val);
2385 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2386
2387 bool CanUse64BitLiterals =
2388 AsmParser->has64BitLiterals() &&
2389 !(InstDesc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));
2390 LitModifier Lit = getModifiers().Lit;
2391 MCContext &Ctx = AsmParser->getContext();
2392
2393 if (Imm.IsFPImm) { // We got fp literal token
2394 switch (OpTy) {
2400 if (Lit == LitModifier::None &&
2401 AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2402 AsmParser->hasInv2PiInlineImm())) {
2403 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2404 return;
2405 }
2406
2407 // Non-inlineable
2408 if (AMDGPU::isSISrcFPOperand(InstDesc,
2409 OpNum)) { // Expected 64-bit fp operand
2410 bool HasMandatoryLiteral =
2411 AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
2412 // For fp operands we check if low 32 bits are zeros
2413 if (Literal.getLoBits(32) != 0 &&
2414 (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
2415 !HasMandatoryLiteral) {
2416 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
2417 Inst.getLoc(),
2418 "Can't encode literal as exact 64-bit floating-point operand. "
2419 "Low 32-bits will be set to zero");
2420 Val &= 0xffffffff00000000u;
2421 }
2422
2423 if ((OpTy == AMDGPU::OPERAND_REG_IMM_FP64 ||
2424 OpTy == AMDGPU::OPERAND_REG_INLINE_C_FP64 ||
2425 OpTy == AMDGPU::OPERAND_REG_INLINE_AC_FP64)) {
2426 if (CanUse64BitLiterals && Lit == LitModifier::None &&
2427 (isInt<32>(Val) || isUInt<32>(Val))) {
2428 // The floating-point operand will be verbalized as an
2429 // integer one. If that integer happens to fit 32 bits, on
2430 // re-assembling it will be interpreted as the high half of
2431 // the actual value, so we have to wrap it into lit64().
2432 Lit = LitModifier::Lit64;
2433 } else if (Lit == LitModifier::Lit) {
2434 // For FP64 operands lit() specifies the high half of the value.
2435 Val = Hi_32(Val);
2436 }
2437 }
2438 break;
2439 }
2440
2441 // We don't allow fp literals in 64-bit integer instructions. It is
2442 // unclear how we should encode them. This case should be checked earlier
2443 // in predicate methods (isLiteralImm())
2444 llvm_unreachable("fp literal in 64-bit integer instruction.");
2445
2447 if (CanUse64BitLiterals && Lit == LitModifier::None &&
2448 (isInt<32>(Val) || isUInt<32>(Val)))
2449 Lit = LitModifier::Lit64;
2450 break;
2451
2456 if (Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
2457 Literal == 0x3fc45f306725feed) {
2458 // This is the 1/(2*pi) which is going to be truncated to bf16 with the
2459 // loss of precision. The constant represents ideomatic fp32 value of
2460 // 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16
2461 // bits. Prevent rounding below.
2462 Inst.addOperand(MCOperand::createImm(0x3e22));
2463 return;
2464 }
2465 [[fallthrough]];
2466
2488 bool lost;
2489 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2490 // Convert literal to single precision
2491 FPLiteral.convert(*getOpFltSemantics(OpTy),
2492 APFloat::rmNearestTiesToEven, &lost);
2493 // We allow precision lost but not overflow or underflow. This should be
2494 // checked earlier in isLiteralImm()
2495
2496 Val = FPLiteral.bitcastToAPInt().getZExtValue();
2497 break;
2498 }
2499 default:
2500 llvm_unreachable("invalid operand size");
2501 }
2502
2503 if (Lit != LitModifier::None) {
2504 Inst.addOperand(
2505 MCOperand::createExpr(AMDGPUMCExpr::createLit(Lit, Val, Ctx)));
2506 } else {
2507 Inst.addOperand(MCOperand::createImm(Val));
2508 }
2509 return;
2510 }
2511
2512 // We got int literal token.
2513 // Only sign extend inline immediates.
2514 switch (OpTy) {
2529 break;
2530
2533 if (Lit == LitModifier::None &&
2534 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2535 Inst.addOperand(MCOperand::createImm(Val));
2536 return;
2537 }
2538
2539 // When the 32 MSBs are not zero (which effectively means it can't be safely
2540 // truncated to uint32_t), if the target doesn't support 64-bit literals, or
2541 // the lit modifier is explicitly used, we need to truncate it to the 32
2542 // LSBs.
2543 if (!AsmParser->has64BitLiterals() || Lit == LitModifier::Lit)
2544 Val = Lo_32(Val);
2545 break;
2546
2550 if (Lit == LitModifier::None &&
2551 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2552 Inst.addOperand(MCOperand::createImm(Val));
2553 return;
2554 }
2555
2556 // If the target doesn't support 64-bit literals, we need to use the
2557 // constant as the high 32 MSBs of a double-precision floating point value.
2558 if (!AsmParser->has64BitLiterals()) {
2559 Val = static_cast<uint64_t>(Val) << 32;
2560 } else {
2561 // Now that the target supports 64-bit literals, there are two cases
2562 // where we still want to use the src_literal encoding:
2563 // 1) explicitly forced by using the lit modifier;
2564 // 2) the value has a valid 32-bit representation (signed or unsigned)
2565 // and is not forced by the lit64 modifier.
2566 if (Lit == LitModifier::Lit ||
2567 (Lit != LitModifier::Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
2568 Val = static_cast<uint64_t>(Val) << 32;
2569 }
2570
2571 // For FP64 operands lit() specifies the high half of the value.
2572 if (Lit == LitModifier::Lit)
2573 Val = Hi_32(Val);
2574 break;
2575
2587 break;
2588
2590 if ((isInt<32>(Val) || isUInt<32>(Val)) && Lit != LitModifier::Lit64)
2591 Val <<= 32;
2592 break;
2593
2594 default:
2595 llvm_unreachable("invalid operand type");
2596 }
2597
2598 if (Lit != LitModifier::None) {
2599 Inst.addOperand(
2600 MCOperand::createExpr(AMDGPUMCExpr::createLit(Lit, Val, Ctx)));
2601 } else {
2602 Inst.addOperand(MCOperand::createImm(Val));
2603 }
2604}
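A small standalone example of the Hi_32/Lo_32 splits this function relies on when encoding 64-bit literals; the bit pattern is illustrative. lit() keeps the high half for FP64 operands, while targets without 64-bit literals truncate integer literals to the low half.

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t Val = 0x3FF0000000000000u; // 1.0 as an f64 bit pattern
  const uint32_t Hi = static_cast<uint32_t>(Val >> 32); // Hi_32(Val)
  const uint32_t Lo = static_cast<uint32_t>(Val);       // Lo_32(Val)
  assert(Hi == 0x3FF00000u && Lo == 0u);
}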
2605
2606void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2607 MCOpIdx = Inst.getNumOperands();
2608 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2609}
2610
2611bool AMDGPUOperand::isInlineValue() const {
2612 return isRegKind() && ::isInlineValue(getReg());
2613}
2614
2615//===----------------------------------------------------------------------===//
2616// AsmParser
2617//===----------------------------------------------------------------------===//
2618
2619void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2620 // TODO: make those pre-defined variables read-only.
2621 // Currently there is no suitable machinery in the core llvm-mc for this.
2622 // MCSymbol::isRedefinable is intended for another purpose, and
2623 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
2624 MCContext &Ctx = getContext();
2625 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2626 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2627 }
2628
2629static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2630 if (Is == IS_VGPR) {
2631 switch (RegWidth) {
2632 default: return -1;
2633 case 32:
2634 return AMDGPU::VGPR_32RegClassID;
2635 case 64:
2636 return AMDGPU::VReg_64RegClassID;
2637 case 96:
2638 return AMDGPU::VReg_96RegClassID;
2639 case 128:
2640 return AMDGPU::VReg_128RegClassID;
2641 case 160:
2642 return AMDGPU::VReg_160RegClassID;
2643 case 192:
2644 return AMDGPU::VReg_192RegClassID;
2645 case 224:
2646 return AMDGPU::VReg_224RegClassID;
2647 case 256:
2648 return AMDGPU::VReg_256RegClassID;
2649 case 288:
2650 return AMDGPU::VReg_288RegClassID;
2651 case 320:
2652 return AMDGPU::VReg_320RegClassID;
2653 case 352:
2654 return AMDGPU::VReg_352RegClassID;
2655 case 384:
2656 return AMDGPU::VReg_384RegClassID;
2657 case 512:
2658 return AMDGPU::VReg_512RegClassID;
2659 case 1024:
2660 return AMDGPU::VReg_1024RegClassID;
2661 }
2662 } else if (Is == IS_TTMP) {
2663 switch (RegWidth) {
2664 default: return -1;
2665 case 32:
2666 return AMDGPU::TTMP_32RegClassID;
2667 case 64:
2668 return AMDGPU::TTMP_64RegClassID;
2669 case 128:
2670 return AMDGPU::TTMP_128RegClassID;
2671 case 256:
2672 return AMDGPU::TTMP_256RegClassID;
2673 case 512:
2674 return AMDGPU::TTMP_512RegClassID;
2675 }
2676 } else if (Is == IS_SGPR) {
2677 switch (RegWidth) {
2678 default: return -1;
2679 case 32:
2680 return AMDGPU::SGPR_32RegClassID;
2681 case 64:
2682 return AMDGPU::SGPR_64RegClassID;
2683 case 96:
2684 return AMDGPU::SGPR_96RegClassID;
2685 case 128:
2686 return AMDGPU::SGPR_128RegClassID;
2687 case 160:
2688 return AMDGPU::SGPR_160RegClassID;
2689 case 192:
2690 return AMDGPU::SGPR_192RegClassID;
2691 case 224:
2692 return AMDGPU::SGPR_224RegClassID;
2693 case 256:
2694 return AMDGPU::SGPR_256RegClassID;
2695 case 288:
2696 return AMDGPU::SGPR_288RegClassID;
2697 case 320:
2698 return AMDGPU::SGPR_320RegClassID;
2699 case 352:
2700 return AMDGPU::SGPR_352RegClassID;
2701 case 384:
2702 return AMDGPU::SGPR_384RegClassID;
2703 case 512:
2704 return AMDGPU::SGPR_512RegClassID;
2705 }
2706 } else if (Is == IS_AGPR) {
2707 switch (RegWidth) {
2708 default: return -1;
2709 case 32:
2710 return AMDGPU::AGPR_32RegClassID;
2711 case 64:
2712 return AMDGPU::AReg_64RegClassID;
2713 case 96:
2714 return AMDGPU::AReg_96RegClassID;
2715 case 128:
2716 return AMDGPU::AReg_128RegClassID;
2717 case 160:
2718 return AMDGPU::AReg_160RegClassID;
2719 case 192:
2720 return AMDGPU::AReg_192RegClassID;
2721 case 224:
2722 return AMDGPU::AReg_224RegClassID;
2723 case 256:
2724 return AMDGPU::AReg_256RegClassID;
2725 case 288:
2726 return AMDGPU::AReg_288RegClassID;
2727 case 320:
2728 return AMDGPU::AReg_320RegClassID;
2729 case 352:
2730 return AMDGPU::AReg_352RegClassID;
2731 case 384:
2732 return AMDGPU::AReg_384RegClassID;
2733 case 512:
2734 return AMDGPU::AReg_512RegClassID;
2735 case 1024:
2736 return AMDGPU::AReg_1024RegClassID;
2737 }
2738 }
2739 return -1;
2740}
2741
2744 .Case("exec", AMDGPU::EXEC)
2745 .Case("vcc", AMDGPU::VCC)
2746 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2747 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2748 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2749 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2750 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2751 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2752 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2753 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2754 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2755 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2756 .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
2757 .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
2758 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2759 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2760 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2761 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2762 .Case("m0", AMDGPU::M0)
2763 .Case("vccz", AMDGPU::SRC_VCCZ)
2764 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2765 .Case("execz", AMDGPU::SRC_EXECZ)
2766 .Case("src_execz", AMDGPU::SRC_EXECZ)
2767 .Case("scc", AMDGPU::SRC_SCC)
2768 .Case("src_scc", AMDGPU::SRC_SCC)
2769 .Case("tba", AMDGPU::TBA)
2770 .Case("tma", AMDGPU::TMA)
2771 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2772 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2773 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2774 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2775 .Case("vcc_lo", AMDGPU::VCC_LO)
2776 .Case("vcc_hi", AMDGPU::VCC_HI)
2777 .Case("exec_lo", AMDGPU::EXEC_LO)
2778 .Case("exec_hi", AMDGPU::EXEC_HI)
2779 .Case("tma_lo", AMDGPU::TMA_LO)
2780 .Case("tma_hi", AMDGPU::TMA_HI)
2781 .Case("tba_lo", AMDGPU::TBA_LO)
2782 .Case("tba_hi", AMDGPU::TBA_HI)
2783 .Case("pc", AMDGPU::PC_REG)
2784 .Case("null", AMDGPU::SGPR_NULL)
2785 .Default(AMDGPU::NoRegister);
2786}
2787
2788bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2789 SMLoc &EndLoc, bool RestoreOnFailure) {
2790 auto R = parseRegister();
2791 if (!R) return true;
2792 assert(R->isReg());
2793 RegNo = R->getReg();
2794 StartLoc = R->getStartLoc();
2795 EndLoc = R->getEndLoc();
2796 return false;
2797}
2798
2799bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2800 SMLoc &EndLoc) {
2801 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2802}
2803
2804ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2805 SMLoc &EndLoc) {
2806 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2807 bool PendingErrors = getParser().hasPendingError();
2808 getParser().clearPendingErrors();
2809 if (PendingErrors)
2810 return ParseStatus::Failure;
2811 if (Result)
2812 return ParseStatus::NoMatch;
2813 return ParseStatus::Success;
2814}
2815
2816bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2817 RegisterKind RegKind,
2818 MCRegister Reg1, SMLoc Loc) {
2819 switch (RegKind) {
2820 case IS_SPECIAL:
2821 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2822 Reg = AMDGPU::EXEC;
2823 RegWidth = 64;
2824 return true;
2825 }
2826 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2827 Reg = AMDGPU::FLAT_SCR;
2828 RegWidth = 64;
2829 return true;
2830 }
2831 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2832 Reg = AMDGPU::XNACK_MASK;
2833 RegWidth = 64;
2834 return true;
2835 }
2836 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2837 Reg = AMDGPU::VCC;
2838 RegWidth = 64;
2839 return true;
2840 }
2841 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2842 Reg = AMDGPU::TBA;
2843 RegWidth = 64;
2844 return true;
2845 }
2846 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2847 Reg = AMDGPU::TMA;
2848 RegWidth = 64;
2849 return true;
2850 }
2851 Error(Loc, "register does not fit in the list");
2852 return false;
2853 case IS_VGPR:
2854 case IS_SGPR:
2855 case IS_AGPR:
2856 case IS_TTMP:
2857 if (Reg1 != Reg + RegWidth / 32) {
2858 Error(Loc, "registers in a list must have consecutive indices");
2859 return false;
2860 }
2861 RegWidth += 32;
2862 return true;
2863 default:
2864 llvm_unreachable("unexpected register kind");
2865 }
2866}
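A minimal sketch of the consecutiveness rule enforced for regular registers above; addNext is a hypothetical condensation of the list-building logic, tracking only the first index and the accumulated width.

#include <cassert>

// Each register appended to a list like [s0,s1,s2,s3] must have index
// FirstIdx + RegWidth/32, and every successful append widens the list by 32.
static bool addNext(unsigned FirstIdx, unsigned &RegWidth, unsigned NextIdx) {
  if (NextIdx != FirstIdx + RegWidth / 32)
    return false; // "registers in a list must have consecutive indices"
  RegWidth += 32;
  return true;
}

int main() {
  unsigned Width = 32;           // [s0
  assert(addNext(0, Width, 1));  //  ,s1
  assert(addNext(0, Width, 2));  //  ,s2
  assert(!addNext(0, Width, 4)); //  s4 is not consecutive
  assert(Width == 96);           // s0..s2 span 96 bits
}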
2867
2868struct RegInfo {
2869 StringLiteral Name;
2870 RegisterKind Kind;
2871};
2872
2873static constexpr RegInfo RegularRegisters[] = {
2874 {{"v"}, IS_VGPR},
2875 {{"s"}, IS_SGPR},
2876 {{"ttmp"}, IS_TTMP},
2877 {{"acc"}, IS_AGPR},
2878 {{"a"}, IS_AGPR},
2879};
2880
2881static bool isRegularReg(RegisterKind Kind) {
2882 return Kind == IS_VGPR ||
2883 Kind == IS_SGPR ||
2884 Kind == IS_TTMP ||
2885 Kind == IS_AGPR;
2886}
2887
2888 static const RegInfo *getRegularRegInfo(StringRef Str) {
2889 for (const RegInfo &Reg : RegularRegisters)
2890 if (Str.starts_with(Reg.Name))
2891 return &Reg;
2892 return nullptr;
2893}
2894
2895static bool getRegNum(StringRef Str, unsigned& Num) {
2896 return !Str.getAsInteger(10, Num);
2897}
2898
2899bool
2900AMDGPUAsmParser::isRegister(const AsmToken &Token,
2901 const AsmToken &NextToken) const {
2902
2903 // A list of consecutive registers: [s0,s1,s2,s3]
2904 if (Token.is(AsmToken::LBrac))
2905 return true;
2906
2907 if (!Token.is(AsmToken::Identifier))
2908 return false;
2909
2910 // A single register like s0 or a range of registers like s[0:1]
2911
2912 StringRef Str = Token.getString();
2913 const RegInfo *Reg = getRegularRegInfo(Str);
2914 if (Reg) {
2915 StringRef RegName = Reg->Name;
2916 StringRef RegSuffix = Str.substr(RegName.size());
2917 if (!RegSuffix.empty()) {
2918 RegSuffix.consume_back(".l");
2919 RegSuffix.consume_back(".h");
2920 unsigned Num;
2921 // A single register with an index: rXX
2922 if (getRegNum(RegSuffix, Num))
2923 return true;
2924 } else {
2925 // A range of registers: r[XX:YY].
2926 if (NextToken.is(AsmToken::LBrac))
2927 return true;
2928 }
2929 }
2930
2931 return getSpecialRegForName(Str).isValid();
2932}
2933
2934bool
2935AMDGPUAsmParser::isRegister()
2936{
2937 return isRegister(getToken(), peekToken());
2938}
2939
2940MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2941 unsigned SubReg, unsigned RegWidth,
2942 SMLoc Loc) {
2943 assert(isRegularReg(RegKind));
2944
2945 unsigned AlignSize = 1;
2946 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2947 // SGPR and TTMP registers must be aligned.
2948 // Max required alignment is 4 dwords.
2949 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2950 }
2951
2952 if (RegNum % AlignSize != 0) {
2953 Error(Loc, "invalid register alignment");
2954 return MCRegister();
2955 }
2956
2957 unsigned RegIdx = RegNum / AlignSize;
2958 int RCID = getRegClass(RegKind, RegWidth);
2959 if (RCID == -1) {
2960 Error(Loc, "invalid or unsupported register size");
2961 return MCRegister();
2962 }
2963
2964 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2965 const MCRegisterClass RC = TRI->getRegClass(RCID);
2966 if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
2967 Error(Loc, "register index is out of range");
2968 return AMDGPU::NoRegister;
2969 }
2970
2971 if (RegKind == IS_VGPR && !isGFX1250Plus() && RegIdx + RegWidth / 32 > 256) {
2972 Error(Loc, "register index is out of range");
2973 return MCRegister();
2974 }
2975
2976 MCRegister Reg = RC.getRegister(RegIdx);
2977
2978 if (SubReg) {
2979 Reg = TRI->getSubReg(Reg, SubReg);
2980
2981 // Currently all regular registers have their .l and .h subregisters, so
2982 // we should never need to generate an error here.
2983 assert(Reg && "Invalid subregister!");
2984 }
2985
2986 return Reg;
2987}
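A compact C++20 sketch of the SGPR/TTMP alignment rule computed above; isAlignedSgprTuple is a local stand-in and std::bit_ceil mirrors llvm::bit_ceil. A tuple of N dwords must start at an index aligned to min(bit_ceil(N), 4).

#include <algorithm>
#include <bit>
#include <cassert>

static bool isAlignedSgprTuple(unsigned RegNum, unsigned RegWidth) {
  const unsigned AlignSize = std::min(std::bit_ceil(RegWidth / 32), 4u);
  return RegNum % AlignSize == 0;
}

int main() {
  assert(isAlignedSgprTuple(2, 64));   // s[2:3] is valid
  assert(!isAlignedSgprTuple(1, 64));  // s[1:2] is misaligned
  assert(isAlignedSgprTuple(4, 256));  // 8 dwords need only 4-alignment
  assert(!isAlignedSgprTuple(6, 128)); // s[6:9] is misaligned
}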
2988
2989bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
2990 unsigned &SubReg) {
2991 int64_t RegLo, RegHi;
2992 if (!skipToken(AsmToken::LBrac, "missing register index"))
2993 return false;
2994
2995 SMLoc FirstIdxLoc = getLoc();
2996 SMLoc SecondIdxLoc;
2997
2998 if (!parseExpr(RegLo))
2999 return false;
3000
3001 if (trySkipToken(AsmToken::Colon)) {
3002 SecondIdxLoc = getLoc();
3003 if (!parseExpr(RegHi))
3004 return false;
3005 } else {
3006 RegHi = RegLo;
3007 }
3008
3009 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
3010 return false;
3011
3012 if (!isUInt<32>(RegLo)) {
3013 Error(FirstIdxLoc, "invalid register index");
3014 return false;
3015 }
3016
3017 if (!isUInt<32>(RegHi)) {
3018 Error(SecondIdxLoc, "invalid register index");
3019 return false;
3020 }
3021
3022 if (RegLo > RegHi) {
3023 Error(FirstIdxLoc, "first register index should not exceed second index");
3024 return false;
3025 }
3026
3027 if (RegHi == RegLo) {
3028 StringRef RegSuffix = getTokenStr();
3029 if (RegSuffix == ".l") {
3030 SubReg = AMDGPU::lo16;
3031 lex();
3032 } else if (RegSuffix == ".h") {
3033 SubReg = AMDGPU::hi16;
3034 lex();
3035 }
3036 }
3037
3038 Num = static_cast<unsigned>(RegLo);
3039 RegWidth = 32 * ((RegHi - RegLo) + 1);
3040
3041 return true;
3042}
3043
3044MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
3045 unsigned &RegNum,
3046 unsigned &RegWidth,
3047 SmallVectorImpl<AsmToken> &Tokens) {
3048 assert(isToken(AsmToken::Identifier));
3049 MCRegister Reg = getSpecialRegForName(getTokenStr());
3050 if (Reg) {
3051 RegNum = 0;
3052 RegWidth = 32;
3053 RegKind = IS_SPECIAL;
3054 Tokens.push_back(getToken());
3055 lex(); // skip register name
3056 }
3057 return Reg;
3058}
3059
3060MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3061 unsigned &RegNum,
3062 unsigned &RegWidth,
3063 SmallVectorImpl<AsmToken> &Tokens) {
3064 assert(isToken(AsmToken::Identifier));
3065 StringRef RegName = getTokenStr();
3066 auto Loc = getLoc();
3067
3068 const RegInfo *RI = getRegularRegInfo(RegName);
3069 if (!RI) {
3070 Error(Loc, "invalid register name");
3071 return MCRegister();
3072 }
3073
3074 Tokens.push_back(getToken());
3075 lex(); // skip register name
3076
3077 RegKind = RI->Kind;
3078 StringRef RegSuffix = RegName.substr(RI->Name.size());
3079 unsigned SubReg = NoSubRegister;
3080 if (!RegSuffix.empty()) {
3081 if (RegSuffix.consume_back(".l"))
3082 SubReg = AMDGPU::lo16;
3083 else if (RegSuffix.consume_back(".h"))
3084 SubReg = AMDGPU::hi16;
3085
3086 // Single 32-bit register: vXX.
3087 if (!getRegNum(RegSuffix, RegNum)) {
3088 Error(Loc, "invalid register index");
3089 return MCRegister();
3090 }
3091 RegWidth = 32;
3092 } else {
3093 // Range of registers: v[XX:YY]. ":YY" is optional.
3094 if (!ParseRegRange(RegNum, RegWidth, SubReg))
3095 return MCRegister();
3096 }
3097
3098 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3099}
3100
3101MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3102 unsigned &RegNum, unsigned &RegWidth,
3103 SmallVectorImpl<AsmToken> &Tokens) {
3104 MCRegister Reg;
3105 auto ListLoc = getLoc();
3106
3107 if (!skipToken(AsmToken::LBrac,
3108 "expected a register or a list of registers")) {
3109 return MCRegister();
3110 }
3111
3112 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
3113
3114 auto Loc = getLoc();
3115 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
3116 return MCRegister();
3117 if (RegWidth != 32) {
3118 Error(Loc, "expected a single 32-bit register");
3119 return MCRegister();
3120 }
3121
3122 for (; trySkipToken(AsmToken::Comma); ) {
3123 RegisterKind NextRegKind;
3124 MCRegister NextReg;
3125 unsigned NextRegNum, NextRegWidth;
3126 Loc = getLoc();
3127
3128 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3129 NextRegNum, NextRegWidth,
3130 Tokens)) {
3131 return MCRegister();
3132 }
3133 if (NextRegWidth != 32) {
3134 Error(Loc, "expected a single 32-bit register");
3135 return MCRegister();
3136 }
3137 if (NextRegKind != RegKind) {
3138 Error(Loc, "registers in a list must be of the same kind");
3139 return MCRegister();
3140 }
3141 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
3142 return MCRegister();
3143 }
3144
3145 if (!skipToken(AsmToken::RBrac,
3146 "expected a comma or a closing square bracket")) {
3147 return MCRegister();
3148 }
3149
3150 if (isRegularReg(RegKind))
3151 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3152
3153 return Reg;
3154}
3155
3156bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3157 MCRegister &Reg, unsigned &RegNum,
3158 unsigned &RegWidth,
3159 SmallVectorImpl<AsmToken> &Tokens) {
3160 auto Loc = getLoc();
3161 Reg = MCRegister();
3162
3163 if (isToken(AsmToken::Identifier)) {
3164 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3165 if (!Reg)
3166 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3167 } else {
3168 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3169 }
3170
3171 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3172 if (!Reg) {
3173 assert(Parser.hasPendingError());
3174 return false;
3175 }
3176
3177 if (!subtargetHasRegister(*TRI, Reg)) {
3178 if (Reg == AMDGPU::SGPR_NULL) {
3179 Error(Loc, "'null' operand is not supported on this GPU");
3180 } else {
3182 " register not available on this GPU");
3183 }
3184 return false;
3185 }
3186
3187 return true;
3188}
3189
3190bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3191 MCRegister &Reg, unsigned &RegNum,
3192 unsigned &RegWidth,
3193 bool RestoreOnFailure /*=false*/) {
3194 Reg = MCRegister();
3195
3196 SmallVector<AsmToken, 1> Tokens;
3197 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3198 return true;
3199 }
3200 if (RestoreOnFailure) {
3201 while (!Tokens.empty()) {
3202 getLexer().UnLex(Tokens.pop_back_val());
3203 }
3204 }
3205 return false;
3206}
3207
3208std::optional<StringRef>
3209AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3210 switch (RegKind) {
3211 case IS_VGPR:
3212 return StringRef(".amdgcn.next_free_vgpr");
3213 case IS_SGPR:
3214 return StringRef(".amdgcn.next_free_sgpr");
3215 default:
3216 return std::nullopt;
3217 }
3218}
3219
3220void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3221 auto SymbolName = getGprCountSymbolName(RegKind);
3222 assert(SymbolName && "initializing invalid register kind");
3223 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3224 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
3225 Sym->setRedefinable(true);
3226}
3227
3228bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3229 unsigned DwordRegIndex,
3230 unsigned RegWidth) {
3231 // Symbols are only defined for GCN targets
3232 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3233 return true;
3234
3235 auto SymbolName = getGprCountSymbolName(RegKind);
3236 if (!SymbolName)
3237 return true;
3238 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3239
3240 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3241 int64_t OldCount;
3242
3243 if (!Sym->isVariable())
3244 return !Error(getLoc(),
3245 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3246 if (!Sym->getVariableValue()->evaluateAsAbsolute(OldCount))
3247 return !Error(
3248 getLoc(),
3249 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3250
3251 if (OldCount <= NewMax)
3252 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3253 
3254 return true;
3255}
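A runnable sketch of the bookkeeping above; noteRegister is a hypothetical condensation that keeps the counter in a plain integer, whereas the real code stores it in the .amdgcn.next_free_{v,s}gpr MC symbols.

#include <cassert>
#include <cstdint>

// The counter tracks the next free GPR index, i.e. one past the highest
// dword index used so far.
static void noteRegister(int64_t &NextFree, unsigned DwordRegIndex,
                         unsigned RegWidth) {
  const int64_t NewMax = DwordRegIndex + (RegWidth + 31) / 32 - 1; // divideCeil
  if (NextFree <= NewMax)
    NextFree = NewMax + 1;
}

int main() {
  int64_t NextFreeVgpr = 0;
  noteRegister(NextFreeVgpr, 0, 32); // v0
  noteRegister(NextFreeVgpr, 4, 64); // v[4:5]
  assert(NextFreeVgpr == 6);
  noteRegister(NextFreeVgpr, 2, 32); // v2 does not lower the maximum
  assert(NextFreeVgpr == 6);
}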
3256
3257std::unique_ptr<AMDGPUOperand>
3258AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3259 const auto &Tok = getToken();
3260 SMLoc StartLoc = Tok.getLoc();
3261 SMLoc EndLoc = Tok.getEndLoc();
3262 RegisterKind RegKind;
3263 MCRegister Reg;
3264 unsigned RegNum, RegWidth;
3265
3266 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3267 return nullptr;
3268 }
3269 if (isHsaAbi(getSTI())) {
3270 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3271 return nullptr;
3272 } else
3273 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3274 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3275}
3276
3277ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3278 bool HasSP3AbsModifier, LitModifier Lit) {
3279 // TODO: add syntactic sugar for 1/(2*PI)
3280
3281 if (isRegister() || isModifier())
3282 return ParseStatus::NoMatch;
3283
3284 if (Lit == LitModifier::None) {
3285 if (trySkipId("lit"))
3286 Lit = LitModifier::Lit;
3287 else if (trySkipId("lit64"))
3288 Lit = LitModifier::Lit64;
3289
3290 if (Lit != LitModifier::None) {
3291 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3292 return ParseStatus::Failure;
3293 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, Lit);
3294 if (S.isSuccess() &&
3295 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3296 return ParseStatus::Failure;
3297 return S;
3298 }
3299 }
3300
3301 const auto& Tok = getToken();
3302 const auto& NextTok = peekToken();
3303 bool IsReal = Tok.is(AsmToken::Real);
3304 SMLoc S = getLoc();
3305 bool Negate = false;
3306
3307 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3308 lex();
3309 IsReal = true;
3310 Negate = true;
3311 }
3312
3313 AMDGPUOperand::Modifiers Mods;
3314 Mods.Lit = Lit;
3315
3316 if (IsReal) {
3317 // Floating-point expressions are not supported.
3318 // Can only allow floating-point literals with an
3319 // optional sign.
3320
3321 StringRef Num = getTokenStr();
3322 lex();
3323
3324 APFloat RealVal(APFloat::IEEEdouble());
3325 auto roundMode = APFloat::rmNearestTiesToEven;
3326 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3327 return ParseStatus::Failure;
3328 if (Negate)
3329 RealVal.changeSign();
3330
3331 Operands.push_back(
3332 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3333 AMDGPUOperand::ImmTyNone, true));
3334 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3335 Op.setModifiers(Mods);
3336
3337 return ParseStatus::Success;
3338
3339 } else {
3340 int64_t IntVal;
3341 const MCExpr *Expr;
3342 SMLoc S = getLoc();
3343
3344 if (HasSP3AbsModifier) {
3345 // This is a workaround for handling expressions
3346 // as arguments of SP3 'abs' modifier, for example:
3347 // |1.0|
3348 // |-1|
3349 // |1+x|
3350 // This syntax is not compatible with syntax of standard
3351 // MC expressions (due to the trailing '|').
3352 SMLoc EndLoc;
3353 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3354 return ParseStatus::Failure;
3355 } else {
3356 if (Parser.parseExpression(Expr))
3357 return ParseStatus::Failure;
3358 }
3359
3360 if (Expr->evaluateAsAbsolute(IntVal)) {
3361 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3362 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3363 Op.setModifiers(Mods);
3364 } else {
3365 if (Lit != LitModifier::None)
3366 return ParseStatus::NoMatch;
3367 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3368 }
3369
3370 return ParseStatus::Success;
3371 }
3372
3373 return ParseStatus::NoMatch;
3374}
3375
3376ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3377 if (!isRegister())
3378 return ParseStatus::NoMatch;
3379
3380 if (auto R = parseRegister()) {
3381 assert(R->isReg());
3382 Operands.push_back(std::move(R));
3383 return ParseStatus::Success;
3384 }
3385 return ParseStatus::Failure;
3386}
3387
3388ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3389 bool HasSP3AbsMod, LitModifier Lit) {
3390 ParseStatus Res = parseReg(Operands);
3391 if (!Res.isNoMatch())
3392 return Res;
3393 if (isModifier())
3394 return ParseStatus::NoMatch;
3395 return parseImm(Operands, HasSP3AbsMod, Lit);
3396}
3397
3398bool
3399AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3400 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3401 const auto &str = Token.getString();
3402 return str == "abs" || str == "neg" || str == "sext";
3403 }
3404 return false;
3405}
3406
3407bool
3408AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3409 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3410}
3411
3412bool
3413AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3414 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3415}
3416
3417bool
3418AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3419 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3420}
3421
3422 // Check if this is an operand modifier or an opcode modifier
3423 // which may look like an expression but is not. We should
3424// avoid parsing these modifiers as expressions. Currently
3425// recognized sequences are:
3426// |...|
3427// abs(...)
3428// neg(...)
3429// sext(...)
3430// -reg
3431// -|...|
3432// -abs(...)
3433// name:...
3434//
3435bool
3436AMDGPUAsmParser::isModifier() {
3437
3438 AsmToken Tok = getToken();
3439 AsmToken NextToken[2];
3440 peekTokens(NextToken);
3441
3442 return isOperandModifier(Tok, NextToken[0]) ||
3443 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3444 isOpcodeModifierWithVal(Tok, NextToken[0]);
3445}
3446
3447// Check if the current token is an SP3 'neg' modifier.
3448// Currently this modifier is allowed in the following context:
3449//
3450// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3451// 2. Before an 'abs' modifier: -abs(...)
3452// 3. Before an SP3 'abs' modifier: -|...|
3453//
3454// In all other cases "-" is handled as a part
3455// of an expression that follows the sign.
3456//
3457// Note: When "-" is followed by an integer literal,
3458// this is interpreted as integer negation rather
3459 // than a floating-point NEG modifier applied to the literal.
3460 // Besides being counter-intuitive, such use of the floating-point
3461 // NEG modifier would have resulted in a different meaning
3462 // of integer literals used with VOP1/2/C and VOP3,
3463// for example:
3464// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3465// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3466// Negative fp literals with preceding "-" are
3467// handled likewise for uniformity
3468//
3469bool
3470AMDGPUAsmParser::parseSP3NegModifier() {
3471
3472 AsmToken NextToken[2];
3473 peekTokens(NextToken);
3474
3475 if (isToken(AsmToken::Minus) &&
3476 (isRegister(NextToken[0], NextToken[1]) ||
3477 NextToken[0].is(AsmToken::Pipe) ||
3478 isId(NextToken[0], "abs"))) {
3479 lex();
3480 return true;
3481 }
3482
3483 return false;
3484}
3485
3486ParseStatus
3487AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3488 bool AllowImm) {
3489 bool Neg, SP3Neg;
3490 bool Abs, SP3Abs;
3491 SMLoc Loc;
3492
3493 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3494 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3495 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3496
3497 SP3Neg = parseSP3NegModifier();
3498
3499 Loc = getLoc();
3500 Neg = trySkipId("neg");
3501 if (Neg && SP3Neg)
3502 return Error(Loc, "expected register or immediate");
3503 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3504 return ParseStatus::Failure;
3505
3506 Abs = trySkipId("abs");
3507 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3508 return ParseStatus::Failure;
3509
3510 LitModifier Lit = LitModifier::None;
3511 if (trySkipId("lit")) {
3512 Lit = LitModifier::Lit;
3513 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3514 return ParseStatus::Failure;
3515 } else if (trySkipId("lit64")) {
3516 Lit = LitModifier::Lit64;
3517 if (!skipToken(AsmToken::LParen, "expected left paren after lit64"))
3518 return ParseStatus::Failure;
3519 if (!has64BitLiterals())
3520 return Error(Loc, "lit64 is not supported on this GPU");
3521 }
3522
3523 Loc = getLoc();
3524 SP3Abs = trySkipToken(AsmToken::Pipe);
3525 if (Abs && SP3Abs)
3526 return Error(Loc, "expected register or immediate");
3527
3528 ParseStatus Res;
3529 if (AllowImm) {
3530 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3531 } else {
3532 Res = parseReg(Operands);
3533 }
3534 if (!Res.isSuccess())
3535 return (SP3Neg || Neg || SP3Abs || Abs || Lit != LitModifier::None)
3536 ? ParseStatus::Failure
3537 : Res;
3538
3539 if (Lit != LitModifier::None && !Operands.back()->isImm())
3540 Error(Loc, "expected immediate with lit modifier");
3541
3542 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3543 return ParseStatus::Failure;
3544 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3545 return ParseStatus::Failure;
3546 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3547 return ParseStatus::Failure;
3548 if (Lit != LitModifier::None &&
3549 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3550 return ParseStatus::Failure;
3551
3552 AMDGPUOperand::Modifiers Mods;
3553 Mods.Abs = Abs || SP3Abs;
3554 Mods.Neg = Neg || SP3Neg;
3555 Mods.Lit = Lit;
3556
3557 if (Mods.hasFPModifiers() || Lit != LitModifier::None) {
3558 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3559 if (Op.isExpr())
3560 return Error(Op.getStartLoc(), "expected an absolute expression");
3561 Op.setModifiers(Mods);
3562 }
3563 return ParseStatus::Success;
3564}
3565
3566ParseStatus
3567AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3568 bool AllowImm) {
3569 bool Sext = trySkipId("sext");
3570 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3571 return ParseStatus::Failure;
3572
3573 ParseStatus Res;
3574 if (AllowImm) {
3575 Res = parseRegOrImm(Operands);
3576 } else {
3577 Res = parseReg(Operands);
3578 }
3579 if (!Res.isSuccess())
3580 return Sext ? ParseStatus::Failure : Res;
3581
3582 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3583 return ParseStatus::Failure;
3584
3585 AMDGPUOperand::Modifiers Mods;
3586 Mods.Sext = Sext;
3587
3588 if (Mods.hasIntModifiers()) {
3589 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3590 if (Op.isExpr())
3591 return Error(Op.getStartLoc(), "expected an absolute expression");
3592 Op.setModifiers(Mods);
3593 }
3594
3595 return ParseStatus::Success;
3596}
3597
3598ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3599 return parseRegOrImmWithFPInputMods(Operands, false);
3600}
3601
3602ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3603 return parseRegOrImmWithIntInputMods(Operands, false);
3604}
3605
3606ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3607 auto Loc = getLoc();
3608 if (trySkipId("off")) {
3609 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3610 AMDGPUOperand::ImmTyOff, false));
3611 return ParseStatus::Success;
3612 }
3613
3614 if (!isRegister())
3615 return ParseStatus::NoMatch;
3616
3617 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3618 if (Reg) {
3619 Operands.push_back(std::move(Reg));
3620 return ParseStatus::Success;
3621 }
3622
3623 return ParseStatus::Failure;
3624}
3625
3626unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3627 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3628
3629 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3630 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3631 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3632 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3633 return Match_InvalidOperand;
3634
3635 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3636 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3637 // v_mac_f32/16 allow only dst_sel == DWORD;
3638 auto OpNum =
3639 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3640 const auto &Op = Inst.getOperand(OpNum);
3641 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3642 return Match_InvalidOperand;
3643 }
3644 }
3645
3646 // Asm can first try to match VOPD or VOPD3. By failing early here with
3647 // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3648 // Checking later during validateInstruction does not give a chance to retry
3649 // parsing as a different encoding.
3650 if (tryAnotherVOPDEncoding(Inst))
3651 return Match_InvalidOperand;
3652
3653 return Match_Success;
3654}
3655
3656 static ArrayRef<unsigned> getAllVariants() {
3657 static const unsigned Variants[] = {
3658 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3659 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3660 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3661 };
3662
3663 return ArrayRef(Variants);
3664 }
3665
3666// What asm variants we should check
3667ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3668 if (isForcedDPP() && isForcedVOP3()) {
3669 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3670 return ArrayRef(Variants);
3671 }
3672 if (getForcedEncodingSize() == 32) {
3673 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3674 return ArrayRef(Variants);
3675 }
3676
3677 if (isForcedVOP3()) {
3678 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3679 return ArrayRef(Variants);
3680 }
3681
3682 if (isForcedSDWA()) {
3683 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3684 AMDGPUAsmVariants::SDWA9};
3685 return ArrayRef(Variants);
3686 }
3687
3688 if (isForcedDPP()) {
3689 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3690 return ArrayRef(Variants);
3691 }
3692
3693 return getAllVariants();
3694}
3695
3696StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3697 if (isForcedDPP() && isForcedVOP3())
3698 return "e64_dpp";
3699
3700 if (getForcedEncodingSize() == 32)
3701 return "e32";
3702
3703 if (isForcedVOP3())
3704 return "e64";
3705
3706 if (isForcedSDWA())
3707 return "sdwa";
3708
3709 if (isForcedDPP())
3710 return "dpp";
3711
3712 return "";
3713}
3714
3715MCRegister
3716AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3717 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3718 for (MCPhysReg Reg : Desc.implicit_uses()) {
3719 switch (Reg) {
3720 case AMDGPU::FLAT_SCR:
3721 case AMDGPU::VCC:
3722 case AMDGPU::VCC_LO:
3723 case AMDGPU::VCC_HI:
3724 case AMDGPU::M0:
3725 return Reg;
3726 default:
3727 break;
3728 }
3729 }
3730 return MCRegister();
3731}
3732
3733// NB: This code is correct only when used to check constant
3734 // bus limitations because GFX7 supports no f16 inline constants.
3735// Note that there are no cases when a GFX7 opcode violates
3736// constant bus limitations due to the use of an f16 constant.
3737bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3738 unsigned OpIdx) const {
3739 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3740
3741 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3742 AMDGPU::isKImmOperand(Desc, OpIdx)) {
3743 return false;
3744 }
3745
3746 const MCOperand &MO = Inst.getOperand(OpIdx);
3747
3748 int64_t Val = MO.isImm() ? MO.getImm() : getLitValue(MO.getExpr());
3749 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3750
3751 switch (OpSize) { // expected operand size
3752 case 8:
3753 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3754 case 4:
3755 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3756 case 2: {
3757 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3758 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3759 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16)
3760 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3761
3762 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3763 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3764 return AMDGPU::isInlinableLiteralV2I16(Val);
3765
3766 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3767 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3768 return AMDGPU::isInlinableLiteralV2F16(Val);
3769
3770 if (OperandType == AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16)
3771 return false;
3772
3773 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2BF16 ||
3774 OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16)
3775 return AMDGPU::isInlinableLiteralV2BF16(Val);
3776
3777 if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 ||
3778 OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16)
3779 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3780
3781 if (OperandType == AMDGPU::OPERAND_REG_IMM_BF16 ||
3782 OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16)
3783 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3784
3785 if (OperandType == AMDGPU::OPERAND_KIMM16)
3786 return false;
3787
3788 llvm_unreachable("invalid operand type");
3789 }
3790 default:
3791 llvm_unreachable("invalid operand size");
3792 }
3793}
3794
3795unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3796 if (!isGFX10Plus())
3797 return 1;
3798
3799 switch (Opcode) {
3800 // 64-bit shift instructions can use only one scalar value input
3801 case AMDGPU::V_LSHLREV_B64_e64:
3802 case AMDGPU::V_LSHLREV_B64_gfx10:
3803 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3804 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3805 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3806 case AMDGPU::V_LSHRREV_B64_e64:
3807 case AMDGPU::V_LSHRREV_B64_gfx10:
3808 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3809 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3810 case AMDGPU::V_ASHRREV_I64_e64:
3811 case AMDGPU::V_ASHRREV_I64_gfx10:
3812 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3813 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3814 case AMDGPU::V_LSHL_B64_e64:
3815 case AMDGPU::V_LSHR_B64_e64:
3816 case AMDGPU::V_ASHR_I64_e64:
3817 return 1;
3818 default:
3819 return 2;
3820 }
3821}
3822
3823 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3824 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3825
3826// Get regular operand indices in the same order as specified
3827// in the instruction (but append mandatory literals to the end).
3828 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3829 bool AddMandatoryLiterals = false) {
3830
3831 int16_t ImmIdx =
3832 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3833
3834 if (isVOPD(Opcode)) {
3835 int16_t ImmXIdx =
3836 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3837
3838 return {getNamedOperandIdx(Opcode, OpName::src0X),
3839 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3840 getNamedOperandIdx(Opcode, OpName::vsrc2X),
3841 getNamedOperandIdx(Opcode, OpName::src0Y),
3842 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3843 getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3844 ImmXIdx,
3845 ImmIdx};
3846 }
3847
3848 return {getNamedOperandIdx(Opcode, OpName::src0),
3849 getNamedOperandIdx(Opcode, OpName::src1),
3850 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3851}
3852
3853bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3854 const MCOperand &MO = Inst.getOperand(OpIdx);
3855 if (MO.isImm())
3856 return !isInlineConstant(Inst, OpIdx);
3857 if (MO.isReg()) {
3858 auto Reg = MO.getReg();
3859 if (!Reg)
3860 return false;
3861 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3862 auto PReg = mc2PseudoReg(Reg);
3863 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3864 }
3865 return true;
3866}
3867
3868// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3869// Writelane is special in that it can use SGPR and M0 (which would normally
3870// count as using the constant bus twice - but in this case it is allowed since
3871// the lane selector doesn't count as a use of the constant bus). However, it is
3872// still required to abide by the 1 SGPR rule.
3873static bool checkWriteLane(const MCInst &Inst) {
3874 const unsigned Opcode = Inst.getOpcode();
3875 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3876 return false;
3877 const MCOperand &LaneSelOp = Inst.getOperand(2);
3878 if (!LaneSelOp.isReg())
3879 return false;
3880 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3881 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3882}
3883
3884bool AMDGPUAsmParser::validateConstantBusLimitations(
3885 const MCInst &Inst, const OperandVector &Operands) {
3886 const unsigned Opcode = Inst.getOpcode();
3887 const MCInstrDesc &Desc = MII.get(Opcode);
3888 MCRegister LastSGPR;
3889 unsigned ConstantBusUseCount = 0;
3890 unsigned NumLiterals = 0;
3891 unsigned LiteralSize;
3892
3893 if (!(Desc.TSFlags &
3894 (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3895 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
3896 !isVOPD(Opcode))
3897 return true;
3898
3899 if (checkWriteLane(Inst))
3900 return true;
3901
3902 // Check special imm operands (used by madmk, etc)
3903 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3904 ++NumLiterals;
3905 LiteralSize = 4;
3906 }
3907
3908 SmallDenseSet<MCRegister> SGPRsUsed;
3909 MCRegister SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3910 if (SGPRUsed) {
3911 SGPRsUsed.insert(SGPRUsed);
3912 ++ConstantBusUseCount;
3913 }
3914
3915 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3916
3917 unsigned ConstantBusLimit = getConstantBusLimit(Opcode);
3918
3919 for (int OpIdx : OpIndices) {
3920 if (OpIdx == -1)
3921 continue;
3922
3923 const MCOperand &MO = Inst.getOperand(OpIdx);
3924 if (usesConstantBus(Inst, OpIdx)) {
3925 if (MO.isReg()) {
3926 LastSGPR = mc2PseudoReg(MO.getReg());
3927 // Pairs of registers with a partial intersection like these
3928 // s0, s[0:1]
3929 // flat_scratch_lo, flat_scratch
3930 // flat_scratch_lo, flat_scratch_hi
3931 // are theoretically valid but they are disabled anyway.
3932 // Note that this code mimics SIInstrInfo::verifyInstruction
3933 if (SGPRsUsed.insert(LastSGPR).second) {
3934 ++ConstantBusUseCount;
3935 }
3936 } else { // Expression or a literal
3937
3938 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3939 continue; // special operand like VINTERP attr_chan
3940
3941 // An instruction may use only one literal.
3942 // This has been validated on the previous step.
3943 // See validateVOPLiteral.
3944 // This literal may be used as more than one operand.
3945 // If all these operands are of the same size,
3946 // this literal counts as one scalar value.
3947 // Otherwise it counts as 2 scalar values.
3948 // See "GFX10 Shader Programming", section 3.6.2.3.
3949
3950 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3951 if (Size < 4)
3952 Size = 4;
3953
3954 if (NumLiterals == 0) {
3955 NumLiterals = 1;
3956 LiteralSize = Size;
3957 } else if (LiteralSize != Size) {
3958 NumLiterals = 2;
3959 }
3960 }
3961 }
3962
3963 if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
3964 Error(getOperandLoc(Operands, OpIdx),
3965 "invalid operand (violates constant bus restrictions)");
3966 return false;
3967 }
3968 }
3969 return true;
3970}
3971
3972std::optional<unsigned>
3973AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
3974
3975 const unsigned Opcode = Inst.getOpcode();
3976 if (!isVOPD(Opcode))
3977 return {};
3978
3979 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3980
3981 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3982 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3983 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3984 ? Opr.getReg()
3985 : MCRegister();
3986 };
3987
3988 // On GFX1170+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
3989 // source-cache.
3990 bool SkipSrc =
3991 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1170 ||
3992 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
3993 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
3994 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx13 ||
3995 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250 ||
3996 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx13;
3997 bool AllowSameVGPR = isGFX12Plus();
3998
3999 if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
4000 for (auto OpName : {OpName::src0X, OpName::src0Y}) {
4001 int I = getNamedOperandIdx(Opcode, OpName);
4002 const MCOperand &Op = Inst.getOperand(I);
4003 if (!Op.isImm())
4004 continue;
4005 int64_t Imm = Op.getImm();
4006 if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
4007 !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
4008 return (unsigned)I;
4009 }
4010
4011 for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
4012 OpName::vsrc2Y, OpName::imm}) {
4013 int I = getNamedOperandIdx(Opcode, OpName);
4014 if (I == -1)
4015 continue;
4016 const MCOperand &Op = Inst.getOperand(I);
4017 if (Op.isImm())
4018 return (unsigned)I;
4019 }
4020 }
4021
4022 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4023 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
4024 getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
4025
4026 return InvalidCompOprIdx;
4027}
4028
4029bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
4030 const OperandVector &Operands) {
4031
4032 unsigned Opcode = Inst.getOpcode();
4033 bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
4034
4035 if (AsVOPD3) {
4036 for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
4037 AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
4038 if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
4039 (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
4040 Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
4041 }
4042 }
4043
4044 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
4045 if (!InvalidCompOprIdx.has_value())
4046 return true;
4047
4048 auto CompOprIdx = *InvalidCompOprIdx;
4049 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4050 auto ParsedIdx =
4051 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
4052 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
4053 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
4054
4055 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
4056 if (CompOprIdx == VOPD::Component::DST) {
4057 if (AsVOPD3)
4058 Error(Loc, "dst registers must be distinct");
4059 else
4060 Error(Loc, "one dst register must be even and the other odd");
4061 } else {
4062 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
4063 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
4064 " operands must use different VGPR banks");
4065 }
4066
4067 return false;
4068}
4069
4070// \returns true if \p Inst does not satisfy VOPD constraints, but can be
4071// potentially used as VOPD3 with the same operands.
4072bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
4073 // First check if it fits VOPD
4074 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
4075 if (!InvalidCompOprIdx.has_value())
4076 return false;
4077
4078 // Then if it fits VOPD3
4079 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
4080 if (InvalidCompOprIdx.has_value()) {
4081 // If failed operand is dst it is better to show error about VOPD3
4082 // instruction as it has more capabilities and error message will be
4083 // more informative. If the dst is not legal for VOPD3, then it is not
4084 // legal for VOPD either.
4085 if (*InvalidCompOprIdx == VOPD::Component::DST)
4086 return true;
4087
4088 // Otherwise prefer VOPD as we may find ourselves in an awkward situation
4089 // with a conflict in the tied implicit src2 of fmac and no asm operand
4090 // to point to.
4091 return false;
4092 }
4093 return true;
4094}
4095
4096 // \returns true if a VOPD3 instruction can also be represented as a shorter
4097// VOPD encoding.
4098bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
4099 const unsigned Opcode = Inst.getOpcode();
4100 const auto &II = getVOPDInstInfo(Opcode, &MII);
4101 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
4102 if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
4103 !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
4104 return false;
4105
4106 // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
4107 // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
4108 // be parsed as VOPD which does not accept src2.
4109 if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
4110 II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
4111 return false;
4112
4113 // If any modifiers are set this cannot be VOPD.
4114 for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4115 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4116 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4117 int I = getNamedOperandIdx(Opcode, OpName);
4118 if (I == -1)
4119 continue;
4120 if (Inst.getOperand(I).getImm())
4121 return false;
4122 }
4123
4124 return !tryVOPD3(Inst);
4125}
4126
4127 // VOPD3 has more relaxed register constraints than VOPD. We prefer the
4128 // shorter VOPD form but switch to VOPD3 otherwise.
4129bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4130 const unsigned Opcode = Inst.getOpcode();
4131 if (!isGFX1250Plus() || !isVOPD(Opcode))
4132 return false;
4133
4134 if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4135 return tryVOPD(Inst);
4136 return tryVOPD3(Inst);
4137}
4138
4139bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4140
4141 const unsigned Opc = Inst.getOpcode();
4142 const MCInstrDesc &Desc = MII.get(Opc);
4143
4144 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
4145 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
4146 assert(ClampIdx != -1);
4147 return Inst.getOperand(ClampIdx).getImm() == 0;
4148 }
4149
4150 return true;
4151}
4152
4153 constexpr uint64_t MIMGFlags =
4154 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
4155
4156bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc) {
4157
4158 const unsigned Opc = Inst.getOpcode();
4159 const MCInstrDesc &Desc = MII.get(Opc);
4160
4161 if ((Desc.TSFlags & MIMGFlags) == 0)
4162 return true;
4163
4164 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
4165 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4166 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
4167
4168 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
4169 return true;
4170
4171 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
4172 return true;
4173
4174 unsigned VDataSize = getRegOperandSize(Desc, VDataIdx);
4175 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
4176 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4177 if (DMask == 0)
4178 DMask = 1;
4179
4180 bool IsPackedD16 = false;
4181 unsigned DataSize =
4182 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
4183 if (hasPackedD16()) {
4184 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4185 IsPackedD16 = D16Idx >= 0;
4186 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
4187 DataSize = (DataSize + 1) / 2;
4188 }
4189
4190 if ((VDataSize / 4) == DataSize + TFESize)
4191 return true;
4192
4193 StringRef Modifiers;
4194 if (isGFX90A())
4195 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
4196 else
4197 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
4198
4199 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
4200 return false;
4201}
4202
4203bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc) {
4204 const unsigned Opc = Inst.getOpcode();
4205 const MCInstrDesc &Desc = MII.get(Opc);
4206
4207 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
4208 return true;
4209
4210 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4211
4212 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4213 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4214 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
4215 AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
4216 ? AMDGPU::OpName::srsrc
4217 : AMDGPU::OpName::rsrc;
4218 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
4219 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4220 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
4221
4222 assert(VAddr0Idx != -1);
4223 assert(SrsrcIdx != -1);
4224 assert(SrsrcIdx > VAddr0Idx);
4225
4226 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
4227 if (BaseOpcode->BVH) {
4228 if (IsA16 == BaseOpcode->A16)
4229 return true;
4230 Error(IDLoc, "image address size does not match a16");
4231 return false;
4232 }
4233
4234 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4235 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4236 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4237 unsigned ActualAddrSize =
4238 IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(Desc, VAddr0Idx) / 4;
4239
4240 unsigned ExpectedAddrSize =
4241 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
4242
4243 if (IsNSA) {
4244 if (hasPartialNSAEncoding() &&
4245 ExpectedAddrSize >
4246 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
4247 int VAddrLastIdx = SrsrcIdx - 1;
4248 unsigned VAddrLastSize = getRegOperandSize(Desc, VAddrLastIdx) / 4;
4249
4250 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4251 }
4252 } else {
4253 if (ExpectedAddrSize > 12)
4254 ExpectedAddrSize = 16;
4255
4256 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
4257 // This provides backward compatibility for assembly created
4258 // before 160b/192b/224b types were directly supported.
4259 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4260 return true;
4261 }
4262
4263 if (ActualAddrSize == ExpectedAddrSize)
4264 return true;
4265
4266 Error(IDLoc, "image address size does not match dim and a16");
4267 return false;
4268}
4269
4270bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4271
4272 const unsigned Opc = Inst.getOpcode();
4273 const MCInstrDesc &Desc = MII.get(Opc);
4274
4275 if ((Desc.TSFlags & MIMGFlags) == 0)
4276 return true;
4277 if (!Desc.mayLoad() || !Desc.mayStore())
4278 return true; // Not atomic
4279
4280 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4281 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4282
4283 // This is an incomplete check because image_atomic_cmpswap
4284 // may only use 0x3 and 0xf while other atomic operations
4285 // may use 0x1 and 0x3. However, these limitations are
4286 // verified when we check that dmask matches dst size.
4287 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4288}
4289
4290bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4291
4292 const unsigned Opc = Inst.getOpcode();
4293 const MCInstrDesc &Desc = MII.get(Opc);
4294
4295 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4296 return true;
4297
4298 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4299 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4300
4301 // GATHER4 instructions use dmask in a different fashion compared to
4302 // other MIMG instructions. The only useful DMASK values are
4303 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4304 // (red,red,red,red) etc.) The ISA document doesn't mention
4305 // this.
4306 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4307}
4308
4309bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4310 const OperandVector &Operands) {
4311 if (!isGFX10Plus())
4312 return true;
4313
4314 const unsigned Opc = Inst.getOpcode();
4315 const MCInstrDesc &Desc = MII.get(Opc);
4316
4317 if ((Desc.TSFlags & MIMGFlags) == 0)
4318 return true;
4319
4320 // image_bvh_intersect_ray instructions do not have dim
4321 if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH)
4322 return true;
4323
4324 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4325 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4326 if (Op.isDim())
4327 return true;
4328 }
4329 return false;
4330}
4331
4332bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4333 const unsigned Opc = Inst.getOpcode();
4334 const MCInstrDesc &Desc = MII.get(Opc);
4335
4336 if ((Desc.TSFlags & MIMGFlags) == 0)
4337 return true;
4338
4339 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4340 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4341 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4342
4343 if (!BaseOpcode->MSAA)
4344 return true;
4345
4346 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4347 assert(DimIdx != -1);
4348
4349 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4350 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4351
4352 return DimInfo->MSAA;
4353}
4354
4355static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4356{
4357 switch (Opcode) {
4358 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4359 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4360 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4361 return true;
4362 default:
4363 return false;
4364 }
4365}
4366
4367 // movrels* opcodes should only allow VGPRs as src0.
4368// This is specified in .td description for vop1/vop3,
4369// but sdwa is handled differently. See isSDWAOperand.
4370bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4371 const OperandVector &Operands) {
4372
4373 const unsigned Opc = Inst.getOpcode();
4374 const MCInstrDesc &Desc = MII.get(Opc);
4375
4376 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4377 return true;
4378
4379 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4380 assert(Src0Idx != -1);
4381
4382 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4383 if (Src0.isReg()) {
4384 auto Reg = mc2PseudoReg(Src0.getReg());
4385 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4386 if (!isSGPR(Reg, TRI))
4387 return true;
4388 }
4389
4390 Error(getOperandLoc(Operands, Src0Idx), "source operand must be a VGPR");
4391 return false;
4392}
4393
4394bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4395 const OperandVector &Operands) {
4396
4397 const unsigned Opc = Inst.getOpcode();
4398
4399 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4400 return true;
4401
4402 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4403 assert(Src0Idx != -1);
4404
4405 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4406 if (!Src0.isReg())
4407 return true;
4408
4409 auto Reg = mc2PseudoReg(Src0.getReg());
4410 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4411 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4412 Error(getOperandLoc(Operands, Src0Idx),
4413 "source operand must be either a VGPR or an inline constant");
4414 return false;
4415 }
4416
4417 return true;
4418}
4419
4420bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4421 const OperandVector &Operands) {
4422 unsigned Opcode = Inst.getOpcode();
4423 const MCInstrDesc &Desc = MII.get(Opcode);
4424
4425 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4426 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4427 return true;
4428
4429 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4430 if (Src2Idx == -1)
4431 return true;
4432
4433 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4434 Error(getOperandLoc(Operands, Src2Idx),
4435 "inline constants are not allowed for this operand");
4436 return false;
4437 }
4438
4439 return true;
4440}
4441
4442bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4443 const OperandVector &Operands) {
4444 const unsigned Opc = Inst.getOpcode();
4445 const MCInstrDesc &Desc = MII.get(Opc);
4446
4447 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4448 return true;
4449
4450 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4451 if (BlgpIdx != -1) {
4452 if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
4453 int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
4454
4455 unsigned CBSZ = Inst.getOperand(CbszIdx).getImm();
4456 unsigned BLGP = Inst.getOperand(BlgpIdx).getImm();
4457
4458 // Validate the correct register size was used for the floating point
4459 // format operands
4460
4461 bool Success = true;
4462 if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) {
4463 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4464 Error(getOperandLoc(Operands, Src0Idx),
4465 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4466 Success = false;
4467 }
4468
4469 if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) {
4470 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4471 Error(getOperandLoc(Operands, Src1Idx),
4472 "wrong register tuple size for blgp value " + Twine(BLGP));
4473 Success = false;
4474 }
4475
4476 return Success;
4477 }
4478 }
4479
4480 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4481 if (Src2Idx == -1)
4482 return true;
4483
4484 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4485 if (!Src2.isReg())
4486 return true;
4487
4488 MCRegister Src2Reg = Src2.getReg();
4489 MCRegister DstReg = Inst.getOperand(0).getReg();
4490 if (Src2Reg == DstReg)
4491 return true;
4492
4493 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4494 if (TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[0], HwMode))
4495 .getSizeInBits() <= 128)
4496 return true;
4497
4498 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4499 Error(getOperandLoc(Operands, Src2Idx),
4500 "source 2 operand must not partially overlap with dst");
4501 return false;
4502 }
4503
4504 return true;
4505}
4506
4507bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4508 switch (Inst.getOpcode()) {
4509 default:
4510 return true;
4511 case V_DIV_SCALE_F32_gfx6_gfx7:
4512 case V_DIV_SCALE_F32_vi:
4513 case V_DIV_SCALE_F32_gfx10:
4514 case V_DIV_SCALE_F64_gfx6_gfx7:
4515 case V_DIV_SCALE_F64_vi:
4516 case V_DIV_SCALE_F64_gfx10:
4517 break;
4518 }
4519
4520 // TODO: Check that src0 = src1 or src2.
4521
4522 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4523 AMDGPU::OpName::src1_modifiers,
4524 AMDGPU::OpName::src2_modifiers}) {
4525 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4526 .getImm() &
4527 SISrcMods::ABS) {
4528 return false;
4529 }
4530 }
4531
4532 return true;
4533}
4534
4535bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4536
4537 const unsigned Opc = Inst.getOpcode();
4538 const MCInstrDesc &Desc = MII.get(Opc);
4539
4540 if ((Desc.TSFlags & MIMGFlags) == 0)
4541 return true;
4542
4543 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4544 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4545 if (isCI() || isSI())
4546 return false;
4547 }
4548
4549 return true;
4550}
4551
4552bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4553 const unsigned Opc = Inst.getOpcode();
4554 const MCInstrDesc &Desc = MII.get(Opc);
4555
4556 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4557 return true;
4558
4559 int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
4560
4561 return R128Idx < 0 || !Inst.getOperand(R128Idx).getImm();
4562}
4563
4564static bool IsRevOpcode(const unsigned Opcode)
4565{
4566 switch (Opcode) {
4567 case AMDGPU::V_SUBREV_F32_e32:
4568 case AMDGPU::V_SUBREV_F32_e64:
4569 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4570 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4571 case AMDGPU::V_SUBREV_F32_e32_vi:
4572 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4573 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4574 case AMDGPU::V_SUBREV_F32_e64_vi:
4575
4576 case AMDGPU::V_SUBREV_CO_U32_e32:
4577 case AMDGPU::V_SUBREV_CO_U32_e64:
4578 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4579 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4580
4581 case AMDGPU::V_SUBBREV_U32_e32:
4582 case AMDGPU::V_SUBBREV_U32_e64:
4583 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4584 case AMDGPU::V_SUBBREV_U32_e32_vi:
4585 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4586 case AMDGPU::V_SUBBREV_U32_e64_vi:
4587
4588 case AMDGPU::V_SUBREV_U32_e32:
4589 case AMDGPU::V_SUBREV_U32_e64:
4590 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4591 case AMDGPU::V_SUBREV_U32_e32_vi:
4592 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4593 case AMDGPU::V_SUBREV_U32_e64_vi:
4594
4595 case AMDGPU::V_SUBREV_F16_e32:
4596 case AMDGPU::V_SUBREV_F16_e64:
4597 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4598 case AMDGPU::V_SUBREV_F16_e32_vi:
4599 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4600 case AMDGPU::V_SUBREV_F16_e64_vi:
4601
4602 case AMDGPU::V_SUBREV_U16_e32:
4603 case AMDGPU::V_SUBREV_U16_e64:
4604 case AMDGPU::V_SUBREV_U16_e32_vi:
4605 case AMDGPU::V_SUBREV_U16_e64_vi:
4606
4607 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4608 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4609 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4610
4611 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4612 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4613
4614 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4615 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4616
4617 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4618 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4619
4620 case AMDGPU::V_LSHRREV_B32_e32:
4621 case AMDGPU::V_LSHRREV_B32_e64:
4622 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4623 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4624 case AMDGPU::V_LSHRREV_B32_e32_vi:
4625 case AMDGPU::V_LSHRREV_B32_e64_vi:
4626 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4627 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4628
4629 case AMDGPU::V_ASHRREV_I32_e32:
4630 case AMDGPU::V_ASHRREV_I32_e64:
4631 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4632 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4633 case AMDGPU::V_ASHRREV_I32_e32_vi:
4634 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4635 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4636 case AMDGPU::V_ASHRREV_I32_e64_vi:
4637
4638 case AMDGPU::V_LSHLREV_B32_e32:
4639 case AMDGPU::V_LSHLREV_B32_e64:
4640 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4641 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4642 case AMDGPU::V_LSHLREV_B32_e32_vi:
4643 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4644 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4645 case AMDGPU::V_LSHLREV_B32_e64_vi:
4646
4647 case AMDGPU::V_LSHLREV_B16_e32:
4648 case AMDGPU::V_LSHLREV_B16_e64:
4649 case AMDGPU::V_LSHLREV_B16_e32_vi:
4650 case AMDGPU::V_LSHLREV_B16_e64_vi:
4651 case AMDGPU::V_LSHLREV_B16_gfx10:
4652
4653 case AMDGPU::V_LSHRREV_B16_e32:
4654 case AMDGPU::V_LSHRREV_B16_e64:
4655 case AMDGPU::V_LSHRREV_B16_e32_vi:
4656 case AMDGPU::V_LSHRREV_B16_e64_vi:
4657 case AMDGPU::V_LSHRREV_B16_gfx10:
4658
4659 case AMDGPU::V_ASHRREV_I16_e32:
4660 case AMDGPU::V_ASHRREV_I16_e64:
4661 case AMDGPU::V_ASHRREV_I16_e32_vi:
4662 case AMDGPU::V_ASHRREV_I16_e64_vi:
4663 case AMDGPU::V_ASHRREV_I16_gfx10:
4664
4665 case AMDGPU::V_LSHLREV_B64_e64:
4666 case AMDGPU::V_LSHLREV_B64_gfx10:
4667 case AMDGPU::V_LSHLREV_B64_vi:
4668
4669 case AMDGPU::V_LSHRREV_B64_e64:
4670 case AMDGPU::V_LSHRREV_B64_gfx10:
4671 case AMDGPU::V_LSHRREV_B64_vi:
4672
4673 case AMDGPU::V_ASHRREV_I64_e64:
4674 case AMDGPU::V_ASHRREV_I64_gfx10:
4675 case AMDGPU::V_ASHRREV_I64_vi:
4676
4677 case AMDGPU::V_PK_LSHLREV_B16:
4678 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4679 case AMDGPU::V_PK_LSHLREV_B16_vi:
4680
4681 case AMDGPU::V_PK_LSHRREV_B16:
4682 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4683 case AMDGPU::V_PK_LSHRREV_B16_vi:
4684 case AMDGPU::V_PK_ASHRREV_I16:
4685 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4686 case AMDGPU::V_PK_ASHRREV_I16_vi:
4687 return true;
4688 default:
4689 return false;
4690 }
4691}
4692
4693bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst,
4694 const OperandVector &Operands) {
4695 using namespace SIInstrFlags;
4696 const unsigned Opcode = Inst.getOpcode();
4697 const MCInstrDesc &Desc = MII.get(Opcode);
4698
4699 // lds_direct register is defined so that it can be used
4700 // with 9-bit operands only. Ignore encodings which do not accept these.
4701 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4702 if ((Desc.TSFlags & Enc) == 0)
4703 return true;
4704
4705 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4706 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4707 if (SrcIdx == -1)
4708 break;
4709 const auto &Src = Inst.getOperand(SrcIdx);
4710 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4711
4712 if (isGFX90A() || isGFX11Plus()) {
4713 Error(getOperandLoc(Operands, SrcIdx),
4714 "lds_direct is not supported on this GPU");
4715 return false;
4716 }
4717
4718 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) {
4719 Error(getOperandLoc(Operands, SrcIdx),
4720 "lds_direct cannot be used with this instruction");
4721 return false;
4722 }
4723
4724 if (SrcName != OpName::src0) {
4725 Error(getOperandLoc(Operands, SrcIdx),
4726 "lds_direct may be used as src0 only");
4727 return false;
4728 }
4729 }
4730 }
4731
4732 return true;
4733}
4734
4735SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4736 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4737 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4738 if (Op.isFlatOffset())
4739 return Op.getStartLoc();
4740 }
4741 return getLoc();
4742}
4743
4744bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4745 const OperandVector &Operands) {
4746 auto Opcode = Inst.getOpcode();
4747 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4748 if (OpNum == -1)
4749 return true;
4750
4751 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4752 if ((TSFlags & SIInstrFlags::FLAT))
4753 return validateFlatOffset(Inst, Operands);
4754
4755 if ((TSFlags & SIInstrFlags::SMRD))
4756 return validateSMEMOffset(Inst, Operands);
4757
4758 const auto &Op = Inst.getOperand(OpNum);
4759 // GFX12+ buffer ops: InstOffset is a signed 24-bit field, but must not be negative.
4760 if (isGFX12Plus() &&
4761 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4762 const unsigned OffsetSize = 24;
4763 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
4764 Error(getFlatOffsetLoc(Operands),
4765 Twine("expected a ") + Twine(OffsetSize - 1) +
4766 "-bit unsigned offset for buffer ops");
4767 return false;
4768 }
4769 } else {
4770 const unsigned OffsetSize = 16;
4771 if (!isUIntN(OffsetSize, Op.getImm())) {
4772 Error(getFlatOffsetLoc(Operands),
4773 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4774 return false;
4775 }
4776 }
4777 return true;
4778}
4779
4780bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4781 const OperandVector &Operands) {
4782 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4783 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4784 return true;
4785
4786 auto Opcode = Inst.getOpcode();
4787 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4788 assert(OpNum != -1);
4789
4790 const auto &Op = Inst.getOperand(OpNum);
4791 if (!hasFlatOffsets() && Op.getImm() != 0) {
4792 Error(getFlatOffsetLoc(Operands),
4793 "flat offset modifier is not supported on this GPU");
4794 return false;
4795 }
4796
4797 // For pre-GFX12 FLAT instructions the offset must be positive;
4798 // MSB is ignored and forced to zero.
4799 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4800 bool AllowNegative =
4801 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4802 isGFX12Plus();
4803 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4804 Error(getFlatOffsetLoc(Operands),
4805 Twine("expected a ") +
4806 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4807 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4808 return false;
4809 }
4810
4811 return true;
4812}
4813
4814SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4815 // Start with second operand because SMEM Offset cannot be dst or src0.
4816 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4817 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4818 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4819 return Op.getStartLoc();
4820 }
4821 return getLoc();
4822}
4823
4824bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4825 const OperandVector &Operands) {
4826 if (isCI() || isSI())
4827 return true;
4828
4829 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4830 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4831 return true;
4832
4833 auto Opcode = Inst.getOpcode();
4834 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4835 if (OpNum == -1)
4836 return true;
4837
4838 const auto &Op = Inst.getOperand(OpNum);
4839 if (!Op.isImm())
4840 return true;
4841
4842 uint64_t Offset = Op.getImm();
4843 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4844 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4845 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4846 return true;
4847
4848 Error(getSMEMOffsetLoc(Operands),
4849 isGFX12Plus() && IsBuffer
4850 ? "expected a 23-bit unsigned offset for buffer ops"
4851 : isGFX12Plus() ? "expected a 24-bit signed offset"
4852 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4853 : "expected a 21-bit signed offset");
4854
4855 return false;
4856}
4857
4858bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
4859 const OperandVector &Operands) {
4860 unsigned Opcode = Inst.getOpcode();
4861 const MCInstrDesc &Desc = MII.get(Opcode);
4862 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4863 return true;
4864
4865 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4866 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4867
4868 const int OpIndices[] = { Src0Idx, Src1Idx };
4869
4870 unsigned NumExprs = 0;
4871 unsigned NumLiterals = 0;
4872 int64_t LiteralValue;
4873
4874 for (int OpIdx : OpIndices) {
4875 if (OpIdx == -1) break;
4876
4877 const MCOperand &MO = Inst.getOperand(OpIdx);
4878 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4879 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4880 bool IsLit = false;
4881 std::optional<int64_t> Imm;
4882 if (MO.isImm()) {
4883 Imm = MO.getImm();
4884 } else if (MO.isExpr()) {
4885 if (isLitExpr(MO.getExpr())) {
4886 IsLit = true;
4887 Imm = getLitValue(MO.getExpr());
4888 }
4889 } else {
4890 continue;
4891 }
4892
4893 if (!Imm.has_value()) {
4894 ++NumExprs;
4895 } else if (!isInlineConstant(Inst, OpIdx)) {
4896 auto OpType = static_cast<AMDGPU::OperandType>(
4897 Desc.operands()[OpIdx].OperandType);
4898 int64_t Value = encode32BitLiteral(*Imm, OpType, IsLit);
4899 if (NumLiterals == 0 || LiteralValue != Value) {
4900 LiteralValue = Value;
4901 ++NumLiterals;
4902 }
4903 }
4904 }
4905 }
4906
4907 if (NumLiterals + NumExprs <= 1)
4908 return true;
4909
4910 Error(getOperandLoc(Operands, Src1Idx),
4911 "only one unique literal operand is allowed");
4912 return false;
4913}
4914
4915bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4916 const unsigned Opc = Inst.getOpcode();
4917 if (isPermlane16(Opc)) {
4918 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4919 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4920
4921 if (OpSel & ~3)
4922 return false;
4923 }
4924
4925 uint64_t TSFlags = MII.get(Opc).TSFlags;
4926
4927 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4928 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4929 if (OpSelIdx != -1) {
4930 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4931 return false;
4932 }
4933 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4934 if (OpSelHiIdx != -1) {
4935 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4936 return false;
4937 }
4938 }
4939
4940 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4941 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4942 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4943 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4944 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4945 if (OpSel & 3)
4946 return false;
4947 }
4948
4949 // Packed math FP32 instructions typically accept SGPRs or VGPRs as source
4950 // operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
4951 // the first SGPR and use it for both the low and high operations.
4952 if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
4953 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4954 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4955 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4956 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4957
4958 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4959 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4960 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4961 unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
4962
4963 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4964
4965 auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
4966 unsigned Mask = 1U << Index;
4967 return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
4968 };
4969
4970 if (Src0.isReg() && isSGPR(Src0.getReg(), TRI) &&
4971 !VerifyOneSGPR(/*Index=*/0))
4972 return false;
4973 if (Src1.isReg() && isSGPR(Src1.getReg(), TRI) &&
4974 !VerifyOneSGPR(/*Index=*/1))
4975 return false;
4976
4977 int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4978 if (Src2Idx != -1) {
4979 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4980 if (Src2.isReg() && isSGPR(Src2.getReg(), TRI) &&
4981 !VerifyOneSGPR(/*Index=*/2))
4982 return false;
4983 }
4984 }
4985
4986 return true;
4987}
4988
4989bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
4990 if (!hasTrue16Insts())
4991 return true;
4992 const MCRegisterInfo *MRI = getMRI();
4993 const unsigned Opc = Inst.getOpcode();
4994 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4995 if (OpSelIdx == -1)
4996 return true;
4997 unsigned OpSelOpValue = Inst.getOperand(OpSelIdx).getImm();
4998 // If the value is 0 we could have a default OpSel Operand, so conservatively
4999 // allow it.
5000 if (OpSelOpValue == 0)
5001 return true;
5002 unsigned OpCount = 0;
5003 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
5004 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
5005 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
5006 if (OpIdx == -1)
5007 continue;
5008 const MCOperand &Op = Inst.getOperand(OpIdx);
5009 if (Op.isReg() &&
5010 MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
5011 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Op.getReg(), *MRI);
5012 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
5013 if (OpSelOpIsHi != VGPRSuffixIsHi)
5014 return false;
5015 }
5016 ++OpCount;
5017 }
5018
5019 return true;
5020}
5021
5022bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
5023 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
5024
5025 const unsigned Opc = Inst.getOpcode();
5026 uint64_t TSFlags = MII.get(Opc).TSFlags;
5027
5028 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
5029 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
5030 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
5031 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
5032 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
5033 !(TSFlags & SIInstrFlags::IsSWMMAC))
5034 return true;
5035
5036 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
5037 if (NegIdx == -1)
5038 return true;
5039
5040 unsigned Neg = Inst.getOperand(NegIdx).getImm();
5041
5042 // Some instructions have a neg_lo or neg_hi operand, but the neg modifier
5043 // is allowed on some of their src operands and not on others.
5044 // Conveniently, such instructions don't have a src_modifiers operand for the
5045 // src operands that don't allow neg, because those operands also don't allow opsel.
5046
5047 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
5048 AMDGPU::OpName::src1_modifiers,
5049 AMDGPU::OpName::src2_modifiers};
5050
5051 for (unsigned i = 0; i < 3; ++i) {
5052 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
5053 if (Neg & (1 << i))
5054 return false;
5055 }
5056 }
5057
5058 return true;
5059}
5060
5061bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
5062 const OperandVector &Operands) {
5063 const unsigned Opc = Inst.getOpcode();
5064 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
5065 if (DppCtrlIdx >= 0) {
5066 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
5067
5068 if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) &&
5069 AMDGPU::isDPALU_DPP(MII.get(Opc), MII, getSTI())) {
5070 // DP ALU DPP is supported for row_newbcast only on GFX9* and row_share
5071 // only on GFX12.
5072 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
5073 Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
5074 : "DP ALU dpp only supports row_newbcast");
5075 return false;
5076 }
5077 }
5078
5079 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
5080 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5081
5082 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
5083 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
5084 if (Src1Idx >= 0) {
5085 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
5086 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5087 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
5088 Error(getOperandLoc(Operands, Src1Idx),
5089 "invalid operand for instruction");
5090 return false;
5091 }
5092 if (Src1.isImm()) {
5093 Error(getInstLoc(Operands),
5094 "src1 immediate operand invalid for instruction");
5095 return false;
5096 }
5097 }
5098 }
5099
5100 return true;
5101}
5102
5103// Check if VCC register matches wavefront size
5104bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
5105 return (Reg == AMDGPU::VCC && isWave64()) ||
5106 (Reg == AMDGPU::VCC_LO && isWave32());
5107}
5108
5109// One unique literal can be used. VOP3 literal is only allowed in GFX10+
5110bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
5111 const OperandVector &Operands) {
5112 unsigned Opcode = Inst.getOpcode();
5113 const MCInstrDesc &Desc = MII.get(Opcode);
5114 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
5115 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
5116 !HasMandatoryLiteral && !isVOPD(Opcode))
5117 return true;
5118
5119 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
5120
5121 std::optional<unsigned> LiteralOpIdx;
5122 std::optional<uint64_t> LiteralValue;
5123
5124 for (int OpIdx : OpIndices) {
5125 if (OpIdx == -1)
5126 continue;
5127
5128 const MCOperand &MO = Inst.getOperand(OpIdx);
5129 if (!MO.isImm() && !MO.isExpr())
5130 continue;
5131 if (!isSISrcOperand(Desc, OpIdx))
5132 continue;
5133
5134 std::optional<int64_t> Imm;
5135 if (MO.isImm())
5136 Imm = MO.getImm();
5137 else if (MO.isExpr() && isLitExpr(MO.getExpr()))
5138 Imm = getLitValue(MO.getExpr());
5139
5140 bool IsAnotherLiteral = false;
5141 bool IsForcedLit = findMCOperand(Operands, OpIdx).isForcedLit();
5142 bool IsForcedLit64 = findMCOperand(Operands, OpIdx).isForcedLit64();
5143 if (!Imm.has_value()) {
5144 // Literal value not known, so we conservatively assume it's different.
5145 IsAnotherLiteral = true;
5146 } else if (IsForcedLit || IsForcedLit64 || !isInlineConstant(Inst, OpIdx)) {
5147 uint64_t Value = *Imm;
5148 bool IsForcedFP64 =
5149 Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
5150 (Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_REG_IMM_FP64 &&
5151 HasMandatoryLiteral);
5152 bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpIdx)) &&
5153 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
5154 bool IsValid32Op =
5155 IsForcedLit || AMDGPU::isValid32BitLiteral(Value, IsFP64);
5156
5157 if (((!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
5158 !IsForcedFP64) ||
5159 (IsForcedLit64 && !HasMandatoryLiteral)) &&
5160 (!has64BitLiterals() || Desc.getSize() != 4)) {
5161 Error(getOperandLoc(Operands, OpIdx),
5162 "invalid operand for instruction");
5163 return false;
5164 }
5165
5166 // Only src0 can use lit64 in VOP* encoding.
5167 if (!IsForcedFP64 && (IsForcedLit64 || !IsValid32Op) &&
5168 OpIdx != getNamedOperandIdx(Opcode, OpName::src0)) {
5169 Error(getOperandLoc(Operands, OpIdx),
5170 "invalid operand for instruction");
5171 return false;
5172 }
5173
5174 if (IsFP64 && IsValid32Op && !IsForcedFP64)
5175 Value = Hi_32(Value);
5176
5177 IsAnotherLiteral = !LiteralValue || *LiteralValue != Value;
5178 LiteralValue = Value;
5179 }
5180
5181 if (IsAnotherLiteral && !HasMandatoryLiteral &&
5182 !getFeatureBits()[FeatureVOP3Literal]) {
5183 Error(getOperandLoc(Operands, OpIdx),
5184 "literal operands are not supported");
5185 return false;
5186 }
5187
5188 if (LiteralOpIdx && IsAnotherLiteral) {
5189 Error(getLaterLoc(getOperandLoc(Operands, OpIdx),
5190 getOperandLoc(Operands, *LiteralOpIdx)),
5191 "only one unique literal operand is allowed");
5192 return false;
5193 }
5194
5195 if (IsAnotherLiteral)
5196 LiteralOpIdx = OpIdx;
5197 }
5198
5199 return true;
5200}
5201
5202// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
5203static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5204 const MCRegisterInfo *MRI) {
5205 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name);
5206 if (OpIdx < 0)
5207 return -1;
5208
5209 const MCOperand &Op = Inst.getOperand(OpIdx);
5210 if (!Op.isReg())
5211 return -1;
5212
5213 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5214 auto Reg = Sub ? Sub : Op.getReg();
5215 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5216 return AGPR32.contains(Reg) ? 1 : 0;
5217}
5218
5219bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
5220 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5221 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
5222 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5223 SIInstrFlags::DS)) == 0)
5224 return true;
5225
5226 AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
5227 ? AMDGPU::OpName::data0
5228 : AMDGPU::OpName::vdata;
5229
5230 const MCRegisterInfo *MRI = getMRI();
5231 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
5232 int DataAreg = IsAGPROperand(Inst, DataName, MRI);
5233
5234 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
5235 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
5236 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5237 return false;
5238 }
5239
5240 auto FB = getFeatureBits();
5241 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5242 if (DataAreg < 0 || DstAreg < 0)
5243 return true;
5244 return DstAreg == DataAreg;
5245 }
5246
5247 return DstAreg < 1 && DataAreg < 1;
5248}
5249
5250bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
5251 auto FB = getFeatureBits();
5252 if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
5253 return true;
5254
5255 unsigned Opc = Inst.getOpcode();
5256 const MCRegisterInfo *MRI = getMRI();
5257 // DS_READ_B96_TR_B6 is the only DS instruction in GFX950 that allows
5258 // an unaligned VGPR. All others allow only even-aligned VGPRs.
5259 if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5260 return true;
5261
5262 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5263 switch (Opc) {
5264 default:
5265 break;
5266 case AMDGPU::DS_LOAD_TR6_B96:
5267 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5268 // DS_LOAD_TR6_B96 is the only DS instruction in GFX1250 that
5269 // allows an unaligned VGPR. All others allow only even-aligned VGPRs.
5270 return true;
5271 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5272 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5273 // GLOBAL_LOAD_TR6_B96 is the only GLOBAL instruction in GFX1250 that
5274 // allows an unaligned VGPR for vdst, but its other operands still allow
5275 // only even-aligned VGPRs.
5276 int VAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
5277 if (VAddrIdx != -1) {
5278 const MCOperand &Op = Inst.getOperand(VAddrIdx);
5279 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5280 if ((Sub - AMDGPU::VGPR0) & 1)
5281 return false;
5282 }
5283 return true;
5284 }
5285 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5286 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
5287 return true;
5288 }
5289 }
5290
5291 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5292 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5293 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
5294 const MCOperand &Op = Inst.getOperand(I);
5295 if (!Op.isReg())
5296 continue;
5297
5298 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5299 if (!Sub)
5300 continue;
5301
5302 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
5303 return false;
5304 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
5305 return false;
5306 }
5307
5308 return true;
5309}
5310
5311SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5312 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5313 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5314 if (Op.isBLGP())
5315 return Op.getStartLoc();
5316 }
5317 return SMLoc();
5318}
5319
5320bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
5321 const OperandVector &Operands) {
5322 unsigned Opc = Inst.getOpcode();
5323 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
5324 if (BlgpIdx == -1)
5325 return true;
5326 SMLoc BLGPLoc = getBLGPLoc(Operands);
5327 if (!BLGPLoc.isValid())
5328 return true;
5329 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
5330 auto FB = getFeatureBits();
5331 bool UsesNeg = false;
5332 if (FB[AMDGPU::FeatureGFX940Insts]) {
5333 switch (Opc) {
5334 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5335 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5336 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5337 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5338 UsesNeg = true;
5339 }
5340 }
5341
5342 if (IsNeg == UsesNeg)
5343 return true;
5344
5345 Error(BLGPLoc,
5346 UsesNeg ? "invalid modifier: blgp is not supported"
5347 : "invalid modifier: neg is not supported");
5348
5349 return false;
5350}
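
// For the gfx940 double-precision MFMAs listed above the modifier is spelled
// neg:[...] rather than blgp:[...], e.g. (illustrative operands):
//
//   v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] neg:[1,0,0]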
5351
5352bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
5353 const OperandVector &Operands) {
5354 if (!isGFX11Plus())
5355 return true;
5356
5357 unsigned Opc = Inst.getOpcode();
5358 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5359 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5360 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5361 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5362 return true;
5363
5364 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
5365 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
5366 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
5367 if (Reg == AMDGPU::SGPR_NULL)
5368 return true;
5369
5370 Error(getOperandLoc(Operands, Src0Idx), "src0 must be null");
5371 return false;
5372}
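
// On gfx11 the split waitcnt opcodes only accept the null register as their
// sdst, e.g. (illustrative):
//
//   s_waitcnt_vscnt null, 0x0   ; OK
//   s_waitcnt_vscnt s0, 0x0     ; error: src0 must be null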
5373
5374bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
5375 const OperandVector &Operands) {
5376 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5377 if ((TSFlags & SIInstrFlags::DS) == 0)
5378 return true;
5379 if (TSFlags & SIInstrFlags::GWS)
5380 return validateGWS(Inst, Operands);
5381 // Only validate GDS for non-GWS instructions.
5382 if (hasGDS())
5383 return true;
5384 int GDSIdx =
5385 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
5386 if (GDSIdx < 0)
5387 return true;
5388 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
5389 if (GDS) {
5390 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5391 Error(S, "gds modifier is not supported on this GPU");
5392 return false;
5393 }
5394 return true;
5395}
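
// e.g. on a GPU without GDS (illustrative):
//
//   ds_read_b32 v0, v1 gds   ; error: gds modifier is not supported on this GPU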
5396
5397// gfx90a has an undocumented limitation:
5398// DS_GWS opcodes must use even aligned registers.
5399bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5400 const OperandVector &Operands) {
5401 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5402 return true;
5403
5404 int Opc = Inst.getOpcode();
5405 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5406 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5407 return true;
5408
5409 const MCRegisterInfo *MRI = getMRI();
5410 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5411 int Data0Pos =
5412 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
5413 assert(Data0Pos != -1);
5414 auto Reg = Inst.getOperand(Data0Pos).getReg();
5415 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5416 if (RegIdx & 1) {
5417 Error(getOperandLoc(Operands, Data0Pos), "vgpr must be even aligned");
5418 return false;
5419 }
5420
5421 return true;
5422}
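
// e.g. on gfx90a (illustrative):
//
//   ds_gws_init v2 gds   ; OK: data0 register index is even
//   ds_gws_init v1 gds   ; error: vgpr must be even aligned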
5423
5424bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
5425 const OperandVector &Operands,
5426 SMLoc IDLoc) {
5427 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
5428 AMDGPU::OpName::cpol);
5429 if (CPolPos == -1)
5430 return true;
5431
5432 unsigned CPol = Inst.getOperand(CPolPos).getImm();
5433
5434 if (!isGFX1250Plus()) {
5435 if (CPol & CPol::SCAL) {
5436 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5437 StringRef CStr(S.getPointer());
5438 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5439 Error(S, "scale_offset is not supported on this GPU");
5440 }
5441 if (CPol & CPol::NV) {
5442 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5443 StringRef CStr(S.getPointer());
5444 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
5445 Error(S, "nv is not supported on this GPU");
5446 }
5447 }
5448
5449 if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Inst.getOpcode())) {
5450 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5451 StringRef CStr(S.getPointer());
5452 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5453 Error(S, "scale_offset is not supported for this instruction");
5454 }
5455
5456 if (isGFX12Plus())
5457 return validateTHAndScopeBits(Inst, Operands, CPol);
5458
5459 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5460 if (TSFlags & SIInstrFlags::SMRD) {
5461 if (CPol && (isSI() || isCI())) {
5462 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5463 Error(S, "cache policy is not supported for SMRD instructions");
5464 return false;
5465 }
5466 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
5467 Error(IDLoc, "invalid cache policy for SMEM instruction");
5468 return false;
5469 }
5470 }
5471
5472 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
5473 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
5474 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5475 SIInstrFlags::FLAT;
5476 if (!(TSFlags & AllowSCCModifier)) {
5477 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5478 StringRef CStr(S.getPointer());
5479 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
5480 Error(S,
5481 "scc modifier is not supported for this instruction on this GPU");
5482 return false;
5483 }
5484 }
5485
5486 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
5487 return true;
5488
5489 if (TSFlags & SIInstrFlags::IsAtomicRet) {
5490 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
5491 Error(IDLoc, isGFX940() ? "instruction must use sc0"
5492 : "instruction must use glc");
5493 return false;
5494 }
5495 } else {
5496 if (CPol & CPol::GLC) {
5497 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5498 StringRef CStr(S.getPointer());
5499 S = SMLoc::getFromPointer(
5500 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
5501 Error(S, isGFX940() ? "instruction must not use sc0"
5502 : "instruction must not use glc");
5503 return false;
5504 }
5505 }
5506
5507 return true;
5508}
5509
5510bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
5511 const OperandVector &Operands,
5512 const unsigned CPol) {
5513 const unsigned TH = CPol & AMDGPU::CPol::TH;
5514 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5515
5516 const unsigned Opcode = Inst.getOpcode();
5517 const MCInstrDesc &TID = MII.get(Opcode);
5518
5519 auto PrintError = [&](StringRef Msg) {
5520 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5521 Error(S, Msg);
5522 return false;
5523 };
5524
5525 if ((TH & AMDGPU::CPol::TH_ATOMIC_RETURN) &&
5527 return PrintError("th:TH_ATOMIC_RETURN requires a destination operand");
5528
5529 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5532 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5533
5534 if (TH == 0)
5535 return true;
5536
5537 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5538 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5539 (TH == AMDGPU::CPol::TH_NT_HT)))
5540 return PrintError("invalid th value for SMEM instruction");
5541
5542 if (TH == AMDGPU::CPol::TH_BYPASS) {
5543 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5544 CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
5545 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5546 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5547 return PrintError("scope and th combination is not valid");
5548 }
5549
5550 unsigned THType = AMDGPU::getTemporalHintType(TID);
5551 if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
5552 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5553 return PrintError("invalid th value for atomic instructions");
5554 } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
5555 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5556 return PrintError("invalid th value for store instructions");
5557 } else {
5558 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5559 return PrintError("invalid th value for load instructions");
5560 }
5561
5562 return true;
5563}
5564
5565bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5566 const OperandVector &Operands) {
5567 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5568 if (Desc.mayStore() &&
5569 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5570 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5571 if (Loc != getInstLoc(Operands)) {
5572 Error(Loc, "TFE modifier has no meaning for store instructions");
5573 return false;
5574 }
5575 }
5576
5577 return true;
5578}
5579
5580bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
5581 const OperandVector &Operands) {
5582 unsigned Opc = Inst.getOpcode();
5583 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5584 const MCInstrDesc &Desc = MII.get(Opc);
5585
5586 auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) -> bool {
5587 int FmtIdx = AMDGPU::getNamedOperandIdx(Opc, FmtOp);
5588 if (FmtIdx == -1)
5589 return true;
5590 unsigned Fmt = Inst.getOperand(FmtIdx).getImm();
5591 int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp);
5592 unsigned RegSize =
5593 TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[SrcIdx], HwMode))
5594 .getSizeInBits();
5595
5596 if (RegSize == AMDGPU::wmmaScaleF8F6F4FormatToNumRegs(Fmt) * 32)
5597 return true;
5598
5599 Error(getOperandLoc(Operands, SrcIdx),
5600 "wrong register tuple size for " +
5601 Twine(WMMAMods::ModMatrixFmt[Fmt]));
5602 return false;
5603 };
5604
5605 return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
5606 validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
5607}
5608
5609bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, SMLoc IDLoc,
5610 const OperandVector &Operands) {
5611 if (!validateLdsDirect(Inst, Operands))
5612 return false;
5613 if (!validateTrue16OpSel(Inst)) {
5614 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5615 "op_sel operand conflicts with 16-bit operand suffix");
5616 return false;
5617 }
5618 if (!validateSOPLiteral(Inst, Operands))
5619 return false;
5620 if (!validateVOPLiteral(Inst, Operands)) {
5621 return false;
5622 }
5623 if (!validateConstantBusLimitations(Inst, Operands)) {
5624 return false;
5625 }
5626 if (!validateVOPD(Inst, Operands)) {
5627 return false;
5628 }
5629 if (!validateIntClampSupported(Inst)) {
5630 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5631 "integer clamping is not supported on this GPU");
5632 return false;
5633 }
5634 if (!validateOpSel(Inst)) {
5635 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5636 "invalid op_sel operand");
5637 return false;
5638 }
5639 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5640 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5641 "invalid neg_lo operand");
5642 return false;
5643 }
5644 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5645 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5646 "invalid neg_hi operand");
5647 return false;
5648 }
5649 if (!validateDPP(Inst, Operands)) {
5650 return false;
5651 }
5652 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5653 if (!validateMIMGD16(Inst)) {
5654 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5655 "d16 modifier is not supported on this GPU");
5656 return false;
5657 }
5658 if (!validateMIMGDim(Inst, Operands)) {
5659 Error(IDLoc, "missing dim operand");
5660 return false;
5661 }
5662 if (!validateTensorR128(Inst)) {
5663 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5664 "instruction must set modifier r128=0");
5665 return false;
5666 }
5667 if (!validateMIMGMSAA(Inst)) {
5668 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5669 "invalid dim; must be MSAA type");
5670 return false;
5671 }
5672 if (!validateMIMGDataSize(Inst, IDLoc)) {
5673 return false;
5674 }
5675 if (!validateMIMGAddrSize(Inst, IDLoc))
5676 return false;
5677 if (!validateMIMGAtomicDMask(Inst)) {
5678 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5679 "invalid atomic image dmask");
5680 return false;
5681 }
5682 if (!validateMIMGGatherDMask(Inst)) {
5683 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5684 "invalid image_gather dmask: only one bit must be set");
5685 return false;
5686 }
5687 if (!validateMovrels(Inst, Operands)) {
5688 return false;
5689 }
5690 if (!validateOffset(Inst, Operands)) {
5691 return false;
5692 }
5693 if (!validateMAIAccWrite(Inst, Operands)) {
5694 return false;
5695 }
5696 if (!validateMAISrc2(Inst, Operands)) {
5697 return false;
5698 }
5699 if (!validateMFMA(Inst, Operands)) {
5700 return false;
5701 }
5702 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5703 return false;
5704 }
5705
5706 if (!validateAGPRLdSt(Inst)) {
5707 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5708 ? "invalid register class: data and dst should be all VGPR or AGPR"
5709 : "invalid register class: agpr loads and stores not supported on this GPU"
5710 );
5711 return false;
5712 }
5713 if (!validateVGPRAlign(Inst)) {
5714 Error(IDLoc,
5715 "invalid register class: vgpr tuples must be 64 bit aligned");
5716 return false;
5717 }
5718 if (!validateDS(Inst, Operands)) {
5719 return false;
5720 }
5721
5722 if (!validateBLGP(Inst, Operands)) {
5723 return false;
5724 }
5725
5726 if (!validateDivScale(Inst)) {
5727 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5728 return false;
5729 }
5730 if (!validateWaitCnt(Inst, Operands)) {
5731 return false;
5732 }
5733 if (!validateTFE(Inst, Operands)) {
5734 return false;
5735 }
5736 if (!validateWMMA(Inst, Operands)) {
5737 return false;
5738 }
5739
5740 return true;
5741}
5742
5743static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5744 const FeatureBitset &FBS,
5745 unsigned VariantID = 0);
5746
5747static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5748 const FeatureBitset &AvailableFeatures,
5749 unsigned VariantID);
5750
5751bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5752 const FeatureBitset &FBS) {
5753 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5754}
5755
5756bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5757 const FeatureBitset &FBS,
5758 ArrayRef<unsigned> Variants) {
5759 for (auto Variant : Variants) {
5760 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5761 return true;
5762 }
5763
5764 return false;
5765}
5766
5767bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5768 SMLoc IDLoc) {
5769 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5770
5771 // Check if requested instruction variant is supported.
5772 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5773 return false;
5774
5775 // This instruction is not supported.
5776 // Clear any other pending errors because they are no longer relevant.
5777 getParser().clearPendingErrors();
5778
5779 // Requested instruction variant is not supported.
5780 // Check if any other variants are supported.
5781 StringRef VariantName = getMatchedVariantName();
5782 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5783 return Error(IDLoc,
5784 Twine(VariantName,
5785 " variant of this instruction is not supported"));
5786 }
5787
5788 // Check if this instruction may be used with a different wavesize.
5789 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5790 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5791 // FIXME: Use getAvailableFeatures, and do not manually recompute
5792 FeatureBitset FeaturesWS32 = getFeatureBits();
5793 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5794 .flip(AMDGPU::FeatureWavefrontSize32);
5795 FeatureBitset AvailableFeaturesWS32 =
5796 ComputeAvailableFeatures(FeaturesWS32);
5797
5798 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5799 return Error(IDLoc, "instruction requires wavesize=32");
5800 }
5801
5802 // Finally check if this instruction is supported on any other GPU.
5803 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5804 return Error(IDLoc, "instruction not supported on this GPU (" +
5805 getSTI().getCPU() + ")" + ": " + Mnemo);
5806 }
5807
5808 // Instruction not supported on any GPU. Probably a typo.
5809 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5810 return Error(IDLoc, "invalid instruction" + Suggestion);
5811}
5812
5813static bool isInvalidVOPDY(const OperandVector &Operands,
5814 uint64_t InvalidOprIdx) {
5815 assert(InvalidOprIdx < Operands.size());
5816 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5817 if (Op.isToken() && InvalidOprIdx > 1) {
5818 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5819 return PrevOp.isToken() && PrevOp.getToken() == "::";
5820 }
5821 return false;
5822}
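
// VOPD instructions are written as two halves separated by a "::" token,
// e.g. (illustrative):
//
//   v_dual_mov_b32 v0, v1 :: v_dual_add_f32 v2, v3, v4
//
// so a token operand immediately preceded by "::" identifies the failing
// operand as the start of the VOPDY half.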
5823
5824bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5825 OperandVector &Operands,
5826 MCStreamer &Out,
5827 uint64_t &ErrorInfo,
5828 bool MatchingInlineAsm) {
5829 MCInst Inst;
5830 Inst.setLoc(IDLoc);
5831 unsigned Result = Match_Success;
5832 for (auto Variant : getMatchedVariants()) {
5833 uint64_t EI;
5834 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5835 Variant);
5836 // We order match statuses from least to most specific and use the most
5837 // specific status as the result:
5838 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5839 if (R == Match_Success || R == Match_MissingFeature ||
5840 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5841 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5842 Result != Match_MissingFeature)) {
5843 Result = R;
5844 ErrorInfo = EI;
5845 }
5846 if (R == Match_Success)
5847 break;
5848 }
5849
5850 if (Result == Match_Success) {
5851 if (!validateInstruction(Inst, IDLoc, Operands)) {
5852 return true;
5853 }
5854 Out.emitInstruction(Inst, getSTI());
5855 return false;
5856 }
5857
5858 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5859 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5860 return true;
5861 }
5862
5863 switch (Result) {
5864 default: break;
5865 case Match_MissingFeature:
5866 // It has been verified that the specified instruction
5867 // mnemonic is valid. A match was found but it requires
5868 // features which are not supported on this GPU.
5869 return Error(IDLoc, "operands are not valid for this GPU or mode");
5870
5871 case Match_InvalidOperand: {
5872 SMLoc ErrorLoc = IDLoc;
5873 if (ErrorInfo != ~0ULL) {
5874 if (ErrorInfo >= Operands.size()) {
5875 return Error(IDLoc, "too few operands for instruction");
5876 }
5877 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5878 if (ErrorLoc == SMLoc())
5879 ErrorLoc = IDLoc;
5880
5881 if (isInvalidVOPDY(Operands, ErrorInfo))
5882 return Error(ErrorLoc, "invalid VOPDY instruction");
5883 }
5884 return Error(ErrorLoc, "invalid operand for instruction");
5885 }
5886
5887 case Match_MnemonicFail:
5888 llvm_unreachable("Invalid instructions should have been handled already");
5889 }
5890 llvm_unreachable("Implement any new match types added!");
5891}
5892
5893bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5894 int64_t Tmp = -1;
5895 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5896 return true;
5897 }
5898 if (getParser().parseAbsoluteExpression(Tmp)) {
5899 return true;
5900 }
5901 Ret = static_cast<uint32_t>(Tmp);
5902 return false;
5903}
5904
5905bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5906 if (!getSTI().getTargetTriple().isAMDGCN())
5907 return TokError("directive only supported for amdgcn architecture");
5908
5909 std::string TargetIDDirective;
5910 SMLoc TargetStart = getTok().getLoc();
5911 if (getParser().parseEscapedString(TargetIDDirective))
5912 return true;
5913
5914 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5915 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5916 return getParser().Error(TargetRange.Start,
5917 (Twine(".amdgcn_target directive's target id ") +
5918 Twine(TargetIDDirective) +
5919 Twine(" does not match the specified target id ") +
5920 Twine(getTargetStreamer().getTargetID()->toString())).str());
5921
5922 return false;
5923}
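
// e.g. a module assembled for gfx90a with xnack enabled would carry
// (illustrative):
//
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+"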
5924
5925bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5926 return Error(Range.Start, "value out of range", Range);
5927}
5928
5929bool AMDGPUAsmParser::calculateGPRBlocks(
5930 const FeatureBitset &Features, const MCExpr *VCCUsed,
5931 const MCExpr *FlatScrUsed, bool XNACKUsed,
5932 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
5933 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5934 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
5935 // TODO(scott.linder): These calculations are duplicated from
5936 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5937 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5938 MCContext &Ctx = getContext();
5939
5940 const MCExpr *NumSGPRs = NextFreeSGPR;
5941 int64_t EvaluatedSGPRs;
5942
5943 if (Version.Major >= 10)
5944 NumSGPRs = MCConstantExpr::create(0, Ctx);
5945 else {
5946 unsigned MaxAddressableNumSGPRs =
5947 IsaInfo::getAddressableNumSGPRs(&getSTI());
5948
5949 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
5950 !Features.test(FeatureSGPRInitBug) &&
5951 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5952 return OutOfRangeError(SGPRRange);
5953
5954 const MCExpr *ExtraSGPRs =
5955 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
5956 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
5957
5958 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5959 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5960 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5961 return OutOfRangeError(SGPRRange);
5962
5963 if (Features.test(FeatureSGPRInitBug))
5964 NumSGPRs =
5965 MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
5966 }
5967
5968 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
5969 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
5970 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
5971 unsigned Granule) -> const MCExpr * {
5972 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
5973 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
5974 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
5975 const MCExpr *AlignToGPR =
5976 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
5977 const MCExpr *DivGPR =
5978 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
5979 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
5980 return SubGPR;
5981 };
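 // For example, with NumGPR = 41 and a granule of 4 (hypothetical values):
 // alignTo(41, 4) = 44, 44 / 4 = 11, and 11 - 1 = 10 encoded blocks.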
5982
5983 VGPRBlocks = GetNumGPRBlocks(
5984 NextFreeVGPR,
5985 IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
5986 SGPRBlocks =
5987 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));
5988
5989 return false;
5990}
5991
5992bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5993 if (!getSTI().getTargetTriple().isAMDGCN())
5994 return TokError("directive only supported for amdgcn architecture");
5995
5996 if (!isHsaAbi(getSTI()))
5997 return TokError("directive only supported for amdhsa OS");
5998
5999 StringRef KernelName;
6000 if (getParser().parseIdentifier(KernelName))
6001 return true;
6002
6003 AMDGPU::MCKernelDescriptor KD =
6004 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
6005 &getSTI(), getContext());
6006
6007 StringSet<> Seen;
6008
6009 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
6010
6011 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
6012 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
6013
6014 SMRange VGPRRange;
6015 const MCExpr *NextFreeVGPR = ZeroExpr;
6016 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
6017 const MCExpr *NamedBarCnt = ZeroExpr;
6018 uint64_t SharedVGPRCount = 0;
6019 uint64_t PreloadLength = 0;
6020 uint64_t PreloadOffset = 0;
6021 SMRange SGPRRange;
6022 const MCExpr *NextFreeSGPR = ZeroExpr;
6023
6024 // Count the number of user SGPRs implied from the enabled feature bits.
6025 unsigned ImpliedUserSGPRCount = 0;
6026
6027 // Track if the asm explicitly contains the directive for the user SGPR
6028 // count.
6029 std::optional<unsigned> ExplicitUserSGPRCount;
6030 const MCExpr *ReserveVCC = OneExpr;
6031 const MCExpr *ReserveFlatScr = OneExpr;
6032 std::optional<bool> EnableWavefrontSize32;
6033
6034 while (true) {
6035 while (trySkipToken(AsmToken::EndOfStatement));
6036
6037 StringRef ID;
6038 SMRange IDRange = getTok().getLocRange();
6039 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
6040 return true;
6041
6042 if (ID == ".end_amdhsa_kernel")
6043 break;
6044
6045 if (!Seen.insert(ID).second)
6046 return TokError(".amdhsa_ directives cannot be repeated");
6047
6048 SMLoc ValStart = getLoc();
6049 const MCExpr *ExprVal;
6050 if (getParser().parseExpression(ExprVal))
6051 return true;
6052 SMLoc ValEnd = getLoc();
6053 SMRange ValRange = SMRange(ValStart, ValEnd);
6054
6055 int64_t IVal = 0;
6056 uint64_t Val = IVal;
6057 bool EvaluatableExpr;
6058 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
6059 if (IVal < 0)
6060 return OutOfRangeError(ValRange);
6061 Val = IVal;
6062 }
6063
6064#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6065 if (!isUInt<ENTRY##_WIDTH>(Val)) \
6066 return OutOfRangeError(RANGE); \
6067 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6068 getContext());
6069
6070// Some fields use the parsed value immediately, which requires the
6071// expression to be resolvable.
6072#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6073 if (!(RESOLVED)) \
6074 return Error(IDRange.Start, "directive should have resolvable expression", \
6075 IDRange);
6076
6077 if (ID == ".amdhsa_group_segment_fixed_size") {
6078 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
6079 CHAR_BIT>(Val))
6080 return OutOfRangeError(ValRange);
6081 KD.group_segment_fixed_size = ExprVal;
6082 } else if (ID == ".amdhsa_private_segment_fixed_size") {
6083 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
6084 CHAR_BIT>(Val))
6085 return OutOfRangeError(ValRange);
6086 KD.private_segment_fixed_size = ExprVal;
6087 } else if (ID == ".amdhsa_kernarg_size") {
6088 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
6089 return OutOfRangeError(ValRange);
6090 KD.kernarg_size = ExprVal;
6091 } else if (ID == ".amdhsa_user_sgpr_count") {
6092 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6093 ExplicitUserSGPRCount = Val;
6094 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
6095 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6096 if (hasArchitectedFlatScratch())
6097 return Error(IDRange.Start,
6098 "directive is not supported with architected flat scratch",
6099 IDRange);
6100 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6101 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6102 ExprVal, ValRange);
6103 if (Val)
6104 ImpliedUserSGPRCount += 4;
6105 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
6106 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6107 if (!hasKernargPreload())
6108 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6109
6110 if (Val > getMaxNumUserSGPRs())
6111 return OutOfRangeError(ValRange);
6112 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
6113 ValRange);
6114 if (Val) {
6115 ImpliedUserSGPRCount += Val;
6116 PreloadLength = Val;
6117 }
6118 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
6119 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6120 if (!hasKernargPreload())
6121 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6122
6123 if (Val >= 1024)
6124 return OutOfRangeError(ValRange);
6125 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
6126 ValRange);
6127 if (Val)
6128 PreloadOffset = Val;
6129 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
6130 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6131 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6132 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6133 ValRange);
6134 if (Val)
6135 ImpliedUserSGPRCount += 2;
6136 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
6137 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6138 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6139 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6140 ValRange);
6141 if (Val)
6142 ImpliedUserSGPRCount += 2;
6143 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
6144 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6145 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6146 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6147 ExprVal, ValRange);
6148 if (Val)
6149 ImpliedUserSGPRCount += 2;
6150 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
6151 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6152 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6153 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6154 ValRange);
6155 if (Val)
6156 ImpliedUserSGPRCount += 2;
6157 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
6158 if (hasArchitectedFlatScratch())
6159 return Error(IDRange.Start,
6160 "directive is not supported with architected flat scratch",
6161 IDRange);
6162 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6163 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6164 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6165 ExprVal, ValRange);
6166 if (Val)
6167 ImpliedUserSGPRCount += 2;
6168 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
6169 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6170 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6171 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6172 ExprVal, ValRange);
6173 if (Val)
6174 ImpliedUserSGPRCount += 1;
6175 } else if (ID == ".amdhsa_wavefront_size32") {
6176 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6177 if (IVersion.Major < 10)
6178 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6179 EnableWavefrontSize32 = Val;
6180 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6181 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6182 ValRange);
6183 } else if (ID == ".amdhsa_uses_dynamic_stack") {
6184 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6185 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6186 ValRange);
6187 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6188 if (hasArchitectedFlatScratch())
6189 return Error(IDRange.Start,
6190 "directive is not supported with architected flat scratch",
6191 IDRange);
6192 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6193 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6194 ValRange);
6195 } else if (ID == ".amdhsa_enable_private_segment") {
6196 if (!hasArchitectedFlatScratch())
6197 return Error(
6198 IDRange.Start,
6199 "directive is not supported without architected flat scratch",
6200 IDRange);
6201 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6202 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6203 ValRange);
6204 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
6205 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6206 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6207 ValRange);
6208 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
6209 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6210 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6211 ValRange);
6212 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
6213 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6214 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6215 ValRange);
6216 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
6217 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6218 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6219 ValRange);
6220 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
6221 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6222 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6223 ValRange);
6224 } else if (ID == ".amdhsa_next_free_vgpr") {
6225 VGPRRange = ValRange;
6226 NextFreeVGPR = ExprVal;
6227 } else if (ID == ".amdhsa_next_free_sgpr") {
6228 SGPRRange = ValRange;
6229 NextFreeSGPR = ExprVal;
6230 } else if (ID == ".amdhsa_accum_offset") {
6231 if (!isGFX90A())
6232 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6233 AccumOffset = ExprVal;
6234 } else if (ID == ".amdhsa_named_barrier_count") {
6235 if (!isGFX1250Plus())
6236 return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
6237 NamedBarCnt = ExprVal;
6238 } else if (ID == ".amdhsa_reserve_vcc") {
6239 if (EvaluatableExpr && !isUInt<1>(Val))
6240 return OutOfRangeError(ValRange);
6241 ReserveVCC = ExprVal;
6242 } else if (ID == ".amdhsa_reserve_flat_scratch") {
6243 if (IVersion.Major < 7)
6244 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
6245 if (hasArchitectedFlatScratch())
6246 return Error(IDRange.Start,
6247 "directive is not supported with architected flat scratch",
6248 IDRange);
6249 if (EvaluatableExpr && !isUInt<1>(Val))
6250 return OutOfRangeError(ValRange);
6251 ReserveFlatScr = ExprVal;
6252 } else if (ID == ".amdhsa_reserve_xnack_mask") {
6253 if (IVersion.Major < 8)
6254 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
6255 if (!isUInt<1>(Val))
6256 return OutOfRangeError(ValRange);
6257 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6258 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
6259 IDRange);
6260 } else if (ID == ".amdhsa_float_round_mode_32") {
6261 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6262 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6263 ValRange);
6264 } else if (ID == ".amdhsa_float_round_mode_16_64") {
6265 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6266 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6267 ValRange);
6268 } else if (ID == ".amdhsa_float_denorm_mode_32") {
6269 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6270 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6271 ValRange);
6272 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
6273 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6274 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6275 ValRange);
6276 } else if (ID == ".amdhsa_dx10_clamp") {
6277 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6278 return Error(IDRange.Start, "directive unsupported on gfx1170+",
6279 IDRange);
6280 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6281 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6282 ValRange);
6283 } else if (ID == ".amdhsa_ieee_mode") {
6284 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6285 return Error(IDRange.Start, "directive unsupported on gfx1170+",
6286 IDRange);
6287 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6288 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6289 ValRange);
6290 } else if (ID == ".amdhsa_fp16_overflow") {
6291 if (IVersion.Major < 9)
6292 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
6293 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6294 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6295 ValRange);
6296 } else if (ID == ".amdhsa_tg_split") {
6297 if (!isGFX90A())
6298 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6299 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
6300 ExprVal, ValRange);
6301 } else if (ID == ".amdhsa_workgroup_processor_mode") {
6302 if (!supportsWGP(getSTI()))
6303 return Error(IDRange.Start,
6304 "directive unsupported on " + getSTI().getCPU(), IDRange);
6305 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6306 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6307 ValRange);
6308 } else if (ID == ".amdhsa_memory_ordered") {
6309 if (IVersion.Major < 10)
6310 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6311 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6312 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6313 ValRange);
6314 } else if (ID == ".amdhsa_forward_progress") {
6315 if (IVersion.Major < 10)
6316 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6317 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6318 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6319 ValRange);
6320 } else if (ID == ".amdhsa_shared_vgpr_count") {
6321 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6322 if (IVersion.Major < 10 || IVersion.Major >= 12)
6323 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
6324 IDRange);
6325 SharedVGPRCount = Val;
6326 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6327 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6328 ValRange);
6329 } else if (ID == ".amdhsa_inst_pref_size") {
6330 if (IVersion.Major < 11)
6331 return Error(IDRange.Start, "directive requires gfx11+", IDRange);
6332 if (IVersion.Major == 11) {
6333 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6334 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6335 ValRange);
6336 } else {
6337 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6338 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6339 ValRange);
6340 }
6341 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
6342 PARSE_BITS_ENTRY(
6343 KD.compute_pgm_rsrc2,
6344 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6345 ExprVal, ValRange);
6346 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
6347 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6348 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6349 ExprVal, ValRange);
6350 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
6351 PARSE_BITS_ENTRY(
6352 KD.compute_pgm_rsrc2,
6353 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6354 ExprVal, ValRange);
6355 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
6356 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6357 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6358 ExprVal, ValRange);
6359 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
6360 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6361 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6362 ExprVal, ValRange);
6363 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
6364 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6365 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6366 ExprVal, ValRange);
6367 } else if (ID == ".amdhsa_exception_int_div_zero") {
6368 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6369 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6370 ExprVal, ValRange);
6371 } else if (ID == ".amdhsa_round_robin_scheduling") {
6372 if (IVersion.Major < 12)
6373 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
6374 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6375 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6376 ValRange);
6377 } else {
6378 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
6379 }
6380
6381#undef PARSE_BITS_ENTRY
6382 }
6383
6384 if (!Seen.contains(".amdhsa_next_free_vgpr"))
6385 return TokError(".amdhsa_next_free_vgpr directive is required");
6386
6387 if (!Seen.contains(".amdhsa_next_free_sgpr"))
6388 return TokError(".amdhsa_next_free_sgpr directive is required");
6389
6390 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6391 if (UserSGPRCount > getMaxNumUserSGPRs())
6392 return TokError("too many user SGPRs enabled, found " +
6393 Twine(UserSGPRCount) + ", but only " +
6394 Twine(getMaxNumUserSGPRs()) + " are supported.");
6395
6396 // Consider the case where the total number of UserSGPRs with trailing
6397 // allocated preload SGPRs is greater than the number of explicitly
6398 // referenced SGPRs.
6399 if (PreloadLength) {
6400 MCContext &Ctx = getContext();
6401 NextFreeSGPR = AMDGPUMCExpr::createMax(
6402 {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
6403 }
6404
6405 const MCExpr *VGPRBlocks;
6406 const MCExpr *SGPRBlocks;
6407 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6408 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6409 EnableWavefrontSize32, NextFreeVGPR,
6410 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6411 SGPRBlocks))
6412 return true;
6413
6414 int64_t EvaluatedVGPRBlocks;
6415 bool VGPRBlocksEvaluatable =
6416 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6417 if (VGPRBlocksEvaluatable &&
6418 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
6419 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
6420 return OutOfRangeError(VGPRRange);
6421 }
6422 AMDGPU::MCKernelDescriptor::bits_set(
6423 KD.compute_pgm_rsrc1, VGPRBlocks,
6424 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6425 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
6426
6427 int64_t EvaluatedSGPRBlocks;
6428 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6429 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
6430 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
6431 return OutOfRangeError(SGPRRange);
6432 AMDGPU::MCKernelDescriptor::bits_set(
6433 KD.compute_pgm_rsrc1, SGPRBlocks,
6434 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6435 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
6436
6437 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6438 return TokError("amdgpu_user_sgpr_count smaller than implied by "
6439 "enabled user SGPRs");
6440
6441 if (isGFX1250Plus()) {
6442 AMDGPU::MCKernelDescriptor::bits_set(
6443 KD.compute_pgm_rsrc2,
6444 MCConstantExpr::create(UserSGPRCount, getContext()),
6445 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6446 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
6447 } else {
6448 AMDGPU::MCKernelDescriptor::bits_set(
6449 KD.compute_pgm_rsrc2,
6450 MCConstantExpr::create(UserSGPRCount, getContext()),
6451 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6452 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());
6453 }
6454
6455 int64_t IVal = 0;
6456 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
6457 return TokError("Kernarg size should be resolvable");
6458 uint64_t kernarg_size = IVal;
6459 if (PreloadLength && kernarg_size &&
6460 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6461 return TokError("Kernarg preload length + offset is larger than the "
6462 "kernarg segment size");
6463
6464 if (isGFX90A()) {
6465 if (!Seen.contains(".amdhsa_accum_offset"))
6466 return TokError(".amdhsa_accum_offset directive is required");
6467 int64_t EvaluatedAccum;
6468 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
6469 uint64_t UEvaluatedAccum = EvaluatedAccum;
6470 if (AccumEvaluatable &&
6471 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6472 return TokError("accum_offset should be in range [4..256] in "
6473 "increments of 4");
6474
6475 int64_t EvaluatedNumVGPR;
6476 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6477 AccumEvaluatable &&
6478 UEvaluatedAccum >
6479 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6480 return TokError("accum_offset exceeds total VGPR allocation");
6481 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
6482 MCBinaryExpr::createDiv(
6483 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
6484 MCConstantExpr::create(1, getContext()), getContext());
6485 MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, AdjustedAccum,
6486 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6487 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6488 getContext());
6489 }
6490
6491 if (isGFX1250Plus())
6492 MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, NamedBarCnt,
6493 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6494 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
6495 getContext());
6496
6497 if (IVersion.Major >= 10 && IVersion.Major < 12) {
6498 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
6499 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6500 return TokError("shared_vgpr_count directive not valid on "
6501 "wavefront size 32");
6502 }
6503
6504 if (VGPRBlocksEvaluatable &&
6505 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
6506 63)) {
6507 return TokError("shared_vgpr_count*2 + "
6508 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6509 "exceed 63\n");
6510 }
6511 }
6512
6513 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6514 NextFreeVGPR, NextFreeSGPR,
6515 ReserveVCC, ReserveFlatScr);
6516 return false;
6517}
6518
6519bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6520 uint32_t Version;
6521 if (ParseAsAbsoluteExpression(Version))
6522 return true;
6523
6524 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
6525 return false;
6526}
6527
6528bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6529 AMDGPUMCKernelCodeT &C) {
6530 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6531 // assembly for backwards compatibility.
6532 if (ID == "max_scratch_backing_memory_byte_size") {
6533 Parser.eatToEndOfStatement();
6534 return false;
6535 }
6536
6537 SmallString<40> ErrStr;
6538 raw_svector_ostream Err(ErrStr);
6539 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
6540 return TokError(Err.str());
6541 }
6542 Lex();
6543
6544 if (ID == "enable_wavefront_size32") {
6545 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6546 if (!isGFX10Plus())
6547 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
6548 if (!isWave32())
6549 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
6550 } else {
6551 if (!isWave64())
6552 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
6553 }
6554 }
6555
6556 if (ID == "wavefront_size") {
6557 if (C.wavefront_size == 5) {
6558 if (!isGFX10Plus())
6559 return TokError("wavefront_size=5 is only allowed on GFX10+");
6560 if (!isWave32())
6561 return TokError("wavefront_size=5 requires +WavefrontSize32");
6562 } else if (C.wavefront_size == 6) {
6563 if (!isWave64())
6564 return TokError("wavefront_size=6 requires +WavefrontSize64");
6565 }
6566 }
6567
6568 return false;
6569}
6570
6571bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6572 AMDGPUMCKernelCodeT KernelCode;
6573 KernelCode.initDefault(&getSTI(), getContext());
6574
6575 while (true) {
6576 // Lex EndOfStatement. This is in a while loop, because lexing a comment
6577 // will set the current token to EndOfStatement.
6578 while (trySkipToken(AsmToken::EndOfStatement));
6579
6580 StringRef ID;
6581 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
6582 return true;
6583
6584 if (ID == ".end_amd_kernel_code_t")
6585 break;
6586
6587 if (ParseAMDKernelCodeTValue(ID, KernelCode))
6588 return true;
6589 }
6590
6591 KernelCode.validate(&getSTI(), getContext());
6592 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6593
6594 return false;
6595}
6596
6597bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6598 StringRef KernelName;
6599 if (!parseId(KernelName, "expected symbol name"))
6600 return true;
6601
6602 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6603 ELF::STT_AMDGPU_HSA_KERNEL);
6604
6605 KernelScope.initialize(getContext());
6606 return false;
6607}
6608
6609bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6610 if (!getSTI().getTargetTriple().isAMDGCN()) {
6611 return Error(getLoc(),
6612 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6613 "architectures");
6614 }
6615
6616 auto TargetIDDirective = getLexer().getTok().getStringContents();
6617 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6618 return Error(getParser().getTok().getLoc(), "target id must match options");
6619
6620 getTargetStreamer().EmitISAVersion();
6621 Lex();
6622
6623 return false;
6624}
6625
6626bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6627 assert(isHsaAbi(getSTI()));
6628
6629 std::string HSAMetadataString;
6630 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6631 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6632 return true;
6633
6634 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6635 return Error(getLoc(), "invalid HSA metadata");
6636
6637 return false;
6638}
6639
6640/// Common code to parse out a block of text (typically YAML) between start and
6641/// end directives.
6642bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6643 const char *AssemblerDirectiveEnd,
6644 std::string &CollectString) {
6645
6646 raw_string_ostream CollectStream(CollectString);
6647
6648 getLexer().setSkipSpace(false);
6649
6650 bool FoundEnd = false;
6651 while (!isToken(AsmToken::Eof)) {
6652 while (isToken(AsmToken::Space)) {
6653 CollectStream << getTokenStr();
6654 Lex();
6655 }
6656
6657 if (trySkipId(AssemblerDirectiveEnd)) {
6658 FoundEnd = true;
6659 break;
6660 }
6661
6662 CollectStream << Parser.parseStringToEndOfStatement()
6663 << getContext().getAsmInfo()->getSeparatorString();
6664
6665 Parser.eatToEndOfStatement();
6666 }
6667
6668 getLexer().setSkipSpace(true);
6669
6670 if (isToken(AsmToken::Eof) && !FoundEnd) {
6671 return TokError(Twine("expected directive ") +
6672 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6673 }
6674
6675 return false;
6676}
6677
6678/// Parse the assembler directive for new MsgPack-format PAL metadata.
6679bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6680 std::string String;
6681 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6682 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
6683 return true;
6684
6685 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6686 if (!PALMetadata->setFromString(String))
6687 return Error(getLoc(), "invalid PAL metadata");
6688 return false;
6689}
6690
6691/// Parse the assembler directive for old linear-format PAL metadata.
6692bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6693 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6694 return Error(getLoc(),
6695 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6696 "not available on non-amdpal OSes")).str());
6697 }
6698
6699 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6700 PALMetadata->setLegacy();
6701 for (;;) {
6702 uint32_t Key, Value;
6703 if (ParseAsAbsoluteExpression(Key)) {
6704 return TokError(Twine("invalid value in ") +
6705 Twine(PALMD::AssemblerDirective));
6706 }
6707 if (!trySkipToken(AsmToken::Comma)) {
6708 return TokError(Twine("expected an even number of values in ") +
6709 Twine(PALMD::AssemblerDirective));
6710 }
6711 if (ParseAsAbsoluteExpression(Value)) {
6712 return TokError(Twine("invalid value in ") +
6713 Twine(PALMD::AssemblerDirective));
6714 }
6715 PALMetadata->setRegister(Key, Value);
6716 if (!trySkipToken(AsmToken::Comma))
6717 break;
6718 }
6719 return false;
6720}
6721
6722/// ParseDirectiveAMDGPULDS
6723/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
6724bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6725 if (getParser().checkForValidSection())
6726 return true;
6727
6728 StringRef Name;
6729 SMLoc NameLoc = getLoc();
6730 if (getParser().parseIdentifier(Name))
6731 return TokError("expected identifier in directive");
6732
6733 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6734 if (getParser().parseComma())
6735 return true;
6736
6737 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6738
6739 int64_t Size;
6740 SMLoc SizeLoc = getLoc();
6741 if (getParser().parseAbsoluteExpression(Size))
6742 return true;
6743 if (Size < 0)
6744 return Error(SizeLoc, "size must be non-negative");
6745 if (Size > LocalMemorySize)
6746 return Error(SizeLoc, "size is too large");
6747
6748 int64_t Alignment = 4;
6749 if (trySkipToken(AsmToken::Comma)) {
6750 SMLoc AlignLoc = getLoc();
6751 if (getParser().parseAbsoluteExpression(Alignment))
6752 return true;
6753 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6754 return Error(AlignLoc, "alignment must be a power of two");
6755
6756 // Alignment larger than the size of LDS is possible in theory, as long
6757 // as the linker manages to place the symbol at address 0, but we do want
6758 // to make sure the alignment fits nicely into a 32-bit integer.
6759 if (Alignment >= 1u << 31)
6760 return Error(AlignLoc, "alignment is too large");
6761 }
6762
6763 if (parseEOL())
6764 return true;
6765
6766 Symbol->redefineIfPossible();
6767 if (!Symbol->isUndefined())
6768 return Error(NameLoc, "invalid symbol redefinition");
6769
6770 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6771 return false;
6772}
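
// e.g. (illustrative):
//
//   .amdgpu_lds my_lds_var, 1024, 16
//
// reserves 1024 bytes of LDS for my_lds_var with 16-byte alignment; the
// alignment argument is optional and defaults to 4.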
6773
6774bool AMDGPUAsmParser::ParseDirectiveAMDGPUInfo() {
6775 if (getParser().checkForValidSection())
6776 return true;
6777
6778 StringRef FuncName;
6779 if (getParser().parseIdentifier(FuncName))
6780 return TokError("expected symbol name after .amdgpu_info");
6781
6782 MCSymbol *FuncSym = getContext().getOrCreateSymbol(FuncName);
6783 AMDGPU::InfoSectionData ParsedInfoData;
6784 AMDGPU::FuncInfo FI;
6785 FI.Sym = FuncSym;
6786 bool HasScalarAttrs = false;
6787
6788 while (true) {
6789 while (trySkipToken(AsmToken::EndOfStatement))
6790 ;
6791
6792 StringRef ID;
6793 SMLoc IDLoc = getLoc();
6794 if (!parseId(ID, "expected directive or .end_amdgpu_info"))
6795 return true;
6796
6797 if (ID == ".end_amdgpu_info")
6798 break;
6799
6800 // Every per-entry directive shares the `.amdgpu_` namespace prefix; strip
6801 // it once and dispatch on the distinguishing suffix below. The unstripped
6802 // ID is preserved for diagnostics.
6803 StringRef Dir = ID;
6804 if (!Dir.consume_front(".amdgpu_"))
6805 return Error(IDLoc, "unknown .amdgpu_info directive '" + ID + "'");
6806
6807 if (Dir == "flags") {
6808 int64_t Val;
6809 if (getParser().parseAbsoluteExpression(Val))
6810 return true;
6811 auto Flags = static_cast<AMDGPU::FuncInfoFlags>(Val);
6812 FI.UsesVCC = !!(Flags & AMDGPU::FuncInfoFlags::FUNC_USES_VCC);
6813 FI.UsesFlatScratch =
6814 !!(Flags & AMDGPU::FuncInfoFlags::FUNC_USES_FLAT_SCRATCH);
6815 FI.HasDynStack = !!(Flags & AMDGPU::FuncInfoFlags::FUNC_HAS_DYN_STACK);
6816 HasScalarAttrs = true;
6817 } else if (Dir == "num_sgpr") {
6818 int64_t Val;
6819 if (getParser().parseAbsoluteExpression(Val))
6820 return true;
6821 FI.NumSGPR = static_cast<uint32_t>(Val);
6822 HasScalarAttrs = true;
6823 } else if (Dir == "num_vgpr") {
6824 int64_t Val;
6825 if (getParser().parseAbsoluteExpression(Val))
6826 return true;
6827 FI.NumArchVGPR = static_cast<uint32_t>(Val);
6828 HasScalarAttrs = true;
6829 } else if (Dir == "num_agpr") {
6830 int64_t Val;
6831 if (getParser().parseAbsoluteExpression(Val))
6832 return true;
6833 FI.NumAccVGPR = static_cast<uint32_t>(Val);
6834 HasScalarAttrs = true;
6835 } else if (Dir == "private_segment_size") {
6836 int64_t Val;
6837 if (getParser().parseAbsoluteExpression(Val))
6838 return true;
6839 FI.PrivateSegmentSize = static_cast<uint32_t>(Val);
6840 HasScalarAttrs = true;
6841 } else if (Dir == "use") {
6842 StringRef ResName;
6843 if (getParser().parseIdentifier(ResName))
6844 return TokError("expected resource symbol for .amdgpu_use");
6845 ParsedInfoData.Uses.push_back(
6846 {FuncSym, getContext().getOrCreateSymbol(ResName)});
6847 } else if (Dir == "call") {
6848 StringRef DstName;
6849 if (getParser().parseIdentifier(DstName))
6850 return TokError("expected callee symbol for .amdgpu_call");
6851 ParsedInfoData.Calls.push_back(
6852 {FuncSym, getContext().getOrCreateSymbol(DstName)});
6853 } else if (Dir == "indirect_call") {
6854 std::string TypeId;
6855 if (getParser().parseEscapedString(TypeId))
6856 return TokError("expected type ID string for .amdgpu_indirect_call");
6857 ParsedInfoData.IndirectCalls.push_back({FuncSym, std::move(TypeId)});
6858 } else if (Dir == "typeid") {
6859 std::string TypeId;
6860 if (getParser().parseEscapedString(TypeId))
6861 return TokError("expected type ID string for .amdgpu_typeid");
6862 ParsedInfoData.TypeIds.push_back({FuncSym, std::move(TypeId)});
6863 } else {
6864 return Error(IDLoc, "unknown .amdgpu_info directive '" + ID + "'");
6865 }
6866 }
6867
6868 if (HasScalarAttrs)
6869 ParsedInfoData.Funcs.push_back(std::move(FI));
6870
6871 AMDGPU::InfoSectionData &Data = InfoData ? *InfoData : InfoData.emplace();
6872 for (AMDGPU::FuncInfo &Func : ParsedInfoData.Funcs)
6873 Data.Funcs.push_back(std::move(Func));
6874 for (std::pair<MCSymbol *, MCSymbol *> &Use : ParsedInfoData.Uses)
6875 Data.Uses.push_back(Use);
6876 for (std::pair<MCSymbol *, MCSymbol *> &Call : ParsedInfoData.Calls)
6877 Data.Calls.push_back(Call);
6878 for (std::pair<MCSymbol *, std::string> &IndirectCall :
6879 ParsedInfoData.IndirectCalls)
6880 Data.IndirectCalls.push_back(std::move(IndirectCall));
6881 for (std::pair<MCSymbol *, std::string> &TypeId : ParsedInfoData.TypeIds)
6882 Data.TypeIds.push_back(std::move(TypeId));
6883
6884 return false;
6885}
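
// A directive block accepted by this parser might look like (illustrative
// symbol and values):
//
//   .amdgpu_info my_func
//     .amdgpu_num_sgpr 32
//     .amdgpu_num_vgpr 64
//     .amdgpu_call callee_func
//   .end_amdgpu_info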
6886
6887void AMDGPUAsmParser::onEndOfFile() {
6888 if (InfoData)
6889 getTargetStreamer().emitAMDGPUInfo(*InfoData);
6890}
6891
6892bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6893 StringRef IDVal = DirectiveID.getString();
6894
6895 if (isHsaAbi(getSTI())) {
6896 if (IDVal == ".amdhsa_kernel")
6897 return ParseDirectiveAMDHSAKernel();
6898
6899 if (IDVal == ".amdhsa_code_object_version")
6900 return ParseDirectiveAMDHSACodeObjectVersion();
6901
6902 // TODO: Restructure/combine with PAL metadata directive.
6903 if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
6904 return ParseDirectiveHSAMetadata();
6905 } else {
6906 if (IDVal == ".amd_kernel_code_t")
6907 return ParseDirectiveAMDKernelCodeT();
6908
6909 if (IDVal == ".amdgpu_hsa_kernel")
6910 return ParseDirectiveAMDGPUHsaKernel();
6911
6912 if (IDVal == ".amd_amdgpu_isa")
6913 return ParseDirectiveISAVersion();
6914
6915 if (IDVal == HSAMD::AssemblerDirectiveBegin) {
6916 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6917 Twine(" directive is "
6918 "not available on non-amdhsa OSes"))
6919 .str());
6920 }
6921 }
6922
6923 if (IDVal == ".amdgcn_target")
6924 return ParseDirectiveAMDGCNTarget();
6925
6926 if (IDVal == ".amdgpu_lds")
6927 return ParseDirectiveAMDGPULDS();
6928
6929 if (IDVal == ".amdgpu_info")
6930 return ParseDirectiveAMDGPUInfo();
6931
6932 if (IDVal == PALMD::AssemblerDirectiveBegin)
6933 return ParseDirectivePALMetadataBegin();
6934
6935 if (IDVal == PALMD::AssemblerDirective)
6936 return ParseDirectivePALMetadata();
6937
6938 return true;
6939}
6940
6941bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6942 MCRegister Reg) {
6943 if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
6944 return isGFX9Plus();
6945
6946 // GFX10+ has 2 more SGPRs 104 and 105.
6947 if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
6948 return hasSGPR104_SGPR105();
6949
6950 switch (Reg.id()) {
6951 case SRC_SHARED_BASE_LO:
6952 case SRC_SHARED_BASE:
6953 case SRC_SHARED_LIMIT_LO:
6954 case SRC_SHARED_LIMIT:
6955 case SRC_PRIVATE_BASE_LO:
6956 case SRC_PRIVATE_BASE:
6957 case SRC_PRIVATE_LIMIT_LO:
6958 case SRC_PRIVATE_LIMIT:
6959 return isGFX9Plus();
6960 case SRC_FLAT_SCRATCH_BASE_LO:
6961 case SRC_FLAT_SCRATCH_BASE_HI:
6962 return hasGloballyAddressableScratch();
6963 case SRC_POPS_EXITING_WAVE_ID:
6964 return isGFX9Plus() && !isGFX11Plus();
6965 case TBA:
6966 case TBA_LO:
6967 case TBA_HI:
6968 case TMA:
6969 case TMA_LO:
6970 case TMA_HI:
6971 return !isGFX9Plus();
6972 case XNACK_MASK:
6973 case XNACK_MASK_LO:
6974 case XNACK_MASK_HI:
6975 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6976 case SGPR_NULL:
6977 return isGFX10Plus();
6978 case SRC_EXECZ:
6979 case SRC_VCCZ:
6980 return !isGFX11Plus();
6981 default:
6982 break;
6983 }
6984
6985 if (isCI())
6986 return true;
6987
6988 if (isSI() || isGFX10Plus()) {
6989 // No flat_scr on SI.
6990 // On GFX10Plus flat scratch is not a valid register operand and can only be
6991 // accessed with s_setreg/s_getreg.
6992 switch (Reg.id()) {
6993 case FLAT_SCR:
6994 case FLAT_SCR_LO:
6995 case FLAT_SCR_HI:
6996 return false;
6997 default:
6998 return true;
6999 }
7000 }
7001
7002 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
7003 // SI/CI have.
7004 if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
7005 return hasSGPR102_SGPR103();
7006
7007 return true;
7008}
7009
7010ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
7011 StringRef Mnemonic,
7012 OperandMode Mode) {
7013 ParseStatus Res = parseVOPD(Operands);
7014 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
7015 return Res;
7016
7017 // Try to parse with a custom parser
7018 Res = MatchOperandParserImpl(Operands, Mnemonic);
7019
7020 // If we successfully parsed the operand or if there was an error parsing,
7021 // we are done.
7022 //
7023 // If we are parsing after we reach EndOfStatement then this means we
7024 // are appending default values to the Operands list. This is only done
7025 // by custom parser, so we shouldn't continue on to the generic parsing.
7026 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
7027 return Res;
7028
7029 SMLoc RBraceLoc;
7030 SMLoc LBraceLoc = getLoc();
7031 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
7032 unsigned Prefix = Operands.size();
7033
7034 for (;;) {
7035 auto Loc = getLoc();
7036 Res = parseReg(Operands);
7037 if (Res.isNoMatch())
7038 Error(Loc, "expected a register");
7039 if (!Res.isSuccess())
7040 return ParseStatus::Failure;
7041
7042 RBraceLoc = getLoc();
7043 if (trySkipToken(AsmToken::RBrac))
7044 break;
7045
7046 if (!skipToken(AsmToken::Comma,
7047 "expected a comma or a closing square bracket"))
7048 return ParseStatus::Failure;
7049 }
7050
7051 if (Operands.size() - Prefix > 1) {
7052 Operands.insert(Operands.begin() + Prefix,
7053 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
7054 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
7055 }
7056
7057 return ParseStatus::Success;
7058 }
7059
7060 return parseRegOrImm(Operands);
7061}
7062
7063StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
7064 // Clear any forced encodings from the previous instruction.
7065 setForcedEncodingSize(0);
7066 setForcedDPP(false);
7067 setForcedSDWA(false);
7068
7069 if (Name.consume_back("_e64_dpp")) {
7070 setForcedDPP(true);
7071 setForcedEncodingSize(64);
7072 return Name;
7073 }
7074 if (Name.consume_back("_e64")) {
7075 setForcedEncodingSize(64);
7076 return Name;
7077 }
7078 if (Name.consume_back("_e32")) {
7079 setForcedEncodingSize(32);
7080 return Name;
7081 }
7082 if (Name.consume_back("_dpp")) {
7083 setForcedDPP(true);
7084 return Name;
7085 }
7086 if (Name.consume_back("_sdwa")) {
7087 setForcedSDWA(true);
7088 return Name;
7089 }
7090 return Name;
7091}
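// For reference, typical inputs handled by this suffix stripping
// (illustrative; standard AMDGPU assembler syntax):
//   v_add_f32_e32  v0, v1, v2    ; force the 32-bit (VOP2) encoding
//   v_add_f32_e64  v0, v1, v2    ; force the 64-bit (VOP3) encoding
//   v_mov_b32_dpp  v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
//   v_add_f32_sdwa v0, v1, v2 dst_sel:DWORD src0_sel:BYTE_0 src1_sel:DWORD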
7092
7093static void applyMnemonicAliases(StringRef &Mnemonic,
7094 const FeatureBitset &Features,
7095 unsigned VariantID);
7096
7097bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
7098 StringRef Name, SMLoc NameLoc,
7099 OperandVector &Operands) {
7100 // Add the instruction mnemonic
7101 Name = parseMnemonicSuffix(Name);
7102
7103 // If the target architecture uses MnemonicAlias, call it here to parse
7104 // operands correctly.
7105 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
7106
7107 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
7108
7109 bool IsMIMG = Name.starts_with("image_");
7110
7111 while (!trySkipToken(AsmToken::EndOfStatement)) {
7112 OperandMode Mode = OperandMode_Default;
7113 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
7114 Mode = OperandMode_NSA;
7115 ParseStatus Res = parseOperand(Operands, Name, Mode);
7116
7117 if (!Res.isSuccess()) {
7118 checkUnsupportedInstruction(Name, NameLoc);
7119 if (!Parser.hasPendingError()) {
7120 // FIXME: use real operand location rather than the current location.
7121 StringRef Msg = Res.isFailure() ? "failed parsing operand."
7122 : "not a valid operand.";
7123 Error(getLoc(), Msg);
7124 }
7125 while (!trySkipToken(AsmToken::EndOfStatement)) {
7126 lex();
7127 }
7128 return true;
7129 }
7130
7131 // Eat the comma or space if there is one.
7132 trySkipToken(AsmToken::Comma);
7133 }
7134
7135 return false;
7136}
7137
7138//===----------------------------------------------------------------------===//
7139// Utility functions
7140//===----------------------------------------------------------------------===//
7141
7142ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
7143 OperandVector &Operands) {
7144 SMLoc S = getLoc();
7145 if (!trySkipId(Name))
7146 return ParseStatus::NoMatch;
7147
7148 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
7149 return ParseStatus::Success;
7150}
7151
7152ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
7153 int64_t &IntVal) {
7154
7155 if (!trySkipId(Prefix, AsmToken::Colon))
7156 return ParseStatus::NoMatch;
7157
7158 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
7159}
7160
7161ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
7162 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7163 std::function<bool(int64_t &)> ConvertResult) {
7164 SMLoc S = getLoc();
7165 int64_t Value = 0;
7166
7167 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
7168 if (!Res.isSuccess())
7169 return Res;
7170
7171 if (ConvertResult && !ConvertResult(Value)) {
7172 Error(S, "invalid " + StringRef(Prefix) + " value.");
7173 }
7174
7175 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
7176 return ParseStatus::Success;
7177}
7178
7179ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
7180 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7181 bool (*ConvertResult)(int64_t &)) {
7182 SMLoc S = getLoc();
7183 if (!trySkipId(Prefix, AsmToken::Colon))
7184 return ParseStatus::NoMatch;
7185
7186 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
7187 return ParseStatus::Failure;
7188
7189 unsigned Val = 0;
7190 const unsigned MaxSize = 4;
7191
7192 // FIXME: How to verify the number of elements matches the number of src
7193 // operands?
7194 for (int I = 0; ; ++I) {
7195 int64_t Op;
7196 SMLoc Loc = getLoc();
7197 if (!parseExpr(Op))
7198 return ParseStatus::Failure;
7199
7200 if (Op != 0 && Op != 1)
7201 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
7202
7203 Val |= (Op << I);
7204
7205 if (trySkipToken(AsmToken::RBrac))
7206 break;
7207
7208 if (I + 1 == MaxSize)
7209 return Error(getLoc(), "expected a closing square bracket");
7210
7211 if (!skipToken(AsmToken::Comma, "expected a comma"))
7212 return ParseStatus::Failure;
7213 }
7214
7215 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
7216 return ParseStatus::Success;
7217}
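// For example, VOP3P modifiers use this array syntax (illustrative):
//   v_pk_add_f16 v0, v1, v2 op_sel:[1,0] op_sel_hi:[0,1]
// Each element must be 0 or 1, and at most MaxSize (4) elements are accepted.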
7218
7219ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
7220 OperandVector &Operands,
7221 AMDGPUOperand::ImmTy ImmTy,
7222 bool IgnoreNegative) {
7223 int64_t Bit;
7224 SMLoc S = getLoc();
7225
7226 if (trySkipId(Name)) {
7227 Bit = 1;
7228 } else if (trySkipId("no", Name)) {
7229 if (IgnoreNegative)
7230 return ParseStatus::Success;
7231 Bit = 0;
7232 } else {
7233 return ParseStatus::NoMatch;
7234 }
7235
7236 if (Name == "r128" && !hasMIMG_R128())
7237 return Error(S, "r128 modifier is not supported on this GPU");
7238 if (Name == "a16" && !hasA16())
7239 return Error(S, "a16 modifier is not supported on this GPU");
7240
7241 if (Bit == 0 && Name == "gds") {
7242 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7243 if (Mnemo.starts_with("ds_gws"))
7244 return Error(S, "nogds is not allowed");
7245 }
7246
7247 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7248 ImmTy = AMDGPUOperand::ImmTyR128A16;
7249
7250 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
7251 return ParseStatus::Success;
7252}
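// For example, a named bit 'gds' is enabled as "gds" and disabled as "nogds"
// (illustrative): ds_add_u32 v2, v4 gds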
7253
7254unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7255 bool &Disabling) const {
7256 Disabling = Id.consume_front("no");
7257
7258 if (isGFX940() && !Mnemo.starts_with("s_")) {
7259 return StringSwitch<unsigned>(Id)
7260 .Case("nt", AMDGPU::CPol::NT)
7261 .Case("sc0", AMDGPU::CPol::SC0)
7262 .Case("sc1", AMDGPU::CPol::SC1)
7263 .Default(0);
7264 }
7265
7266 return StringSwitch<unsigned>(Id)
7267 .Case("dlc", AMDGPU::CPol::DLC)
7268 .Case("glc", AMDGPU::CPol::GLC)
7269 .Case("scc", AMDGPU::CPol::SCC)
7270 .Case("slc", AMDGPU::CPol::SLC)
7271 .Default(0);
7272}
7273
7274ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
7275 if (isGFX12Plus()) {
7276 SMLoc StringLoc = getLoc();
7277
7278 int64_t CPolVal = 0;
7279 ParseStatus ResTH = ParseStatus::NoMatch;
7280 ParseStatus ResScope = ParseStatus::NoMatch;
7281 ParseStatus ResNV = ParseStatus::NoMatch;
7282 ParseStatus ResScal = ParseStatus::NoMatch;
7283
7284 for (;;) {
7285 if (ResTH.isNoMatch()) {
7286 int64_t TH;
7287 ResTH = parseTH(Operands, TH);
7288 if (ResTH.isFailure())
7289 return ResTH;
7290 if (ResTH.isSuccess()) {
7291 CPolVal |= TH;
7292 continue;
7293 }
7294 }
7295
7296 if (ResScope.isNoMatch()) {
7297 int64_t Scope;
7298 ResScope = parseScope(Operands, Scope);
7299 if (ResScope.isFailure())
7300 return ResScope;
7301 if (ResScope.isSuccess()) {
7302 CPolVal |= Scope;
7303 continue;
7304 }
7305 }
7306
7307 // The NV bit exists on GFX12+, but only has an effect starting with
7308 // GFX1250. Allow parsing it on all GFX12 targets and reject it during
7309 // validation to produce better diagnostics.
7310 if (ResNV.isNoMatch()) {
7311 if (trySkipId("nv")) {
7312 ResNV = ParseStatus::Success;
7313 CPolVal |= CPol::NV;
7314 continue;
7315 } else if (trySkipId("no", "nv")) {
7316 ResNV = ParseStatus::Success;
7317 continue;
7318 }
7319 }
7320
7321 if (ResScal.isNoMatch()) {
7322 if (trySkipId("scale_offset")) {
7323 ResScal = ParseStatus::Success;
7324 CPolVal |= CPol::SCAL;
7325 continue;
7326 } else if (trySkipId("no", "scale_offset")) {
7327 ResScal = ParseStatus::Success;
7328 continue;
7329 }
7330 }
7331
7332 break;
7333 }
7334
7335 if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
7336 ResScal.isNoMatch())
7337 return ParseStatus::NoMatch;
7338
7339 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
7340 AMDGPUOperand::ImmTyCPol));
7341 return ParseStatus::Success;
7342 }
7343
7344 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7345 SMLoc OpLoc = getLoc();
7346 unsigned Enabled = 0, Seen = 0;
7347 for (;;) {
7348 SMLoc S = getLoc();
7349 bool Disabling;
7350 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
7351 if (!CPol)
7352 break;
7353
7354 lex();
7355
7356 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
7357 return Error(S, "dlc modifier is not supported on this GPU");
7358
7359 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
7360 return Error(S, "scc modifier is not supported on this GPU");
7361
7362 if (Seen & CPol)
7363 return Error(S, "duplicate cache policy modifier");
7364
7365 if (!Disabling)
7366 Enabled |= CPol;
7367
7368 Seen |= CPol;
7369 }
7370
7371 if (!Seen)
7372 return ParseStatus::NoMatch;
7373
7374 Operands.push_back(
7375 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
7376 return ParseStatus::Success;
7377}
7378
7379ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
7380 int64_t &Scope) {
7381 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
7382 CPol::SCOPE_DEV, CPol::SCOPE_SYS};
7383
7384 ParseStatus Res = parseStringOrIntWithPrefix(
7385 Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
7386 Scope);
7387
7388 if (Res.isSuccess())
7389 Scope = Scopes[Scope];
7390
7391 return Res;
7392}
7393
7394ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
7395 TH = AMDGPU::CPol::TH_RT; // default
7396
7397 StringRef Value;
7398 SMLoc StringLoc;
7399 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
7400 if (!Res.isSuccess())
7401 return Res;
7402
7403 if (Value == "TH_DEFAULT")
7404 TH = AMDGPU::CPol::TH_RT;
7405 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
7406 Value == "TH_LOAD_NT_WB") {
7407 return Error(StringLoc, "invalid th value");
7408 } else if (Value.consume_front("TH_ATOMIC_")) {
7409 TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
7410 } else if (Value.consume_front("TH_LOAD_")) {
7411 TH = AMDGPU::CPol::TH_TYPE_LOAD;
7412 } else if (Value.consume_front("TH_STORE_")) {
7413 TH = AMDGPU::CPol::TH_TYPE_STORE;
7414 } else {
7415 return Error(StringLoc, "invalid th value");
7416 }
7417
7418 if (Value == "BYPASS")
7419 TH |= AMDGPU::CPol::TH_REAL_BYPASS;
7420
7421 if (TH != 0) {
7422 if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC)
7423 TH |= StringSwitch<int64_t>(Value)
7424 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7425 .Case("RT", AMDGPU::CPol::TH_RT)
7426 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7427 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
7428 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
7429 AMDGPU::CPol::TH_ATOMIC_RETURN)
7430 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
7431 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
7432 AMDGPU::CPol::TH_ATOMIC_NT)
7433 .Default(0xffffffff);
7434 else
7435 TH |= StringSwitch<int64_t>(Value)
7436 .Case("RT", AMDGPU::CPol::TH_RT)
7437 .Case("NT", AMDGPU::CPol::TH_NT)
7438 .Case("HT", AMDGPU::CPol::TH_HT)
7439 .Case("LU", AMDGPU::CPol::TH_LU)
7440 .Case("WB", AMDGPU::CPol::TH_WB)
7441 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
7442 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
7443 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
7444 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
7445 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
7446 .Default(0xffffffff);
7447 }
7448
7449 if (TH == 0xffffffff)
7450 return Error(StringLoc, "invalid th value");
7451
7452 return ParseStatus::Success;
7453}
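// Illustrative cache-policy operands accepted by the parsers above
// (availability depends on the subtarget):
//   GFX10:  global_load_dword v0, v[2:3], off glc slc dlc
//   GFX940: global_load_dword v0, v[2:3], off sc0 sc1 nt
//   GFX12+: global_load_b32 v0, v[2:3], off th:TH_LOAD_NT scope:SCOPE_SE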
7454
7455static void
7456 addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands,
7457 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7458 AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
7459 std::optional<unsigned> InsertAt = std::nullopt) {
7460 auto i = OptionalIdx.find(ImmT);
7461 if (i != OptionalIdx.end()) {
7462 unsigned Idx = i->second;
7463 const AMDGPUOperand &Op =
7464 static_cast<const AMDGPUOperand &>(*Operands[Idx]);
7465 if (InsertAt)
7466 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
7467 else
7468 Op.addImmOperands(Inst, 1);
7469 } else {
7470 if (InsertAt.has_value())
7471 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
7472 else
7473 Inst.addOperand(MCOperand::createImm(Default));
7474 }
7475}
7476
7477ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7478 StringRef &Value,
7479 SMLoc &StringLoc) {
7480 if (!trySkipId(Prefix, AsmToken::Colon))
7481 return ParseStatus::NoMatch;
7482
7483 StringLoc = getLoc();
7484 return parseId(Value, "expected an identifier") ? ParseStatus::Success
7485 : ParseStatus::Failure;
7486}
7487
7488ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7489 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7490 int64_t &IntVal) {
7491 if (!trySkipId(Name, AsmToken::Colon))
7492 return ParseStatus::NoMatch;
7493
7494 SMLoc StringLoc = getLoc();
7495
7496 StringRef Value;
7497 if (isToken(AsmToken::Identifier)) {
7498 Value = getTokenStr();
7499 lex();
7500
7501 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
7502 if (Value == Ids[IntVal])
7503 break;
7504 } else if (!parseExpr(IntVal))
7505 return ParseStatus::Failure;
7506
7507 if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
7508 return Error(StringLoc, "invalid " + Twine(Name) + " value");
7509
7510 return ParseStatus::Success;
7511}
7512
7513ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7514 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7515 AMDGPUOperand::ImmTy Type) {
7516 SMLoc S = getLoc();
7517 int64_t IntVal;
7518
7519 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7520 if (Res.isSuccess())
7521 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
7522
7523 return Res;
7524}
7525
7526//===----------------------------------------------------------------------===//
7527// MTBUF format
7528//===----------------------------------------------------------------------===//
7529
7530bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
7531 int64_t MaxVal,
7532 int64_t &Fmt) {
7533 int64_t Val;
7534 SMLoc Loc = getLoc();
7535
7536 auto Res = parseIntWithPrefix(Pref, Val);
7537 if (Res.isFailure())
7538 return false;
7539 if (Res.isNoMatch())
7540 return true;
7541
7542 if (Val < 0 || Val > MaxVal) {
7543 Error(Loc, Twine("out of range ", StringRef(Pref)));
7544 return false;
7545 }
7546
7547 Fmt = Val;
7548 return true;
7549}
7550
7551ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7552 AMDGPUOperand::ImmTy ImmTy) {
7553 const char *Pref = "index_key";
7554 int64_t ImmVal = 0;
7555 SMLoc Loc = getLoc();
7556 auto Res = parseIntWithPrefix(Pref, ImmVal);
7557 if (!Res.isSuccess())
7558 return Res;
7559
7560 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7561 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7562 (ImmVal < 0 || ImmVal > 1))
7563 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7564
7565 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7566 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7567
7568 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
7569 return ParseStatus::Success;
7570}
7571
7572ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
7573 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
7574}
7575
7576ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
7577 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
7578}
7579
7580ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
7581 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
7582}
7583
7584ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
7585 StringRef Name,
7586 AMDGPUOperand::ImmTy Type) {
7587 return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixFmt,
7588 Type);
7589}
7590
7591ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
7592 return tryParseMatrixFMT(Operands, "matrix_a_fmt",
7593 AMDGPUOperand::ImmTyMatrixAFMT);
7594}
7595
7596ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
7597 return tryParseMatrixFMT(Operands, "matrix_b_fmt",
7598 AMDGPUOperand::ImmTyMatrixBFMT);
7599}
7600
7601ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
7602 StringRef Name,
7603 AMDGPUOperand::ImmTy Type) {
7604 return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixScale,
7605 Type);
7606}
7607
7608ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
7609 return tryParseMatrixScale(Operands, "matrix_a_scale",
7610 AMDGPUOperand::ImmTyMatrixAScale);
7611}
7612
7613ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
7614 return tryParseMatrixScale(Operands, "matrix_b_scale",
7615 AMDGPUOperand::ImmTyMatrixBScale);
7616}
7617
7618ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
7619 StringRef Name,
7620 AMDGPUOperand::ImmTy Type) {
7621 return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixScaleFmt,
7622 Type);
7623}
7624
7625ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
7626 return tryParseMatrixScaleFmt(Operands, "matrix_a_scale_fmt",
7627 AMDGPUOperand::ImmTyMatrixAScaleFmt);
7628}
7629
7630ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
7631 return tryParseMatrixScaleFmt(Operands, "matrix_b_scale_fmt",
7632 AMDGPUOperand::ImmTyMatrixBScaleFmt);
7633}
7634
7635// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
7636// values to live in a joint format operand in the MCInst encoding.
7637ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
7638 using namespace llvm::AMDGPU::MTBUFFormat;
7639
7640 int64_t Dfmt = DFMT_UNDEF;
7641 int64_t Nfmt = NFMT_UNDEF;
7642
7643 // dfmt and nfmt can appear in either order, and each is optional.
7644 for (int I = 0; I < 2; ++I) {
7645 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
7646 return ParseStatus::Failure;
7647
7648 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
7649 return ParseStatus::Failure;
7650
7651 // Skip optional comma between dfmt/nfmt
7652 // but guard against 2 commas following each other.
7653 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7654 !peekToken().is(AsmToken::Comma)) {
7655 trySkipToken(AsmToken::Comma);
7656 }
7657 }
7658
7659 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7660 return ParseStatus::NoMatch;
7661
7662 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7663 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7664
7665 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7666 return ParseStatus::Success;
7667}
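// For example, on pre-GFX10 targets (illustrative):
//   tbuffer_load_format_x v0, off, s[4:7], dfmt:15, nfmt:2, s0
// dfmt and nfmt may appear in either order, and either may be omitted.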
7668
7669ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7670 using namespace llvm::AMDGPU::MTBUFFormat;
7671
7672 int64_t Fmt = UFMT_UNDEF;
7673
7674 if (!tryParseFmt("format", UFMT_MAX, Fmt))
7675 return ParseStatus::Failure;
7676
7677 if (Fmt == UFMT_UNDEF)
7678 return ParseStatus::NoMatch;
7679
7680 Format = Fmt;
7681 return ParseStatus::Success;
7682}
7683
7684bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7685 int64_t &Nfmt,
7686 StringRef FormatStr,
7687 SMLoc Loc) {
7688 using namespace llvm::AMDGPU::MTBUFFormat;
7689 int64_t Format;
7690
7691 Format = getDfmt(FormatStr);
7692 if (Format != DFMT_UNDEF) {
7693 Dfmt = Format;
7694 return true;
7695 }
7696
7697 Format = getNfmt(FormatStr, getSTI());
7698 if (Format != NFMT_UNDEF) {
7699 Nfmt = Format;
7700 return true;
7701 }
7702
7703 Error(Loc, "unsupported format");
7704 return false;
7705}
7706
7707ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7708 SMLoc FormatLoc,
7709 int64_t &Format) {
7710 using namespace llvm::AMDGPU::MTBUFFormat;
7711
7712 int64_t Dfmt = DFMT_UNDEF;
7713 int64_t Nfmt = NFMT_UNDEF;
7714 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7715 return ParseStatus::Failure;
7716
7717 if (trySkipToken(AsmToken::Comma)) {
7718 StringRef Str;
7719 SMLoc Loc = getLoc();
7720 if (!parseId(Str, "expected a format string") ||
7721 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7722 return ParseStatus::Failure;
7723 if (Dfmt == DFMT_UNDEF)
7724 return Error(Loc, "duplicate numeric format");
7725 if (Nfmt == NFMT_UNDEF)
7726 return Error(Loc, "duplicate data format");
7727 }
7728
7729 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7730 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7731
7732 if (isGFX10Plus()) {
7733 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
7734 if (Ufmt == UFMT_UNDEF)
7735 return Error(FormatLoc, "unsupported format");
7736 Format = Ufmt;
7737 } else {
7738 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7739 }
7740
7741 return ParseStatus::Success;
7742}
7743
7744ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7745 SMLoc Loc,
7746 int64_t &Format) {
7747 using namespace llvm::AMDGPU::MTBUFFormat;
7748
7749 auto Id = getUnifiedFormat(FormatStr, getSTI());
7750 if (Id == UFMT_UNDEF)
7751 return ParseStatus::NoMatch;
7752
7753 if (!isGFX10Plus())
7754 return Error(Loc, "unified format is not supported on this GPU");
7755
7756 Format = Id;
7757 return ParseStatus::Success;
7758}
7759
7760ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7761 using namespace llvm::AMDGPU::MTBUFFormat;
7762 SMLoc Loc = getLoc();
7763
7764 if (!parseExpr(Format))
7765 return ParseStatus::Failure;
7766 if (!isValidFormatEncoding(Format, getSTI()))
7767 return Error(Loc, "out of range format");
7768
7769 return ParseStatus::Success;
7770}
7771
7772ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
7773 using namespace llvm::AMDGPU::MTBUFFormat;
7774
7775 if (!trySkipId("format", AsmToken::Colon))
7776 return ParseStatus::NoMatch;
7777
7778 if (trySkipToken(AsmToken::LBrac)) {
7779 StringRef FormatStr;
7780 SMLoc Loc = getLoc();
7781 if (!parseId(FormatStr, "expected a format string"))
7782 return ParseStatus::Failure;
7783
7784 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
7785 if (Res.isNoMatch())
7786 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
7787 if (!Res.isSuccess())
7788 return Res;
7789
7790 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7791 return ParseStatus::Failure;
7792
7793 return ParseStatus::Success;
7794 }
7795
7796 return parseNumericFormat(Format);
7797}
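// Illustrative forms accepted here (symbolic names vary by subtarget):
//   format:[BUF_FMT_32_FLOAT]                          ; unified, GFX10+
//   format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]  ; split, pre-GFX10
//   format:22                                          ; numeric encoding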
7798
7799ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
7800 using namespace llvm::AMDGPU::MTBUFFormat;
7801
7802 int64_t Format = getDefaultFormatEncoding(getSTI());
7803 ParseStatus Res;
7804 SMLoc Loc = getLoc();
7805
7806 // Parse legacy format syntax.
7807 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
7808 if (Res.isFailure())
7809 return Res;
7810
7811 bool FormatFound = Res.isSuccess();
7812
7813 Operands.push_back(
7814 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7815
7816 if (FormatFound)
7817 trySkipToken(AsmToken::Comma);
7818
7819 if (isToken(AsmToken::EndOfStatement)) {
7820 // We are expecting an soffset operand,
7821 // but let matcher handle the error.
7822 return ParseStatus::Success;
7823 }
7824
7825 // Parse soffset.
7826 Res = parseRegOrImm(Operands);
7827 if (!Res.isSuccess())
7828 return Res;
7829
7830 trySkipToken(AsmToken::Comma);
7831
7832 if (!FormatFound) {
7833 Res = parseSymbolicOrNumericFormat(Format);
7834 if (Res.isFailure())
7835 return Res;
7836 if (Res.isSuccess()) {
7837 auto Size = Operands.size();
7838 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
7839 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7840 Op.setImm(Format);
7841 }
7842 return ParseStatus::Success;
7843 }
7844
7845 if (isId("format") && peekToken().is(AsmToken::Colon))
7846 return Error(getLoc(), "duplicate format");
7847 return ParseStatus::Success;
7848}
7849
7850ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7851 ParseStatus Res =
7852 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7853 if (Res.isNoMatch()) {
7854 Res = parseIntWithPrefix("inst_offset", Operands,
7855 AMDGPUOperand::ImmTyInstOffset);
7856 }
7857 return Res;
7858}
7859
7860ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7861 ParseStatus Res =
7862 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7863 if (Res.isNoMatch())
7864 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7865 return Res;
7866}
7867
7868ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7869 ParseStatus Res =
7870 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7871 if (Res.isNoMatch()) {
7872 Res =
7873 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7874 }
7875 return Res;
7876}
7877
7878//===----------------------------------------------------------------------===//
7879// Exp
7880//===----------------------------------------------------------------------===//
7881
7882void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7883 OptionalImmIndexMap OptionalIdx;
7884
7885 unsigned OperandIdx[4];
7886 unsigned EnMask = 0;
7887 int SrcIdx = 0;
7888
7889 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7890 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7891
7892 // Add the register arguments
7893 if (Op.isReg()) {
7894 assert(SrcIdx < 4);
7895 OperandIdx[SrcIdx] = Inst.size();
7896 Op.addRegOperands(Inst, 1);
7897 ++SrcIdx;
7898 continue;
7899 }
7900
7901 if (Op.isOff()) {
7902 assert(SrcIdx < 4);
7903 OperandIdx[SrcIdx] = Inst.size();
7904 Inst.addOperand(MCOperand::createReg(MCRegister()));
7905 ++SrcIdx;
7906 continue;
7907 }
7908
7909 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7910 Op.addImmOperands(Inst, 1);
7911 continue;
7912 }
7913
7914 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7915 continue;
7916
7917 // Handle optional arguments
7918 OptionalIdx[Op.getImmTy()] = i;
7919 }
7920
7921 assert(SrcIdx == 4);
7922
7923 bool Compr = false;
7924 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7925 Compr = true;
7926 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7927 Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
7928 Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
7929 }
7930
7931 for (auto i = 0; i < SrcIdx; ++i) {
7932 if (Inst.getOperand(OperandIdx[i]).getReg()) {
7933 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7934 }
7935 }
7936
7937 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7938 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7939
7940 Inst.addOperand(MCOperand::createImm(EnMask));
7941}
7942
7943//===----------------------------------------------------------------------===//
7944// s_waitcnt
7945//===----------------------------------------------------------------------===//
7946
7947static bool
7948 encodeCnt(
7949 const AMDGPU::IsaVersion ISA,
7950 int64_t &IntVal,
7951 int64_t CntVal,
7952 bool Saturate,
7953 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7954 unsigned (*decode)(const IsaVersion &Version, unsigned))
7955{
7956 bool Failed = false;
7957
7958 IntVal = encode(ISA, IntVal, CntVal);
7959 if (CntVal != decode(ISA, IntVal)) {
7960 if (Saturate) {
7961 IntVal = encode(ISA, IntVal, -1);
7962 } else {
7963 Failed = true;
7964 }
7965 }
7966 return Failed;
7967}
7968
7969bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7970
7971 SMLoc CntLoc = getLoc();
7972 StringRef CntName = getTokenStr();
7973
7974 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7975 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7976 return false;
7977
7978 int64_t CntVal;
7979 SMLoc ValLoc = getLoc();
7980 if (!parseExpr(CntVal))
7981 return false;
7982
7983 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7984
7985 bool Failed = true;
7986 bool Sat = CntName.ends_with("_sat");
7987
7988 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7989 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7990 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7991 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7992 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7993 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7994 } else {
7995 Error(CntLoc, "invalid counter name " + CntName);
7996 return false;
7997 }
7998
7999 if (Failed) {
8000 Error(ValLoc, "too large value for " + CntName);
8001 return false;
8002 }
8003
8004 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8005 return false;
8006
8007 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
8008 if (isToken(AsmToken::EndOfStatement)) {
8009 Error(getLoc(), "expected a counter name");
8010 return false;
8011 }
8012 }
8013
8014 return true;
8015}
8016
8017ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
8018 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
8019 int64_t Waitcnt = getWaitcntBitMask(ISA);
8020 SMLoc S = getLoc();
8021
8022 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8023 while (!isToken(AsmToken::EndOfStatement)) {
8024 if (!parseCnt(Waitcnt))
8025 return ParseStatus::Failure;
8026 }
8027 } else {
8028 if (!parseExpr(Waitcnt))
8029 return ParseStatus::Failure;
8030 }
8031
8032 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
8033 return ParseStatus::Success;
8034}
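// For example: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
// Counters may also be joined with '&' or ',', and the operand may instead
// be a plain expression, e.g. s_waitcnt 0.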
8035
8036bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
8037 SMLoc FieldLoc = getLoc();
8038 StringRef FieldName = getTokenStr();
8039 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
8040 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
8041 return false;
8042
8043 SMLoc ValueLoc = getLoc();
8044 StringRef ValueName = getTokenStr();
8045 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
8046 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
8047 return false;
8048
8049 unsigned Shift;
8050 if (FieldName == "instid0") {
8051 Shift = 0;
8052 } else if (FieldName == "instskip") {
8053 Shift = 4;
8054 } else if (FieldName == "instid1") {
8055 Shift = 7;
8056 } else {
8057 Error(FieldLoc, "invalid field name " + FieldName);
8058 return false;
8059 }
8060
8061 int Value;
8062 if (Shift == 4) {
8063 // Parse values for instskip.
8064 Value = StringSwitch<int>(ValueName)
8065 .Case("SAME", 0)
8066 .Case("NEXT", 1)
8067 .Case("SKIP_1", 2)
8068 .Case("SKIP_2", 3)
8069 .Case("SKIP_3", 4)
8070 .Case("SKIP_4", 5)
8071 .Default(-1);
8072 } else {
8073 // Parse values for instid0 and instid1.
8074 Value = StringSwitch<int>(ValueName)
8075 .Case("NO_DEP", 0)
8076 .Case("VALU_DEP_1", 1)
8077 .Case("VALU_DEP_2", 2)
8078 .Case("VALU_DEP_3", 3)
8079 .Case("VALU_DEP_4", 4)
8080 .Case("TRANS32_DEP_1", 5)
8081 .Case("TRANS32_DEP_2", 6)
8082 .Case("TRANS32_DEP_3", 7)
8083 .Case("FMA_ACCUM_CYCLE_1", 8)
8084 .Case("SALU_CYCLE_1", 9)
8085 .Case("SALU_CYCLE_2", 10)
8086 .Case("SALU_CYCLE_3", 11)
8087 .Default(-1);
8088 }
8089 if (Value < 0) {
8090 Error(ValueLoc, "invalid value name " + ValueName);
8091 return false;
8092 }
8093
8094 Delay |= Value << Shift;
8095 return true;
8096}
8097
8098ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
8099 int64_t Delay = 0;
8100 SMLoc S = getLoc();
8101
8102 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8103 do {
8104 if (!parseDelay(Delay))
8105 return ParseStatus::Failure;
8106 } while (trySkipToken(AsmToken::Pipe));
8107 } else {
8108 if (!parseExpr(Delay))
8109 return ParseStatus::Failure;
8110 }
8111
8112 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
8113 return ParseStatus::Success;
8114}
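// For example (GFX11+, illustrative):
//   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)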
8115
8116bool
8117AMDGPUOperand::isSWaitCnt() const {
8118 return isImm();
8119}
8120
8121bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
8122
8123//===----------------------------------------------------------------------===//
8124// DepCtr
8125//===----------------------------------------------------------------------===//
8126
8127void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
8128 StringRef DepCtrName) {
8129 switch (ErrorId) {
8130 case OPR_ID_UNKNOWN:
8131 Error(Loc, Twine("invalid counter name ", DepCtrName));
8132 return;
8133 case OPR_ID_UNSUPPORTED:
8134 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
8135 return;
8136 case OPR_ID_DUPLICATE:
8137 Error(Loc, Twine("duplicate counter name ", DepCtrName));
8138 return;
8139 case OPR_VAL_INVALID:
8140 Error(Loc, Twine("invalid value for ", DepCtrName));
8141 return;
8142 default:
8143 assert(false);
8144 }
8145}
8146
8147bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
8148
8149 using namespace llvm::AMDGPU::DepCtr;
8150
8151 SMLoc DepCtrLoc = getLoc();
8152 StringRef DepCtrName = getTokenStr();
8153
8154 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
8155 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
8156 return false;
8157
8158 int64_t ExprVal;
8159 if (!parseExpr(ExprVal))
8160 return false;
8161
8162 unsigned PrevOprMask = UsedOprMask;
8163 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
8164
8165 if (CntVal < 0) {
8166 depCtrError(DepCtrLoc, CntVal, DepCtrName);
8167 return false;
8168 }
8169
8170 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8171 return false;
8172
8173 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
8174 if (isToken(AsmToken::EndOfStatement)) {
8175 Error(getLoc(), "expected a counter name");
8176 return false;
8177 }
8178 }
8179
8180 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
8181 DepCtr = (DepCtr & ~CntValMask) | CntVal;
8182 return true;
8183}
8184
8185ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
8186 using namespace llvm::AMDGPU::DepCtr;
8187
8188 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
8189 SMLoc Loc = getLoc();
8190
8191 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8192 unsigned UsedOprMask = 0;
8193 while (!isToken(AsmToken::EndOfStatement)) {
8194 if (!parseDepCtr(DepCtr, UsedOprMask))
8195 return ParseStatus::Failure;
8196 }
8197 } else {
8198 if (!parseExpr(DepCtr))
8199 return ParseStatus::Failure;
8200 }
8201
8202 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
8203 return ParseStatus::Success;
8204}
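// For example (illustrative; valid counter names depend on the subtarget):
//   s_waitcnt_depctr depctr_va_vdst(0) & depctr_sa_sdst(0)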
8205
8206bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
8207
8208//===----------------------------------------------------------------------===//
8209// hwreg
8210//===----------------------------------------------------------------------===//
8211
8212ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
8213 OperandInfoTy &Offset,
8214 OperandInfoTy &Width) {
8215 using namespace llvm::AMDGPU::Hwreg;
8216
8217 if (!trySkipId("hwreg", AsmToken::LParen))
8218 return ParseStatus::NoMatch;
8219
8220 // The register may be specified by name or using a numeric code
8221 HwReg.Loc = getLoc();
8222 if (isToken(AsmToken::Identifier) &&
8223 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8224 HwReg.IsSymbolic = true;
8225 lex(); // skip register name
8226 } else if (!parseExpr(HwReg.Val, "a register name")) {
8227 return ParseStatus::Failure;
8228 }
8229
8230 if (trySkipToken(AsmToken::RParen))
8231 return ParseStatus::Success;
8232
8233 // parse optional params
8234 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
8235 return ParseStatus::Failure;
8236
8237 Offset.Loc = getLoc();
8238 if (!parseExpr(Offset.Val))
8239 return ParseStatus::Failure;
8240
8241 if (!skipToken(AsmToken::Comma, "expected a comma"))
8242 return ParseStatus::Failure;
8243
8244 Width.Loc = getLoc();
8245 if (!parseExpr(Width.Val) ||
8246 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8247 return ParseStatus::Failure;
8248
8249 return ParseStatus::Success;
8250}
8251
8252ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
8253 using namespace llvm::AMDGPU::Hwreg;
8254
8255 int64_t ImmVal = 0;
8256 SMLoc Loc = getLoc();
8257
8258 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
8259 HwregId::Default);
8260 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
8261 HwregOffset::Default);
8262 struct : StructuredOpField {
8263 using StructuredOpField::StructuredOpField;
8264 bool validate(AMDGPUAsmParser &Parser) const override {
8265 if (!isUIntN(Width, Val - 1))
8266 return Error(Parser, "only values from 1 to 32 are legal");
8267 return true;
8268 }
8269 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
8270 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
8271
8272 if (Res.isNoMatch())
8273 Res = parseHwregFunc(HwReg, Offset, Width);
8274
8275 if (Res.isSuccess()) {
8276 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
8277 return ParseStatus::Failure;
8278 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
8279 }
8280
8281 if (Res.isNoMatch() &&
8282 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
8283 Res = ParseStatus::Success;
8284
8285 if (!Res.isSuccess())
8286 return ParseStatus::Failure;
8287
8288 if (!isUInt<16>(ImmVal))
8289 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8290 Operands.push_back(
8291 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8292 return ParseStatus::Success;
8293}
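// Illustrative forms accepted here:
//   s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)     ; symbolic register name
//   s_getreg_b32 s2, hwreg(1, 0, 4)               ; numeric register code
//   s_getreg_b32 s2, {id: 1, offset: 0, size: 4}  ; structured fields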
8294
8295bool AMDGPUOperand::isHwreg() const {
8296 return isImmTy(ImmTyHwreg);
8297}
8298
8299//===----------------------------------------------------------------------===//
8300// sendmsg
8301//===----------------------------------------------------------------------===//
8302
8303bool
8304AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8305 OperandInfoTy &Op,
8306 OperandInfoTy &Stream) {
8307 using namespace llvm::AMDGPU::SendMsg;
8308
8309 Msg.Loc = getLoc();
8310 if (isToken(AsmToken::Identifier) &&
8311 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8312 Msg.IsSymbolic = true;
8313 lex(); // skip message name
8314 } else if (!parseExpr(Msg.Val, "a message name")) {
8315 return false;
8316 }
8317
8318 if (trySkipToken(AsmToken::Comma)) {
8319 Op.IsDefined = true;
8320 Op.Loc = getLoc();
8321 if (isToken(AsmToken::Identifier) &&
8322 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8323 OPR_ID_UNKNOWN) {
8324 lex(); // skip operation name
8325 } else if (!parseExpr(Op.Val, "an operation name")) {
8326 return false;
8327 }
8328
8329 if (trySkipToken(AsmToken::Comma)) {
8330 Stream.IsDefined = true;
8331 Stream.Loc = getLoc();
8332 if (!parseExpr(Stream.Val))
8333 return false;
8334 }
8335 }
8336
8337 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
8338}
8339
8340bool
8341AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
8342 const OperandInfoTy &Op,
8343 const OperandInfoTy &Stream) {
8344 using namespace llvm::AMDGPU::SendMsg;
8345
8346 // Validation strictness depends on whether the message is specified
8347 // in symbolic or in numeric form. In the latter case,
8348 // only the possibility of encoding is checked.
8349 bool Strict = Msg.IsSymbolic;
8350
8351 if (Strict) {
8352 if (Msg.Val == OPR_ID_UNSUPPORTED) {
8353 Error(Msg.Loc, "specified message id is not supported on this GPU");
8354 return false;
8355 }
8356 } else {
8357 if (!isValidMsgId(Msg.Val, getSTI())) {
8358 Error(Msg.Loc, "invalid message id");
8359 return false;
8360 }
8361 }
8362 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
8363 if (Op.IsDefined) {
8364 Error(Op.Loc, "message does not support operations");
8365 } else {
8366 Error(Msg.Loc, "missing message operation");
8367 }
8368 return false;
8369 }
8370 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
8371 if (Op.Val == OPR_ID_UNSUPPORTED)
8372 Error(Op.Loc, "specified operation id is not supported on this GPU");
8373 else
8374 Error(Op.Loc, "invalid operation id");
8375 return false;
8376 }
8377 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
8378 Stream.IsDefined) {
8379 Error(Stream.Loc, "message operation does not support streams");
8380 return false;
8381 }
8382 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
8383 Error(Stream.Loc, "invalid message stream id");
8384 return false;
8385 }
8386 return true;
8387}
8388
8389ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
8390 using namespace llvm::AMDGPU::SendMsg;
8391
8392 int64_t ImmVal = 0;
8393 SMLoc Loc = getLoc();
8394
8395 if (trySkipId("sendmsg", AsmToken::LParen)) {
8396 OperandInfoTy Msg(OPR_ID_UNKNOWN);
8397 OperandInfoTy Op(OP_NONE_);
8398 OperandInfoTy Stream(STREAM_ID_NONE_);
8399 if (parseSendMsgBody(Msg, Op, Stream) &&
8400 validateSendMsg(Msg, Op, Stream)) {
8401 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
8402 } else {
8403 return ParseStatus::Failure;
8404 }
8405 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
8406 if (ImmVal < 0 || !isUInt<16>(ImmVal))
8407 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8408 } else {
8409 return ParseStatus::Failure;
8410 }
8411
8412 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
8413 return ParseStatus::Success;
8414}
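// For example (illustrative; message/operation names vary by subtarget):
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
//   s_sendmsg sendmsg(MSG_INTERRUPT)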
8415
8416bool AMDGPUOperand::isSendMsg() const {
8417 return isImmTy(ImmTySendMsg);
8418}
8419
8420ParseStatus AMDGPUAsmParser::parseWaitEvent(OperandVector &Operands) {
8421 using namespace llvm::AMDGPU::WaitEvent;
8422
8423 SMLoc Loc = getLoc();
8424 int64_t ImmVal = 0;
8425
8426 StructuredOpField DontWaitExportReady("dont_wait_export_ready", "bit value",
8427 1, 0);
8428 StructuredOpField ExportReady("export_ready", "bit value", 1, 0);
8429
8430 StructuredOpField *TargetBitfield =
8431 isGFX11() ? &DontWaitExportReady : &ExportReady;
8432
8433 ParseStatus Res = parseStructuredOpFields({TargetBitfield});
8434 if (Res.isNoMatch() && parseExpr(ImmVal, "structured immediate"))
8435 Res = ParseStatus::Success;
8436 else if (Res.isSuccess()) {
8437 if (!validateStructuredOpFields({TargetBitfield}))
8438 return ParseStatus::Failure;
8439 ImmVal = TargetBitfield->Val;
8440 }
8441
8442 if (!Res.isSuccess())
8443 return ParseStatus::Failure;
8444
8445 if (!isUInt<16>(ImmVal))
8446 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8447
8448 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc,
8449 AMDGPUOperand::ImmTyWaitEvent));
8450 return ParseStatus::Success;
8451}
8452
8453bool AMDGPUOperand::isWaitEvent() const { return isImmTy(ImmTyWaitEvent); }
8454
8455//===----------------------------------------------------------------------===//
8456// v_interp
8457//===----------------------------------------------------------------------===//
8458
8459ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
8460 StringRef Str;
8461 SMLoc S = getLoc();
8462
8463 if (!parseId(Str))
8464 return ParseStatus::NoMatch;
8465
8466 int Slot = StringSwitch<int>(Str)
8467 .Case("p10", 0)
8468 .Case("p20", 1)
8469 .Case("p0", 2)
8470 .Default(-1);
8471
8472 if (Slot == -1)
8473 return Error(S, "invalid interpolation slot");
8474
8475 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
8476 AMDGPUOperand::ImmTyInterpSlot));
8477 return ParseStatus::Success;
8478}
8479
8480ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
8481 StringRef Str;
8482 SMLoc S = getLoc();
8483
8484 if (!parseId(Str))
8485 return ParseStatus::NoMatch;
8486
8487 if (!Str.starts_with("attr"))
8488 return Error(S, "invalid interpolation attribute");
8489
8490 StringRef Chan = Str.take_back(2);
8491 int AttrChan = StringSwitch<int>(Chan)
8492 .Case(".x", 0)
8493 .Case(".y", 1)
8494 .Case(".z", 2)
8495 .Case(".w", 3)
8496 .Default(-1);
8497 if (AttrChan == -1)
8498 return Error(S, "invalid or missing interpolation attribute channel");
8499
8500 Str = Str.drop_back(2).drop_front(4);
8501
8502 uint8_t Attr;
8503 if (Str.getAsInteger(10, Attr))
8504 return Error(S, "invalid or missing interpolation attribute number");
8505
8506 if (Attr > 32)
8507 return Error(S, "out of bounds interpolation attribute number");
8508
8509 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
8510
8511 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
8512 AMDGPUOperand::ImmTyInterpAttr));
8513 Operands.push_back(AMDGPUOperand::CreateImm(
8514 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8515 return ParseStatus::Success;
8516}
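// For example (illustrative): v_interp_p1_f32 v0, v1, attr0.x
// "attr0.x" yields attribute number 0 with channel 0; the p10/p20/p0
// identifiers above select the interpolation slot.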
8517
8518//===----------------------------------------------------------------------===//
8519// exp
8520//===----------------------------------------------------------------------===//
8521
8522ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
8523 using namespace llvm::AMDGPU::Exp;
8524
8525 StringRef Str;
8526 SMLoc S = getLoc();
8527
8528 if (!parseId(Str))
8529 return ParseStatus::NoMatch;
8530
8531 unsigned Id = getTgtId(Str);
8532 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
8533 return Error(S, (Id == ET_INVALID)
8534 ? "invalid exp target"
8535 : "exp target is not supported on this GPU");
8536
8537 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
8538 AMDGPUOperand::ImmTyExpTgt));
8539 return ParseStatus::Success;
8540}
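// For example (illustrative; pre-GFX11): exp mrt0 v0, v0, v0, v0 done vm
// Targets include mrt0..mrt7, mrtz, null, pos0..pos3 and param0..param31,
// subject to subtarget support.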
8541
8542//===----------------------------------------------------------------------===//
8543// parser helpers
8544//===----------------------------------------------------------------------===//
8545
8546bool
8547AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8548 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
8549}
8550
8551bool
8552AMDGPUAsmParser::isId(const StringRef Id) const {
8553 return isId(getToken(), Id);
8554}
8555
8556bool
8557AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
8558 return getTokenKind() == Kind;
8559}
8560
8561StringRef AMDGPUAsmParser::getId() const {
8562 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
8563}
8564
8565bool
8566AMDGPUAsmParser::trySkipId(const StringRef Id) {
8567 if (isId(Id)) {
8568 lex();
8569 return true;
8570 }
8571 return false;
8572}
8573
8574bool
8575AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8576 if (isToken(AsmToken::Identifier)) {
8577 StringRef Tok = getTokenStr();
8578 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
8579 lex();
8580 return true;
8581 }
8582 }
8583 return false;
8584}
8585
8586bool
8587AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8588 if (isId(Id) && peekToken().is(Kind)) {
8589 lex();
8590 lex();
8591 return true;
8592 }
8593 return false;
8594}
8595
8596bool
8597AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8598 if (isToken(Kind)) {
8599 lex();
8600 return true;
8601 }
8602 return false;
8603}
8604
8605bool
8606AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8607 const StringRef ErrMsg) {
8608 if (!trySkipToken(Kind)) {
8609 Error(getLoc(), ErrMsg);
8610 return false;
8611 }
8612 return true;
8613}
8614
8615bool
8616AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
8617 SMLoc S = getLoc();
8618
8619 const MCExpr *Expr;
8620 if (Parser.parseExpression(Expr))
8621 return false;
8622
8623 if (Expr->evaluateAsAbsolute(Imm))
8624 return true;
8625
8626 if (Expected.empty()) {
8627 Error(S, "expected absolute expression");
8628 } else {
8629 Error(S, Twine("expected ", Expected) +
8630 Twine(" or an absolute expression"));
8631 }
8632 return false;
8633}
8634
8635bool
8636AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8637 SMLoc S = getLoc();
8638
8639 const MCExpr *Expr;
8640 if (Parser.parseExpression(Expr))
8641 return false;
8642
8643 int64_t IntVal;
8644 if (Expr->evaluateAsAbsolute(IntVal)) {
8645 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
8646 } else {
8647 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8648 }
8649 return true;
8650}
8651
8652bool
8653AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8654 if (isToken(AsmToken::String)) {
8655 Val = getToken().getStringContents();
8656 lex();
8657 return true;
8658 }
8659 Error(getLoc(), ErrMsg);
8660 return false;
8661}
8662
8663bool
8664AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8665 if (isToken(AsmToken::Identifier)) {
8666 Val = getTokenStr();
8667 lex();
8668 return true;
8669 }
8670 if (!ErrMsg.empty())
8671 Error(getLoc(), ErrMsg);
8672 return false;
8673}
8674
8675AsmToken
8676AMDGPUAsmParser::getToken() const {
8677 return Parser.getTok();
8678}
8679
8680AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8681 return isToken(AsmToken::EndOfStatement)
8682 ? getToken()
8683 : getLexer().peekTok(ShouldSkipSpace);
8684}
8685
8686void
8687AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
8688 auto TokCount = getLexer().peekTokens(Tokens);
8689
8690 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
8691 Tokens[Idx] = AsmToken(AsmToken::Error, "");
8692}
8693
8694 AsmToken::TokenKind
8695 AMDGPUAsmParser::getTokenKind() const {
8696 return getLexer().getKind();
8697}
8698
8699SMLoc
8700AMDGPUAsmParser::getLoc() const {
8701 return getToken().getLoc();
8702}
8703
8704StringRef
8705AMDGPUAsmParser::getTokenStr() const {
8706 return getToken().getString();
8707}
8708
8709void
8710AMDGPUAsmParser::lex() {
8711 Parser.Lex();
8712}
8713
8714const AMDGPUOperand &
8715AMDGPUAsmParser::findMCOperand(const OperandVector &Operands,
8716 int MCOpIdx) const {
8717 for (const auto &Op : Operands) {
8718 const AMDGPUOperand &TargetOp = static_cast<AMDGPUOperand &>(*Op);
8719 if (TargetOp.getMCOpIdx() == MCOpIdx)
8720 return TargetOp;
8721 }
8722 llvm_unreachable("no such MC operand!");
8723}
8724
8725SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
8726 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8727}
8728
8729 // Returns whichever of the given locations comes later in the source.
8730SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
8731 return a.getPointer() < b.getPointer() ? b : a;
8732}
8733
8734SMLoc AMDGPUAsmParser::getOperandLoc(const OperandVector &Operands,
8735 int MCOpIdx) const {
8736 return findMCOperand(Operands, MCOpIdx).getStartLoc();
8737}
8738
8739SMLoc
8740AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
8741 const OperandVector &Operands) const {
8742 for (unsigned i = Operands.size() - 1; i > 0; --i) {
8743 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8744 if (Test(Op))
8745 return Op.getStartLoc();
8746 }
8747 return getInstLoc(Operands);
8748}
8749
8750SMLoc
8751AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8752 const OperandVector &Operands) const {
8753 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
8754 return getOperandLoc(Test, Operands);
8755}
8756
8757ParseStatus
8758AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
8759 if (!trySkipToken(AsmToken::LCurly))
8760 return ParseStatus::NoMatch;
8761
8762 bool First = true;
8763 while (!trySkipToken(AsmToken::RCurly)) {
8764 if (!First &&
8765 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
8766 return ParseStatus::Failure;
8767
8768 StringRef Id = getTokenStr();
8769 SMLoc IdLoc = getLoc();
8770 if (!skipToken(AsmToken::Identifier, "field name expected") ||
8771 !skipToken(AsmToken::Colon, "colon expected"))
8772 return ParseStatus::Failure;
8773
8774 const auto *I =
8775 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
8776 if (I == Fields.end())
8777 return Error(IdLoc, "unknown field");
8778 if ((*I)->IsDefined)
8779 return Error(IdLoc, "duplicate field");
8780
8781 // TODO: Support symbolic values.
8782 (*I)->Loc = getLoc();
8783 if (!parseExpr((*I)->Val))
8784 return ParseStatus::Failure;
8785 (*I)->IsDefined = true;
8786
8787 First = false;
8788 }
8789 return ParseStatus::Success;
8790}
8791
8792bool AMDGPUAsmParser::validateStructuredOpFields(
8793 ArrayRef<const StructuredOpField *> Fields) {
8794 return all_of(Fields, [this](const StructuredOpField *F) {
8795 return F->validate(*this);
8796 });
8797}
8798
8799//===----------------------------------------------------------------------===//
8800// swizzle
8801//===----------------------------------------------------------------------===//
8802
8803 LLVM_READNONE
8804 static unsigned
8805encodeBitmaskPerm(const unsigned AndMask,
8806 const unsigned OrMask,
8807 const unsigned XorMask) {
8808 using namespace llvm::AMDGPU::Swizzle;
8809
8810 return BITMASK_PERM_ENC |
8811 (AndMask << BITMASK_AND_SHIFT) |
8812 (OrMask << BITMASK_OR_SHIFT) |
8813 (XorMask << BITMASK_XOR_SHIFT);
8814}
8815
8816bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8817 const unsigned MaxVal,
8818 const Twine &ErrMsg, SMLoc &Loc) {
8819 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8820 return false;
8821 }
8822 Loc = getLoc();
8823 if (!parseExpr(Op)) {
8824 return false;
8825 }
8826 if (Op < MinVal || Op > MaxVal) {
8827 Error(Loc, ErrMsg);
8828 return false;
8829 }
8830
8831 return true;
8832}
8833
8834bool
8835AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8836 const unsigned MinVal,
8837 const unsigned MaxVal,
8838 const StringRef ErrMsg) {
8839 SMLoc Loc;
8840 for (unsigned i = 0; i < OpNum; ++i) {
8841 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8842 return false;
8843 }
8844
8845 return true;
8846}
8847
8848bool
8849AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8850 using namespace llvm::AMDGPU::Swizzle;
8851
8852 int64_t Lane[LANE_NUM];
8853 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8854 "expected a 2-bit lane id")) {
8855 Imm = QUAD_PERM_ENC;
8856 for (unsigned I = 0; I < LANE_NUM; ++I) {
8857 Imm |= Lane[I] << (LANE_SHIFT * I);
8858 }
8859 return true;
8860 }
8861 return false;
8862}
8863
8864bool
8865AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8866 using namespace llvm::AMDGPU::Swizzle;
8867
8868 SMLoc Loc;
8869 int64_t GroupSize;
8870 int64_t LaneIdx;
8871
8872 if (!parseSwizzleOperand(GroupSize,
8873 2, 32,
8874 "group size must be in the interval [2,32]",
8875 Loc)) {
8876 return false;
8877 }
8878 if (!isPowerOf2_64(GroupSize)) {
8879 Error(Loc, "group size must be a power of two");
8880 return false;
8881 }
8882 if (parseSwizzleOperand(LaneIdx,
8883 0, GroupSize - 1,
8884 "lane id must be in the interval [0,group size - 1]",
8885 Loc)) {
8886 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8887 return true;
8888 }
8889 return false;
8890}
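// Worked example: BROADCAST with group size 8 and lane id 3 encodes
// and_mask = BITMASK_MAX - 8 + 1 = 0b11000 and or_mask = 3, so each lane
// reads (lane & 0b11000) | 3, i.e. lane 3 of its group of 8.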
8891
8892bool
8893AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8894 using namespace llvm::AMDGPU::Swizzle;
8895
8896 SMLoc Loc;
8897 int64_t GroupSize;
8898
8899 if (!parseSwizzleOperand(GroupSize,
8900 2, 32,
8901 "group size must be in the interval [2,32]",
8902 Loc)) {
8903 return false;
8904 }
8905 if (!isPowerOf2_64(GroupSize)) {
8906 Error(Loc, "group size must be a power of two");
8907 return false;
8908 }
8909
8910 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8911 return true;
8912}
8913
8914bool
8915AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8916 using namespace llvm::AMDGPU::Swizzle;
8917
8918 SMLoc Loc;
8919 int64_t GroupSize;
8920
8921 if (!parseSwizzleOperand(GroupSize,
8922 1, 16,
8923 "group size must be in the interval [1,16]",
8924 Loc)) {
8925 return false;
8926 }
8927 if (!isPowerOf2_64(GroupSize)) {
8928 Error(Loc, "group size must be a power of two");
8929 return false;
8930 }
8931
8932 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8933 return true;
8934}
8935
8936bool
8937AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8938 using namespace llvm::AMDGPU::Swizzle;
8939
8940 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8941 return false;
8942 }
8943
8944 StringRef Ctl;
8945 SMLoc StrLoc = getLoc();
8946 if (!parseString(Ctl)) {
8947 return false;
8948 }
8949 if (Ctl.size() != BITMASK_WIDTH) {
8950 Error(StrLoc, "expected a 5-character mask");
8951 return false;
8952 }
8953
8954 unsigned AndMask = 0;
8955 unsigned OrMask = 0;
8956 unsigned XorMask = 0;
8957
8958 for (size_t i = 0; i < Ctl.size(); ++i) {
8959 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8960 switch(Ctl[i]) {
8961 default:
8962 Error(StrLoc, "invalid mask");
8963 return false;
8964 case '0':
8965 break;
8966 case '1':
8967 OrMask |= Mask;
8968 break;
8969 case 'p':
8970 AndMask |= Mask;
8971 break;
8972 case 'i':
8973 AndMask |= Mask;
8974 XorMask |= Mask;
8975 break;
8976 }
8977 }
8978
8979 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8980 return true;
8981}
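// Worked example: the control string "00p11" is walked MSB to LSB, so the
// 'p' at bit 2 sets AndMask = 0b00100 and the trailing '1's set
// OrMask = 0b00011, giving new_lane = (lane & 0b00100) | 0b00011, XorMask = 0.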
8982
8983bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
8984 using namespace llvm::AMDGPU::Swizzle;
8985
8986 if (!AMDGPU::isGFX9Plus(getSTI())) {
8987 Error(getLoc(), "FFT mode swizzle not supported on this GPU");
8988 return false;
8989 }
8990
8991 int64_t Swizzle;
8992 SMLoc Loc;
8993 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
8994 "FFT swizzle must be in the interval [0," +
8995 Twine(FFT_SWIZZLE_MAX) + Twine(']'),
8996 Loc))
8997 return false;
8998
8999 Imm = FFT_MODE_ENC | Swizzle;
9000 return true;
9001}
9002
9003bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
9004 using namespace llvm::AMDGPU::Swizzle;
9005
9006 if (!AMDGPU::isGFX9Plus(getSTI())) {
9007 Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
9008 return false;
9009 }
9010
9011 SMLoc Loc;
9012 int64_t Direction;
9013
9014 if (!parseSwizzleOperand(Direction, 0, 1,
9015 "direction must be 0 (left) or 1 (right)", Loc))
9016 return false;
9017
9018 int64_t RotateSize;
9019 if (!parseSwizzleOperand(
9020 RotateSize, 0, ROTATE_MAX_SIZE,
9021 "number of threads to rotate must be in the interval [0," +
9022 Twine(ROTATE_MAX_SIZE) + Twine(']'),
9023 Loc))
9024 return false;
9025
9026 Imm = ROTATE_MODE_ENC | (Direction << ROTATE_DIR_SHIFT) |
9027 (RotateSize << ROTATE_SIZE_SHIFT);
9028 return true;
9029}
9030
9031bool
9032AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
9033
9034 SMLoc OffsetLoc = getLoc();
9035
9036 if (!parseExpr(Imm, "a swizzle macro")) {
9037 return false;
9038 }
9039 if (!isUInt<16>(Imm)) {
9040 Error(OffsetLoc, "expected a 16-bit offset");
9041 return false;
9042 }
9043 return true;
9044}
9045
9046bool
9047AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
9048 using namespace llvm::AMDGPU::Swizzle;
9049
9050 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
9051
9052 SMLoc ModeLoc = getLoc();
9053 bool Ok = false;
9054
9055 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
9056 Ok = parseSwizzleQuadPerm(Imm);
9057 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
9058 Ok = parseSwizzleBitmaskPerm(Imm);
9059 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
9060 Ok = parseSwizzleBroadcast(Imm);
9061 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
9062 Ok = parseSwizzleSwap(Imm);
9063 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
9064 Ok = parseSwizzleReverse(Imm);
9065 } else if (trySkipId(IdSymbolic[ID_FFT])) {
9066 Ok = parseSwizzleFFT(Imm);
9067 } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
9068 Ok = parseSwizzleRotate(Imm);
9069 } else {
9070 Error(ModeLoc, "expected a swizzle mode");
9071 }
9072
9073 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
9074 }
9075
9076 return false;
9077}
9078
9079ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
9080 SMLoc S = getLoc();
9081 int64_t Imm = 0;
9082
9083 if (trySkipId("offset")) {
9084
9085 bool Ok = false;
9086 if (skipToken(AsmToken::Colon, "expected a colon")) {
9087 if (trySkipId("swizzle")) {
9088 Ok = parseSwizzleMacro(Imm);
9089 } else {
9090 Ok = parseSwizzleOffset(Imm);
9091 }
9092 }
9093
9094 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
9095
9096 return ParseStatus::Success;
9097 }
9098 return ParseStatus::NoMatch;
9099}
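// In assembly this accepts either a raw 16-bit value or a macro, e.g.
// "ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)" versus
// "... offset:0x8055" (any 16-bit literal).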
9100
9101bool
9102AMDGPUOperand::isSwizzle() const {
9103 return isImmTy(ImmTySwizzle);
9104}
9105
9106//===----------------------------------------------------------------------===//
9107// VGPR Index Mode
9108//===----------------------------------------------------------------------===//
9109
9110int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
9111
9112 using namespace llvm::AMDGPU::VGPRIndexMode;
9113
9114 if (trySkipToken(AsmToken::RParen)) {
9115 return OFF;
9116 }
9117
9118 int64_t Imm = 0;
9119
9120 while (true) {
9121 unsigned Mode = 0;
9122 SMLoc S = getLoc();
9123
9124 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
9125 if (trySkipId(IdSymbolic[ModeId])) {
9126 Mode = 1 << ModeId;
9127 break;
9128 }
9129 }
9130
9131 if (Mode == 0) {
9132 Error(S, (Imm == 0)?
9133 "expected a VGPR index mode or a closing parenthesis" :
9134 "expected a VGPR index mode");
9135 return UNDEF;
9136 }
9137
9138 if (Imm & Mode) {
9139 Error(S, "duplicate VGPR index mode");
9140 return UNDEF;
9141 }
9142 Imm |= Mode;
9143
9144 if (trySkipToken(AsmToken::RParen))
9145 break;
9146 if (!skipToken(AsmToken::Comma,
9147 "expected a comma or a closing parenthesis"))
9148 return UNDEF;
9149 }
9150
9151 return Imm;
9152}
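// Each recognized mode contributes one bit of the immediate, so something
// like gpr_idx(SRC0, DST) sets two mode bits, while a bare gpr_idx() parses
// to the OFF encoding via the early return above.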
9153
9154ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
9155
9156 using namespace llvm::AMDGPU::VGPRIndexMode;
9157
9158 int64_t Imm = 0;
9159 SMLoc S = getLoc();
9160
9161 if (trySkipId("gpr_idx", AsmToken::LParen)) {
9162 Imm = parseGPRIdxMacro();
9163 if (Imm == UNDEF)
9164 return ParseStatus::Failure;
9165 } else {
9166 if (getParser().parseAbsoluteExpression(Imm))
9167 return ParseStatus::Failure;
9168 if (Imm < 0 || !isUInt<4>(Imm))
9169 return Error(S, "invalid immediate: only 4-bit values are legal");
9170 }
9171
9172 Operands.push_back(
9173 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
9174 return ParseStatus::Success;
9175}
9176
9177bool AMDGPUOperand::isGPRIdxMode() const {
9178 return isImmTy(ImmTyGprIdxMode);
9179}
9180
9181//===----------------------------------------------------------------------===//
9182// sopp branch targets
9183//===----------------------------------------------------------------------===//
9184
9185ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
9186
9187 // Make sure we are not parsing something
9188 // that looks like a label or an expression but is not.
9189 // This will improve error messages.
9190 if (isRegister() || isModifier())
9191 return ParseStatus::NoMatch;
9192
9193 if (!parseExpr(Operands))
9194 return ParseStatus::Failure;
9195
9196 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
9197 assert(Opr.isImm() || Opr.isExpr());
9198 SMLoc Loc = Opr.getStartLoc();
9199
9200 // Currently we do not support arbitrary expressions as branch targets.
9201 // Only labels and absolute expressions are accepted.
9202 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
9203 Error(Loc, "expected an absolute expression or a label");
9204 } else if (Opr.isImm() && !Opr.isS16Imm()) {
9205 Error(Loc, "expected a 16-bit signed jump offset");
9206 }
9207
9208 return ParseStatus::Success;
9209}
9210
9211//===----------------------------------------------------------------------===//
9212// Boolean holding registers
9213//===----------------------------------------------------------------------===//
9214
9215ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
9216 return parseReg(Operands);
9217}
9218
9219//===----------------------------------------------------------------------===//
9220// mubuf
9221//===----------------------------------------------------------------------===//
9222
9223void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
9224 const OperandVector &Operands,
9225 bool IsAtomic) {
9226 OptionalImmIndexMap OptionalIdx;
9227 unsigned FirstOperandIdx = 1;
9228 bool IsAtomicReturn = false;
9229
9230 if (IsAtomic) {
9231 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
9232 SIInstrFlags::IsAtomicRet;
9233 }
9234
9235 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
9236 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9237
9238 // Add the register arguments
9239 if (Op.isReg()) {
9240 Op.addRegOperands(Inst, 1);
9241 // Insert a tied src for atomic return dst.
9242 // This cannot be postponed as subsequent calls to
9243 // addImmOperands rely on correct number of MC operands.
9244 if (IsAtomicReturn && i == FirstOperandIdx)
9245 Op.addRegOperands(Inst, 1);
9246 continue;
9247 }
9248
9249 // Handle the case where soffset is an immediate
9250 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9251 Op.addImmOperands(Inst, 1);
9252 continue;
9253 }
9254
9255 // Handle tokens like 'offen' which are sometimes hard-coded into the
9256 // asm string. There are no MCInst operands for these.
9257 if (Op.isToken()) {
9258 continue;
9259 }
9260 assert(Op.isImm());
9261
9262 // Handle optional arguments
9263 OptionalIdx[Op.getImmTy()] = i;
9264 }
9265
9266 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
9267 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
9268 // Add a dummy operand as a placeholder for the SWZ operand. This enforces
9269 // agreement between MCInstrDesc.getNumOperands and MCInst.getNumOperands.
9270 Inst.addOperand(MCOperand::createImm(0));
9271}
9272
9273//===----------------------------------------------------------------------===//
9274// smrd
9275//===----------------------------------------------------------------------===//
9276
9277bool AMDGPUOperand::isSMRDOffset8() const {
9278 return isImmLiteral() && isUInt<8>(getImm());
9279}
9280
9281bool AMDGPUOperand::isSMEMOffset() const {
9282 // Offset range is checked later by validator.
9283 return isImmLiteral();
9284}
9285
9286bool AMDGPUOperand::isSMRDLiteralOffset() const {
9287 // 32-bit literals are only supported on CI, and we only want to use them
9288 // when the offset does not fit in 8 bits.
9289 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
9290}
9291
9292//===----------------------------------------------------------------------===//
9293// vop3
9294//===----------------------------------------------------------------------===//
9295
9296static bool ConvertOmodMul(int64_t &Mul) {
9297 if (Mul != 1 && Mul != 2 && Mul != 4)
9298 return false;
9299
9300 Mul >>= 1;
9301 return true;
9302}
9303
9304static bool ConvertOmodDiv(int64_t &Div) {
9305 if (Div == 1) {
9306 Div = 0;
9307 return true;
9308 }
9309
9310 if (Div == 2) {
9311 Div = 3;
9312 return true;
9313 }
9314
9315 return false;
9316}
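// Taken together these map the omod syntax onto its 2-bit encoding:
// mul:1 and div:1 become 0 (no modifier), mul:2 -> 1, mul:4 -> 2, div:2 -> 3.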
9317
9318// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
9319// This is intentional and ensures compatibility with sp3.
9320// See bug 35397 for details.
9321bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9322 if (BoundCtrl == 0 || BoundCtrl == 1) {
9323 if (!isGFX11Plus())
9324 BoundCtrl = 1;
9325 return true;
9326 }
9327 return false;
9328}
9329
9330void AMDGPUAsmParser::onBeginOfFile() {
9331 if (!getParser().getStreamer().getTargetStreamer() ||
9332 getSTI().getTargetTriple().getArch() == Triple::r600)
9333 return;
9334
9335 if (!getTargetStreamer().getTargetID())
9336 getTargetStreamer().initializeTargetID(getSTI(),
9337 getSTI().getFeatureString());
9338
9339 if (isHsaAbi(getSTI()))
9340 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9341}
9342
9343/// Parse AMDGPU specific expressions.
9344///
9345/// expr ::= or(expr, ...) |
9346/// max(expr, ...)
9347///
9348bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
9349 using AGVK = AMDGPUMCExpr::VariantKind;
9350
9351 if (isToken(AsmToken::Identifier)) {
9352 StringRef TokenId = getTokenStr();
9353 AGVK VK = StringSwitch<AGVK>(TokenId)
9354 .Case("max", AGVK::AGVK_Max)
9355 .Case("or", AGVK::AGVK_Or)
9356 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
9357 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9358 .Case("alignto", AGVK::AGVK_AlignTo)
9359 .Case("occupancy", AGVK::AGVK_Occupancy)
9360 .Default(AGVK::AGVK_None);
9361
9362 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
9363 SmallVector<const MCExpr *, 4> Exprs;
9364 uint64_t CommaCount = 0;
9365 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
9366 lex(); // Eat '('
9367 while (true) {
9368 if (trySkipToken(AsmToken::RParen)) {
9369 if (Exprs.empty()) {
9370 Error(getToken().getLoc(),
9371 "empty " + Twine(TokenId) + " expression");
9372 return true;
9373 }
9374 if (CommaCount + 1 != Exprs.size()) {
9375 Error(getToken().getLoc(),
9376 "mismatch of commas in " + Twine(TokenId) + " expression");
9377 return true;
9378 }
9379 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
9380 return false;
9381 }
9382 const MCExpr *Expr;
9383 if (getParser().parseExpression(Expr, EndLoc))
9384 return true;
9385 Exprs.push_back(Expr);
9386 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
9387 if (LastTokenWasComma)
9388 CommaCount++;
9389 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
9390 Error(getToken().getLoc(),
9391 "unexpected token in " + Twine(TokenId) + " expression");
9392 return true;
9393 }
9394 }
9395 }
9396 }
9397 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
9398}
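// This lets directive operands use expressions such as max(sym, 16) or
// or(a, b, c); any other identifier falls through to the generic MC
// expression parser.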
9399
9400ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
9401 StringRef Name = getTokenStr();
9402 if (Name == "mul") {
9403 return parseIntWithPrefix("mul", Operands,
9404 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
9405 }
9406
9407 if (Name == "div") {
9408 return parseIntWithPrefix("div", Operands,
9409 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
9410 }
9411
9412 return ParseStatus::NoMatch;
9413}
9414
9415// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
9416// the number of src operands present, then copies that bit into src0_modifiers.
9417static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
9418 int Opc = Inst.getOpcode();
9419 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9420 if (OpSelIdx == -1)
9421 return;
9422
9423 int SrcNum;
9424 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9425 AMDGPU::OpName::src2};
9426 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
9427 ++SrcNum)
9428 ;
9429 assert(SrcNum > 0);
9430
9431 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9432
9433 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
9434 if (DstIdx == -1)
9435 return;
9436
9437 const MCOperand &DstOp = Inst.getOperand(DstIdx);
9438 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
9439 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9440 if (DstOp.isReg() &&
9441 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
9442 if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
9443 ModVal |= SISrcMods::DST_OP_SEL;
9444 } else {
9445 if ((OpSel & (1 << SrcNum)) != 0)
9446 ModVal |= SISrcMods::DST_OP_SEL;
9447 }
9448 Inst.getOperand(ModIdx).setImm(ModVal);
9449}
9450
9451void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
9452 const OperandVector &Operands) {
9453 cvtVOP3P(Inst, Operands);
9454 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9455}
9456
9457void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
9458 OptionalImmIndexMap &OptionalIdx) {
9459 cvtVOP3P(Inst, Operands, OptionalIdx);
9460 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9461}
9462
9463static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
9464 return
9465 // 1. This operand is input modifiers
9466 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
9467 // 2. This is not last operand
9468 && Desc.NumOperands > (OpNum + 1)
9469 // 3. Next operand is register class
9470 && Desc.operands()[OpNum + 1].RegClass != -1
9471 // 4. Next register is not tied to any other operand
9472 && Desc.getOperandConstraint(OpNum + 1,
9473 MCOI::OperandConstraint::TIED_TO) == -1;
9474}
9475
9476void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst, unsigned OpSel) {
9477 unsigned Opc = Inst.getOpcode();
9478 constexpr AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9479 AMDGPU::OpName::src2};
9480 constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9481 AMDGPU::OpName::src1_modifiers,
9482 AMDGPU::OpName::src2_modifiers};
9483 for (int J = 0; J < 3; ++J) {
9484 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9485 if (OpIdx == -1)
9486 // Some instructions, e.g. v_interp_p2_f16 in GFX9, have src0, src2, but
9487 // no src1. So continue instead of break.
9488 continue;
9489
9490 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9491 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9492
9493 if ((OpSel & (1 << J)) != 0)
9494 ModVal |= SISrcMods::OP_SEL_0;
9495 // op_sel[3] is encoded in src0_modifiers.
9496 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
9497 ModVal |= SISrcMods::DST_OP_SEL;
9498
9499 Inst.getOperand(ModIdx).setImm(ModVal);
9500 }
9501}
9502
9503void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
9504{
9505 OptionalImmIndexMap OptionalIdx;
9506 unsigned Opc = Inst.getOpcode();
9507
9508 unsigned I = 1;
9509 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9510 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9511 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9512 }
9513
9514 for (unsigned E = Operands.size(); I != E; ++I) {
9515 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9516 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9517 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9518 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
9519 Op.isInterpAttrChan()) {
9520 Inst.addOperand(MCOperand::createImm(Op.getImm()));
9521 } else if (Op.isImmModifier()) {
9522 OptionalIdx[Op.getImmTy()] = I;
9523 } else {
9524 llvm_unreachable("unhandled operand type");
9525 }
9526 }
9527
9528 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
9529 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9530 AMDGPUOperand::ImmTyHigh);
9531
9532 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9533 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9534 AMDGPUOperand::ImmTyClamp);
9535
9536 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9537 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9538 AMDGPUOperand::ImmTyOModSI);
9539
9540 // Some v_interp instructions use op_sel[3] for dst.
9541 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9542 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9543 AMDGPUOperand::ImmTyOpSel);
9544 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9545 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9546
9547 cvtOpSelHelper(Inst, OpSel);
9548 }
9549}
9550
9551void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
9552{
9553 OptionalImmIndexMap OptionalIdx;
9554 unsigned Opc = Inst.getOpcode();
9555
9556 unsigned I = 1;
9557 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9558 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9559 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9560 }
9561
9562 for (unsigned E = Operands.size(); I != E; ++I) {
9563 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9564 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9565 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9566 } else if (Op.isImmModifier()) {
9567 OptionalIdx[Op.getImmTy()] = I;
9568 } else {
9569 llvm_unreachable("unhandled operand type");
9570 }
9571 }
9572
9573 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
9574
9575 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9576 if (OpSelIdx != -1)
9577 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9578
9579 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
9580
9581 if (OpSelIdx == -1)
9582 return;
9583
9584 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9585 cvtOpSelHelper(Inst, OpSel);
9586}
9587
9588void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9589 const OperandVector &Operands) {
9590 OptionalImmIndexMap OptionalIdx;
9591 unsigned Opc = Inst.getOpcode();
9592 unsigned I = 1;
9593 int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
9594
9595 const MCInstrDesc &Desc = MII.get(Opc);
9596
9597 for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
9598 static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
9599
9600 for (unsigned E = Operands.size(); I != E; ++I) {
9601 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
9602 int NumOperands = Inst.getNumOperands();
9603 // The order of operands in the MCInst differs from the parsed operand order.
9604 // Adding dummy cbsz and blgp operands at corresponding MCInst operand
9605 // indices for parsing scale values correctly.
9606 if (NumOperands == CbszOpIdx) {
9607 Inst.addOperand(MCOperand::createImm(0));
9608 Inst.addOperand(MCOperand::createImm(0));
9609 }
9610 if (isRegOrImmWithInputMods(Desc, NumOperands)) {
9611 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9612 } else if (Op.isImmModifier()) {
9613 OptionalIdx[Op.getImmTy()] = I;
9614 } else {
9615 Op.addRegOrImmOperands(Inst, 1);
9616 }
9617 }
9618
9619 // Insert CBSZ and BLGP operands for F8F6F4 variants
9620 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9621 if (CbszIdx != OptionalIdx.end()) {
9622 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
9623 Inst.getOperand(CbszOpIdx).setImm(CbszVal);
9624 }
9625
9626 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
9627 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9628 if (BlgpIdx != OptionalIdx.end()) {
9629 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
9630 Inst.getOperand(BlgpOpIdx).setImm(BlgpVal);
9631 }
9632
9633 // Add dummy src_modifiers
9634 Inst.addOperand(MCOperand::createImm(0));
9635 Inst.addOperand(MCOperand::createImm(0));
9636
9637 // Handle op_sel fields
9638
9639 unsigned OpSel = 0;
9640 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9641 if (OpselIdx != OptionalIdx.end()) {
9642 OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
9643 .getImm();
9644 }
9645
9646 unsigned OpSelHi = 0;
9647 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9648 if (OpselHiIdx != OptionalIdx.end()) {
9649 OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
9650 .getImm();
9651 }
9652 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9653 AMDGPU::OpName::src1_modifiers};
9654
9655 for (unsigned J = 0; J < 2; ++J) {
9656 unsigned ModVal = 0;
9657 if (OpSel & (1 << J))
9658 ModVal |= SISrcMods::OP_SEL_0;
9659 if (OpSelHi & (1 << J))
9660 ModVal |= SISrcMods::OP_SEL_1;
9661
9662 const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9663 Inst.getOperand(ModIdx).setImm(ModVal);
9664 }
9665}
9666
9667void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
9668 OptionalImmIndexMap &OptionalIdx) {
9669 unsigned Opc = Inst.getOpcode();
9670
9671 unsigned I = 1;
9672 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9673 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9674 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9675 }
9676
9677 for (unsigned E = Operands.size(); I != E; ++I) {
9678 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9679 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9680 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9681 } else if (Op.isImmModifier()) {
9682 OptionalIdx[Op.getImmTy()] = I;
9683 } else {
9684 Op.addRegOrImmOperands(Inst, 1);
9685 }
9686 }
9687
9688 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::scale_sel))
9689 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9690 AMDGPUOperand::ImmTyScaleSel);
9691
9692 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9693 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9694 AMDGPUOperand::ImmTyClamp);
9695
9696 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
9697 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
9698 Inst.addOperand(Inst.getOperand(0));
9699 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9700 AMDGPUOperand::ImmTyByteSel);
9701 }
9702
9703 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9704 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9705 AMDGPUOperand::ImmTyOModSI);
9706
9707 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
9708 // they have a src2 register operand that is tied to the dst operand.
9709 // Modifiers are not allowed for this operand in the assembler, so
9710 // src2_modifiers must be 0.
9711 if (isMAC(Opc)) {
9712 auto *it = Inst.begin();
9713 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
9714 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
9715 ++it;
9716 // Copy the operand to ensure it's not invalidated when Inst grows.
9717 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
9718 }
9719}
9720
9721void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
9722 OptionalImmIndexMap OptionalIdx;
9723 cvtVOP3(Inst, Operands, OptionalIdx);
9724}
9725
9726void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
9727 OptionalImmIndexMap &OptIdx) {
9728 const int Opc = Inst.getOpcode();
9729 const MCInstrDesc &Desc = MII.get(Opc);
9730
9731 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
9732
9733 if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9734 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9735 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9736 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9737 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx11 ||
9738 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx11 ||
9739 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9740 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12 ||
9741 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx13 ||
9742 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx13) {
9743 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
9744 Inst.addOperand(Inst.getOperand(0));
9745 }
9746
9747 // Append vdst_in only if a previous converter (cvtVOP3DPP for DPP variants,
9748 // cvtVOP3 for byte_sel variants) hasn't already placed it. Use the position
9749 // of the named operand to detect that, the same way cvtVOP3DPP does
9750 // internally.
9751 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
9752 if (VdstInIdx != -1 && VdstInIdx == static_cast<int>(Inst.getNumOperands()))
9753 Inst.addOperand(Inst.getOperand(0));
9754
9755 int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
9756 if (BitOp3Idx != -1) {
9757 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9758 }
9759
9760 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
9761 // instruction, and then figure out where to actually put the modifiers
9762
9763 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9764 if (OpSelIdx != -1) {
9765 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
9766 }
9767
9768 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
9769 if (OpSelHiIdx != -1) {
9770 int DefaultVal = IsPacked ? -1 : 0;
9771 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
9772 DefaultVal);
9773 }
9774
9775 int MatrixAFMTIdx =
9776 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
9777 if (MatrixAFMTIdx != -1) {
9778 addOptionalImmOperand(Inst, Operands, OptIdx,
9779 AMDGPUOperand::ImmTyMatrixAFMT, 0);
9780 }
9781
9782 int MatrixBFMTIdx =
9783 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
9784 if (MatrixBFMTIdx != -1) {
9785 addOptionalImmOperand(Inst, Operands, OptIdx,
9786 AMDGPUOperand::ImmTyMatrixBFMT, 0);
9787 }
9788
9789 int MatrixAScaleIdx =
9790 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale);
9791 if (MatrixAScaleIdx != -1) {
9792 addOptionalImmOperand(Inst, Operands, OptIdx,
9793 AMDGPUOperand::ImmTyMatrixAScale, 0);
9794 }
9795
9796 int MatrixBScaleIdx =
9797 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale);
9798 if (MatrixBScaleIdx != -1) {
9799 addOptionalImmOperand(Inst, Operands, OptIdx,
9800 AMDGPUOperand::ImmTyMatrixBScale, 0);
9801 }
9802
9803 int MatrixAScaleFmtIdx =
9804 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
9805 if (MatrixAScaleFmtIdx != -1) {
9806 addOptionalImmOperand(Inst, Operands, OptIdx,
9807 AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
9808 }
9809
9810 int MatrixBScaleFmtIdx =
9811 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
9812 if (MatrixBScaleFmtIdx != -1) {
9813 addOptionalImmOperand(Inst, Operands, OptIdx,
9814 AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
9815 }
9816
9817 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
9818 addOptionalImmOperand(Inst, Operands, OptIdx,
9819 AMDGPUOperand::ImmTyMatrixAReuse, 0);
9820
9821 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
9822 addOptionalImmOperand(Inst, Operands, OptIdx,
9823 AMDGPUOperand::ImmTyMatrixBReuse, 0);
9824
9825 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
9826 if (NegLoIdx != -1)
9827 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
9828
9829 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
9830 if (NegHiIdx != -1)
9831 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
9832
9833 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9834 AMDGPU::OpName::src2};
9835 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9836 AMDGPU::OpName::src1_modifiers,
9837 AMDGPU::OpName::src2_modifiers};
9838
9839 unsigned OpSel = 0;
9840 unsigned OpSelHi = 0;
9841 unsigned NegLo = 0;
9842 unsigned NegHi = 0;
9843
9844 if (OpSelIdx != -1)
9845 OpSel = Inst.getOperand(OpSelIdx).getImm();
9846
9847 if (OpSelHiIdx != -1)
9848 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
9849
9850 if (NegLoIdx != -1)
9851 NegLo = Inst.getOperand(NegLoIdx).getImm();
9852
9853 if (NegHiIdx != -1)
9854 NegHi = Inst.getOperand(NegHiIdx).getImm();
9855
9856 for (int J = 0; J < 3; ++J) {
9857 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9858 if (OpIdx == -1)
9859 break;
9860
9861 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9862
9863 if (ModIdx == -1)
9864 continue;
9865
9866 uint32_t ModVal = 0;
9867
9868 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
9869 if (SrcOp.isReg() && getMRI()
9870 ->getRegClass(AMDGPU::VGPR_16RegClassID)
9871 .contains(SrcOp.getReg())) {
9872 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
9873 if (VGPRSuffixIsHi)
9874 ModVal |= SISrcMods::OP_SEL_0;
9875 } else {
9876 if ((OpSel & (1 << J)) != 0)
9877 ModVal |= SISrcMods::OP_SEL_0;
9878 }
9879
9880 if ((OpSelHi & (1 << J)) != 0)
9881 ModVal |= SISrcMods::OP_SEL_1;
9882
9883 if ((NegLo & (1 << J)) != 0)
9884 ModVal |= SISrcMods::NEG;
9885
9886 if ((NegHi & (1 << J)) != 0)
9887 ModVal |= SISrcMods::NEG_HI;
9888
9889 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
9890 }
9891}
9892
9893void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
9894 OptionalImmIndexMap OptIdx;
9895 cvtVOP3(Inst, Operands, OptIdx);
9896 cvtVOP3P(Inst, Operands, OptIdx);
9897}
9898
9899static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
9900 unsigned i, unsigned Opc,
9901 AMDGPU::OpName OpName) {
9902 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
9903 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9904 else
9905 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
9906}
9907
9908void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
9909 unsigned Opc = Inst.getOpcode();
9910
9911 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9912 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
9913 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
9914 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
9915 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
9916
9917 OptionalImmIndexMap OptIdx;
9918 for (unsigned i = 5; i < Operands.size(); ++i) {
9919 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9920 OptIdx[Op.getImmTy()] = i;
9921 }
9922
9923 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
9924 addOptionalImmOperand(Inst, Operands, OptIdx,
9925 AMDGPUOperand::ImmTyIndexKey8bit);
9926
9927 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
9928 addOptionalImmOperand(Inst, Operands, OptIdx,
9929 AMDGPUOperand::ImmTyIndexKey16bit);
9930
9931 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
9932 addOptionalImmOperand(Inst, Operands, OptIdx,
9933 AMDGPUOperand::ImmTyIndexKey32bit);
9934
9935 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9936 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
9937
9938 cvtVOP3P(Inst, Operands, OptIdx);
9939}
9940
9941//===----------------------------------------------------------------------===//
9942// VOPD
9943//===----------------------------------------------------------------------===//
9944
9945ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
9946 if (!hasVOPD(getSTI()))
9947 return ParseStatus::NoMatch;
9948
9949 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
9950 SMLoc S = getLoc();
9951 lex();
9952 lex();
9953 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
9954 SMLoc OpYLoc = getLoc();
9955 StringRef OpYName;
9956 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
9957 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
9958 return ParseStatus::Success;
9959 }
9960 return Error(OpYLoc, "expected a VOPDY instruction after ::");
9961 }
9962 return ParseStatus::NoMatch;
9963}
9964
9965// Create VOPD MCInst operands using parsed assembler operands.
9966void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
9967 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9968
9969 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
9970 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
9971 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9972 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9973 return;
9974 }
9975 if (Op.isReg()) {
9976 Op.addRegOperands(Inst, 1);
9977 return;
9978 }
9979 if (Op.isImm()) {
9980 Op.addImmOperands(Inst, 1);
9981 return;
9982 }
9983 llvm_unreachable("Unhandled operand type in cvtVOPD");
9984 };
9985
9986 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
9987
9988 // MCInst operands are ordered as follows:
9989 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
9990
9991 for (auto CompIdx : VOPD::COMPONENTS) {
9992 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
9993 }
9994
9995 for (auto CompIdx : VOPD::COMPONENTS) {
9996 const auto &CInfo = InstInfo[CompIdx];
9997 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
9998 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
9999 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
10000 if (CInfo.hasSrc2Acc())
10001 addOp(CInfo.getIndexOfDstInParsedOperands());
10002 }
10003
10004 int BitOp3Idx =
10005 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
10006 if (BitOp3Idx != -1) {
10007 OptionalImmIndexMap OptIdx;
10008 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
10009 if (Op.isImm())
10010 OptIdx[Op.getImmTy()] = Operands.size() - 1;
10011
10012 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
10013 }
10014}
10015
10016//===----------------------------------------------------------------------===//
10017// dpp
10018//===----------------------------------------------------------------------===//
10019
10020bool AMDGPUOperand::isDPP8() const {
10021 return isImmTy(ImmTyDPP8);
10022}
10023
10024bool AMDGPUOperand::isDPPCtrl() const {
10025 using namespace AMDGPU::DPP;
10026
10027 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
10028 if (result) {
10029 int64_t Imm = getImm();
10030 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
10031 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
10032 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
10033 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
10034 (Imm == DppCtrl::WAVE_SHL1) ||
10035 (Imm == DppCtrl::WAVE_ROL1) ||
10036 (Imm == DppCtrl::WAVE_SHR1) ||
10037 (Imm == DppCtrl::WAVE_ROR1) ||
10038 (Imm == DppCtrl::ROW_MIRROR) ||
10039 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
10040 (Imm == DppCtrl::BCAST15) ||
10041 (Imm == DppCtrl::BCAST31) ||
10042 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
10043 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
10044 }
10045 return false;
10046}
10047
10048//===----------------------------------------------------------------------===//
10049// mAI
10050//===----------------------------------------------------------------------===//
10051
10052bool AMDGPUOperand::isBLGP() const {
10053 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
10054}
10055
10056bool AMDGPUOperand::isS16Imm() const {
10057 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
10058}
10059
10060bool AMDGPUOperand::isU16Imm() const {
10061 return isImmLiteral() && isUInt<16>(getImm());
10062}
10063
10064//===----------------------------------------------------------------------===//
10065// dim
10066//===----------------------------------------------------------------------===//
10067
10068bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
10069 // We want to allow "dim:1D" etc.,
10070 // but the initial 1 is tokenized as an integer.
10071 std::string Token;
10072 if (isToken(AsmToken::Integer)) {
10073 SMLoc Loc = getToken().getEndLoc();
10074 Token = std::string(getTokenStr());
10075 lex();
10076 if (getLoc() != Loc)
10077 return false;
10078 }
10079
10080 StringRef Suffix;
10081 if (!parseId(Suffix))
10082 return false;
10083 Token += Suffix;
10084
10085 StringRef DimId = Token;
10086 DimId.consume_front("SQ_RSRC_IMG_");
10087
10088 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
10089 if (!DimInfo)
10090 return false;
10091
10092 Encoding = DimInfo->Encoding;
10093 return true;
10094}
10095
10096ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
10097 if (!isGFX10Plus())
10098 return ParseStatus::NoMatch;
10099
10100 SMLoc S = getLoc();
10101
10102 if (!trySkipId("dim", AsmToken::Colon))
10103 return ParseStatus::NoMatch;
10104
10105 unsigned Encoding;
10106 SMLoc Loc = getLoc();
10107 if (!parseDimId(Encoding))
10108 return Error(Loc, "invalid dim value");
10109
10110 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
10111 AMDGPUOperand::ImmTyDim));
10112 return ParseStatus::Success;
10113}
10114
10115//===----------------------------------------------------------------------===//
10116// dpp
10117//===----------------------------------------------------------------------===//
10118
10119ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
10120 SMLoc S = getLoc();
10121
10122 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
10123 return ParseStatus::NoMatch;
10124
10125 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
10126
10127 int64_t Sels[8];
10128
10129 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
10130 return ParseStatus::Failure;
10131
10132 for (size_t i = 0; i < 8; ++i) {
10133 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
10134 return ParseStatus::Failure;
10135
10136 SMLoc Loc = getLoc();
10137 if (getParser().parseAbsoluteExpression(Sels[i]))
10138 return ParseStatus::Failure;
10139 if (0 > Sels[i] || 7 < Sels[i])
10140 return Error(Loc, "expected a 3-bit value");
10141 }
10142
10143 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10144 return ParseStatus::Failure;
10145
10146 unsigned DPP8 = 0;
10147 for (size_t i = 0; i < 8; ++i)
10148 DPP8 |= (Sels[i] << (i * 3));
10149
10150 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
10151 return ParseStatus::Success;
10152}
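// The eight 3-bit selects are packed LSB-first, so dpp8:[0,1,2,3,4,5,6,7]
// (lane i reads select i) encodes the identity pattern.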
10153
10154bool
10155AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
10156 const OperandVector &Operands) {
10157 if (Ctrl == "row_newbcast")
10158 return isGFX90A();
10159
10160 if (Ctrl == "row_share" ||
10161 Ctrl == "row_xmask")
10162 return isGFX10Plus();
10163
10164 if (Ctrl == "wave_shl" ||
10165 Ctrl == "wave_shr" ||
10166 Ctrl == "wave_rol" ||
10167 Ctrl == "wave_ror" ||
10168 Ctrl == "row_bcast")
10169 return isVI() || isGFX9();
10170
10171 return Ctrl == "row_mirror" ||
10172 Ctrl == "row_half_mirror" ||
10173 Ctrl == "quad_perm" ||
10174 Ctrl == "row_shl" ||
10175 Ctrl == "row_shr" ||
10176 Ctrl == "row_ror";
10177}
10178
10179int64_t
10180AMDGPUAsmParser::parseDPPCtrlPerm() {
10181 // quad_perm:[%d,%d,%d,%d]
10182
10183 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
10184 return -1;
10185
10186 int64_t Val = 0;
10187 for (int i = 0; i < 4; ++i) {
10188 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
10189 return -1;
10190
10191 int64_t Temp;
10192 SMLoc Loc = getLoc();
10193 if (getParser().parseAbsoluteExpression(Temp))
10194 return -1;
10195 if (Temp < 0 || Temp > 3) {
10196 Error(Loc, "expected a 2-bit value");
10197 return -1;
10198 }
10199
10200 Val += (Temp << i * 2);
10201 }
10202
10203 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10204 return -1;
10205
10206 return Val;
10207}
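// The four 2-bit selects are packed little-endian, so quad_perm:[0,1,2,3]
// encodes to 0xE4, the identity control that cvtVOP3DPP also uses as the
// dpp_ctrl default.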
10208
10209int64_t
10210AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
10211 using namespace AMDGPU::DPP;
10212
10213 // sel:%d
10214
10215 int64_t Val;
10216 SMLoc Loc = getLoc();
10217
10218 if (getParser().parseAbsoluteExpression(Val))
10219 return -1;
10220
10221 struct DppCtrlCheck {
10222 int64_t Ctrl;
10223 int Lo;
10224 int Hi;
10225 };
10226
10227 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
10228 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
10229 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
10230 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
10231 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
10232 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
10233 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
10234 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
10235 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
10236 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
10237 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
10238 .Default({-1, 0, 0});
10239
10240 bool Valid;
10241 if (Check.Ctrl == -1) {
10242 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
10243 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
10244 } else {
10245 Valid = Check.Lo <= Val && Val <= Check.Hi;
10246 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
10247 }
10248
10249 if (!Valid) {
10250 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
10251 return -1;
10252 }
10253
10254 return Val;
10255}
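// Range-style controls encode as base | value (e.g. row_shl:3 becomes
// DppCtrl::ROW_SHL0 | 3), single-value controls such as wave_shl:1 collapse
// to their fixed code, and row_bcast maps 15/31 onto BCAST15/BCAST31.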
10256
10257ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
10258 using namespace AMDGPU::DPP;
10259
10260 if (!isToken(AsmToken::Identifier) ||
10261 !isSupportedDPPCtrl(getTokenStr(), Operands))
10262 return ParseStatus::NoMatch;
10263
10264 SMLoc S = getLoc();
10265 int64_t Val = -1;
10266 StringRef Ctrl;
10267
10268 parseId(Ctrl);
10269
10270 if (Ctrl == "row_mirror") {
10271 Val = DppCtrl::ROW_MIRROR;
10272 } else if (Ctrl == "row_half_mirror") {
10273 Val = DppCtrl::ROW_HALF_MIRROR;
10274 } else {
10275 if (skipToken(AsmToken::Colon, "expected a colon")) {
10276 if (Ctrl == "quad_perm") {
10277 Val = parseDPPCtrlPerm();
10278 } else {
10279 Val = parseDPPCtrlSel(Ctrl);
10280 }
10281 }
10282 }
10283
10284 if (Val == -1)
10285 return ParseStatus::Failure;
10286
10287 Operands.push_back(
10288 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10289 return ParseStatus::Success;
10290}
10291
10292void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
10293 bool IsDPP8) {
10294 OptionalImmIndexMap OptionalIdx;
10295 unsigned Opc = Inst.getOpcode();
10296 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10297
10298 // MAC instructions are special because they have an 'old'
10299 // operand which is not tied to dst (but assumed to be).
10300 // They also have a dummy, unused src2_modifiers operand.
10301 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
10302 int Src2ModIdx =
10303 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
10304 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10305 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
10306
10307 unsigned I = 1;
10308 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10309 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10310 }
10311
10312 int Fi = 0;
10313 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
10314 bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10315 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx13 ||
10316 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10317 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx13 ||
10318 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10319 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx13 ||
10320 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
10321 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx13;
10322
10323 for (unsigned E = Operands.size(); I != E; ++I) {
10324
10325 if (IsMAC) {
10326 int NumOperands = Inst.getNumOperands();
10327 if (OldIdx == NumOperands) {
10328 // Handle old operand
10329 constexpr int DST_IDX = 0;
10330 Inst.addOperand(Inst.getOperand(DST_IDX));
10331 } else if (Src2ModIdx == NumOperands) {
10332 // Add unused dummy src2_modifiers
10333 Inst.addOperand(MCOperand::createImm(0));
10334 }
10335 }
10336
10337 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
10338 Inst.addOperand(Inst.getOperand(0));
10339 }
10340
10341 if (IsVOP3CvtSrDpp) {
10342 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
10343 Inst.addOperand(MCOperand::createImm(0));
10344 Inst.addOperand(MCOperand::createReg(MCRegister()));
10345 }
10346 }
10347
10348 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10349 MCOI::TIED_TO);
10350 if (TiedTo != -1) {
10351 assert((unsigned)TiedTo < Inst.getNumOperands());
10352 // handle tied old or src2 for MAC instructions
10353 Inst.addOperand(Inst.getOperand(TiedTo));
10354 }
10355 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10356 // Add the register arguments
10357 if (IsDPP8 && Op.isDppFI()) {
10358 Fi = Op.getImm();
10359 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10360 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10361 } else if (Op.isReg()) {
10362 Op.addRegOperands(Inst, 1);
10363 } else if (Op.isImm() &&
10364 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
10365 Op.addImmOperands(Inst, 1);
10366 } else if (Op.isImm()) {
10367 OptionalIdx[Op.getImmTy()] = I;
10368 } else {
10369 llvm_unreachable("unhandled operand type");
10370 }
10371 }
10372
10373 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
10374 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10375 AMDGPUOperand::ImmTyClamp);
10376
10377 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
10378 if (VdstInIdx == static_cast<int>(Inst.getNumOperands()))
10379 Inst.addOperand(Inst.getOperand(0));
10380 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10381 AMDGPUOperand::ImmTyByteSel);
10382 }
10383
10384 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10385 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
10386
10387 if (Desc.TSFlags & SIInstrFlags::VOP3P)
10388 cvtVOP3P(Inst, Operands, OptionalIdx);
10389 else if (Desc.TSFlags & SIInstrFlags::VOP3)
10390 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
10391 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
10392 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
10393 }
10394
10395 if (IsDPP8) {
10396 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
10397 using namespace llvm::AMDGPU::DPP;
10398 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10399 } else {
10400 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
10401 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10402 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10403 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10404
10405 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
10406 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10407 AMDGPUOperand::ImmTyDppFI);
10408 }
10409}
10410
10411void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
10412 OptionalImmIndexMap OptionalIdx;
10413
10414 unsigned I = 1;
10415 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10416 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10417 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10418 }
10419
10420 int Fi = 0;
10421 for (unsigned E = Operands.size(); I != E; ++I) {
10422 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10423 MCOI::TIED_TO);
10424 if (TiedTo != -1) {
10425 assert((unsigned)TiedTo < Inst.getNumOperands());
10426 // handle tied old or src2 for MAC instructions
10427 Inst.addOperand(Inst.getOperand(TiedTo));
10428 }
10429 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10430 // Add the register arguments
10431 if (Op.isReg() && validateVccOperand(Op.getReg())) {
10432 // VOP2b (v_add_u32, v_sub_u32, ...) DPP uses the "vcc" token.
10433 // Skip it.
10434 continue;
10435 }
10436
10437 if (IsDPP8) {
10438 if (Op.isDPP8()) {
10439 Op.addImmOperands(Inst, 1);
10440 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10441 Op.addRegWithFPInputModsOperands(Inst, 2);
10442 } else if (Op.isDppFI()) {
10443 Fi = Op.getImm();
10444 } else if (Op.isReg()) {
10445 Op.addRegOperands(Inst, 1);
10446 } else {
10447 llvm_unreachable("Invalid operand type");
10448 }
10449 } else {
10450 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10451 Op.addRegWithFPInputModsOperands(Inst, 2);
10452 } else if (Op.isReg()) {
10453 Op.addRegOperands(Inst, 1);
10454 } else if (Op.isDPPCtrl()) {
10455 Op.addImmOperands(Inst, 1);
10456 } else if (Op.isImm()) {
10457 // Handle optional arguments
10458 OptionalIdx[Op.getImmTy()] = I;
10459 } else {
10460 llvm_unreachable("Invalid operand type");
10461 }
10462 }
10463 }
10464
10465 if (IsDPP8) {
10466 using namespace llvm::AMDGPU::DPP;
10467 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10468 } else {
10469 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10470 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10471 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10472 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
10473 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10474 AMDGPUOperand::ImmTyDppFI);
10475 }
10476 }
10477}
10478
10479//===----------------------------------------------------------------------===//
10480// sdwa
10481//===----------------------------------------------------------------------===//
10482
10483ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
10484 StringRef Prefix,
10485 AMDGPUOperand::ImmTy Type) {
10486 return parseStringOrIntWithPrefix(
10487 Operands, Prefix,
10488 {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
10489 Type);
10490}
10491
10492ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
10493 return parseStringOrIntWithPrefix(
10494 Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
10495 AMDGPUOperand::ImmTySDWADstUnused);
10496}
10497
10498void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
10499 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
10500}
10501
10502void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
10503 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
10504}
10505
10506void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
10507 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
10508}
10509
10510void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
10511 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
10512}
10513
10514void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
10515 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
10516}
10517
10518void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
10519 uint64_t BasicInstType,
10520 bool SkipDstVcc,
10521 bool SkipSrcVcc) {
10522 using namespace llvm::AMDGPU::SDWA;
10523
10524 OptionalImmIndexMap OptionalIdx;
10525 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
10526 bool SkippedVcc = false;
10527
10528 unsigned I = 1;
10529 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10530 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10531 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10532 }
10533
10534 for (unsigned E = Operands.size(); I != E; ++I) {
10535 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10536 if (SkipVcc && !SkippedVcc && Op.isReg() &&
10537 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
10538 // VOP2b (v_add_u32, v_sub_u32, ...) SDWA uses the "vcc" token as dst.
10539 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
10540 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
10541 // Skip VCC only if we didn't skip it on previous iteration.
10542 // Note that src0 and src1 occupy 2 slots each because of modifiers.
10543 if (BasicInstType == SIInstrFlags::VOP2 &&
10544 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
10545 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
10546 SkippedVcc = true;
10547 continue;
10548 }
10549 if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
10550 SkippedVcc = true;
10551 continue;
10552 }
10553 }
10554 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10555 Op.addRegOrImmWithInputModsOperands(Inst, 2);
10556 } else if (Op.isImm()) {
10557 // Handle optional arguments
10558 OptionalIdx[Op.getImmTy()] = I;
10559 } else {
10560 llvm_unreachable("Invalid operand type");
10561 }
10562 SkippedVcc = false;
10563 }
10564
10565 const unsigned Opc = Inst.getOpcode();
10566 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10567 Opc != AMDGPU::V_NOP_sdwa_vi) {
10568 // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
10569 switch (BasicInstType) {
10570 case SIInstrFlags::VOP1:
10571 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
10572 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10573 AMDGPUOperand::ImmTyClamp, 0);
10574
10575 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10576 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10577 AMDGPUOperand::ImmTyOModSI, 0);
10578
10579 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
10580 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10581 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10582
10583 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
10584 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10585 AMDGPUOperand::ImmTySDWADstUnused,
10586 DstUnused::UNUSED_PRESERVE);
10587
10588 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10589 break;
10590
10591 case SIInstrFlags::VOP2:
10592 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10593 AMDGPUOperand::ImmTyClamp, 0);
10594
10595 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
10596 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
10597
10598 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10599 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
10600 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10601 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10602 break;
10603
10604 case SIInstrFlags::VOPC:
10605 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
10606 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10607 AMDGPUOperand::ImmTyClamp, 0);
10608 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10609 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10610 break;
10611
10612 default:
10613 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
10614 }
10615 }
10616
10617 // Special case v_mac_{f16, f32}:
10618 // it has a src2 register operand that is tied to the dst operand.
10619 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10620 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10621 auto *it = Inst.begin();
10622 std::advance(
10623 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
10624 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
10625 }
10626}
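// Illustrative only; not part of the original source. v_mac accumulates into
// its destination, so for an input like
//   v_mac_f32_sdwa v1, v2, v3
// the block above duplicates the dst register v1 into the src2 slot,
// satisfying the tied dst/src2 operand constraint of the instruction
// description.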
10627
10628/// Force static initialization.
10629extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
10630LLVMInitializeAMDGPUAsmParser() {
10631 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
10632 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
10633}
10634
10635#define GET_MATCHER_IMPLEMENTATION
10636#define GET_MNEMONIC_SPELL_CHECKER
10637#define GET_MNEMONIC_CHECKER
10638#include "AMDGPUGenAsmMatcher.inc"
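// Note (not part of the original source): via the GET_* macros above, the
// generated include provides, among other things, MatchInstructionImpl(),
// the MatchClassKind enum with the MCK_* values used below, and the
// AMDGPUMnemonicSpellCheck helper.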
10639
10640ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10641 unsigned MCK) {
10642 switch (MCK) {
10643 case MCK_addr64:
10644 return parseTokenOp("addr64", Operands);
10645 case MCK_done:
10646 return parseNamedBit("done", Operands, AMDGPUOperand::ImmTyDone, true);
10647 case MCK_idxen:
10648 return parseTokenOp("idxen", Operands);
10649 case MCK_lds:
10650 return parseNamedBit("lds", Operands, AMDGPUOperand::ImmTyLDS,
10651 /*IgnoreNegative=*/true);
10652 case MCK_offen:
10653 return parseTokenOp("offen", Operands);
10654 case MCK_off:
10655 return parseTokenOp("off", Operands);
10656 case MCK_row_95_en:
10657 return parseNamedBit("row_en", Operands, AMDGPUOperand::ImmTyRowEn, true);
10658 case MCK_gds:
10659 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
10660 case MCK_tfe:
10661 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
10662 }
10663 return tryCustomParseOperand(Operands, MCK);
10664}
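// Illustrative only; not part of the original source. When the generated
// matcher asks for, e.g., MCK_offen while matching something like
//   buffer_load_dword v5, v0, s[8:11], s3 offen
// the dispatch above consumes the bare "offen" token; kinds not listed here
// fall through to tryCustomParseOperand().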
10665
10666// This function should be defined after the auto-generated include so
10667// that the MatchClassKind enum is defined.
10668unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
10669 unsigned Kind) {
10670 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
10671 // But MatchInstructionImpl() expects a token and fails to validate the
10672 // operand. This method checks whether we were given an immediate operand
10673 // when the corresponding token is expected.
10674 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
10675 switch (Kind) {
10676 case MCK_addr64:
10677 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10678 case MCK_gds:
10679 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10680 case MCK_lds:
10681 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10682 case MCK_idxen:
10683 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10684 case MCK_offen:
10685 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10686 case MCK_tfe:
10687 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10688 case MCK_done:
10689 return Operand.isDone() ? Match_Success : Match_InvalidOperand;
10690 case MCK_row_95_en:
10691 return Operand.isRowEn() ? Match_Success : Match_InvalidOperand;
10692 case MCK_SSrc_b32:
10693 // When operands have expression values, they return true for isToken
10694 // because it is not possible to distinguish between a token and an
10695 // expression at parse time. MatchInstructionImpl() always tries to match
10696 // such an operand as a token first, and when the name of the expression
10697 // is not a valid token, the match fails, so we need to handle that case
10698 // here.
10699 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10700 case MCK_SSrc_f32:
10701 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10702 case MCK_SOPPBrTarget:
10703 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10704 case MCK_VReg32OrOff:
10705 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10706 case MCK_InterpSlot:
10707 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10708 case MCK_InterpAttr:
10709 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10710 case MCK_InterpAttrChan:
10711 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10712 case MCK_SReg_64:
10713 case MCK_SReg_64_XEXEC:
10714 // Null is defined as a 32-bit register, but
10715 // it should also be allowed with 64-bit and larger operands.
10716 // The following code enables it for SReg_64 and larger operands
10717 // used as source and destination. Remaining source
10718 // operands are handled in isInlinableImm.
10719 case MCK_SReg_96:
10720 case MCK_SReg_128:
10721 case MCK_SReg_256:
10722 case MCK_SReg_512:
10723 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10724 default:
10725 return Match_InvalidOperand;
10726 }
10727}
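// Illustrative only; not part of the original source. A bit like "gds" in
//   ds_write_b32 v0, v1 gds
// is parsed as an immediate operand rather than a token, so the MCK_gds
// case above lets the matcher accept it where a token operand class is
// expected.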
10728
10729//===----------------------------------------------------------------------===//
10730// endpgm
10731//===----------------------------------------------------------------------===//
10732
10733ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10734 SMLoc S = getLoc();
10735 int64_t Imm = 0;
10736
10737 if (!parseExpr(Imm)) {
10738 // The operand is optional; if not present, default to 0.
10739 Imm = 0;
10740 }
10741
10742 if (!isUInt<16>(Imm))
10743 return Error(S, "expected a 16-bit value");
10744
10745 Operands.push_back(
10746 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
10747 return ParseStatus::Success;
10748}
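// Illustrative only; not part of the original source. Both of these forms
// parse:
//   s_endpgm        (immediate defaults to 0)
//   s_endpgm 3      (any value that fits in 16 bits)
// whereas s_endpgm 0x10000 is rejected by the isUInt<16> check above.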
10749
10750bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
10751
10752//===----------------------------------------------------------------------===//
10753// Split Barrier
10754//===----------------------------------------------------------------------===//
10755
10756bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }