enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
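// Register kind deduced while parsing, e.g. v0 -> IS_VGPR, s[4:5] -> IS_SGPR,
// a1 -> IS_AGPR, ttmp3 -> IS_TTMP, vcc -> IS_SPECIAL.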
  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;
    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
    bool isForcedLit() const { return Lit == LitModifier::Lit; }
    bool isForcedLit64() const { return Lit == LitModifier::Lit64; }
    int64_t getFPModifiersOperand() const {
      // Pack the FP modifier flags into the src-modifier immediate.
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers())
        return getFPModifiersOperand();
      if (hasIntModifiers())
        return getIntModifiersOperand();
      return 0;
    }
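    // For example, an operand written as "-|v0|" parses with Neg and Abs set
    // and is encoded as (SISrcMods::NEG | SISrcMods::ABS) here.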
    friend raw_ostream &operator <<(raw_ostream &OS,
                                    AMDGPUOperand::Modifiers Mods);
    ImmTyMatrixAScaleFmt,
    ImmTyMatrixBScaleFmt,
  mutable int MCOpIdx = -1;

  bool isToken() const override { return Kind == Token; }
  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }
  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
    return isRegOrImmWithInputMods(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
  }
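  // In the IsFake16 variants here and below, "true16" operands live in the
  // real 16-bit register file (VS_16*), while "fake16" encodings operate on
  // the low halves of 32-bit registers (VS_32*); e.g.
  // v_add_f16 v0.l, v1.l, v2.l (true16) vs. v_add_f16 v0, v1, v2 (fake16).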
  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
    return isRegOrImmWithInputMods(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrInlineImmWithFP64InputMods() const {
    return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
  }
  bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }

  bool isVRegWithFP32InputMods() const {
    return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
  }

  bool isVRegWithFP64InputMods() const {
    return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
  }

  bool isPackedFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
  }

  bool isPackedVGPRFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }
  bool isAV_LdSt_32_Align2_RegOp() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::AGPR_32RegClassID);
  }

  bool isVRegWithInputMods() const;
  template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
  template <bool IsFake16> bool isT16VRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;
  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }

  bool isImmLiteral() const { return isImmTy(ImmTyNone); }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
  bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
  bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
  bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
  bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
  bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
  bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
  bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
  bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
  bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
  bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isDppFI() const { return isImmTy(ImmTyDppFI); }
  bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
  bool isDone() const { return isImmTy(ImmTyDone); }
  bool isRowEn() const { return isImmTy(ImmTyRowEn); }
  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrc_b32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b16();
  }

  bool isSSrc_b64() const {
    return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
           (((const MCTargetAsmParser *)AsmParser)
                ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
            isExpr());
  }
  bool isSSrc_f32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }

  bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }

  bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrc_b32();
  }

  bool isSSrcOrLds_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }
  bool isVCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrc_b32_Lo256() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
  }

  bool isVCSrc_b64_Lo256() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
  }

  bool isVCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcT_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isVCSrcTB16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrcFake16B16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrc_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrc_v2b16() const { return isVCSrc_b16(); }

  bool isVCSrc_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrc_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcTBF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
  }

  bool isVCSrcT_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcT_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcTBF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcTF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrcFake16BF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcFake16F16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrc_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
  }

  bool isVCSrc_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }

  bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
  bool isVSrc_b32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }

  bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }

  bool isVSrcT_b16_Lo128() const {
    return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrcFake16_b16_Lo128() const {
    return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }

  bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }

  bool isVCSrcV2FP32() const { return isVCSrc_f64(); }

  bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }

  bool isVCSrc_v2b32() const { return isVCSrc_b64(); }

  bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }

  bool isVSrc_f32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }

  bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }

  bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }

  bool isVSrcT_bf16_Lo128() const {
    return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcT_f16_Lo128() const {
    return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrcFake16_bf16_Lo128() const {
    return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcFake16_f16_Lo128() const {
    return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }

  bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }

  bool isVSrc_v2bf16() const {
    return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
  }

  bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }

  bool isVSrc_v2f16_splat() const { return isVSrc_v2f16(); }

  bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }
  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }
  bool isVISrcV2B16() const {
    return isVISrcB16();
  }
  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }
  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }
  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
  }
  bool isVISrc_64_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
  }
  bool isVISrc_64_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }
  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }
  bool isVISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }
  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }
  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }
  bool isVISrc_256_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }
  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }
  bool isVISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }
  bool isVISrc_512_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }
  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }
  bool isVISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }
  bool isVISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }
  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }
  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }
  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }
  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }
  bool isVISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }
  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }
  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512_b32();
  }

  bool isVISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }
  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }
  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }
  bool isVISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }
  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }
  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024_b32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }
  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }
  bool isAISrcV2B16() const {
    return isAISrcB16();
  }
  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }
  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }
  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }
  bool isAISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }
  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }
  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }
  bool isAISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }
  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }
  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128_b32();
  }

  bool isVISrc_128_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
  }
  bool isVISrc_128_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }
  bool isVISrc_128V2F16() const {
    return isVISrc_128_f16() || isVISrc_128_b32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }
  bool isAISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }
  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }
  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }
  bool isAISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }
  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }
  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512_b32();
  }

  bool isAISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }
  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }
  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }
  bool isAISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }
  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }
  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024_b32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }
  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }
  bool isKImmFP64() const { return isLiteralImm(MVT::f64); }
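  // KImm operands are mandatory 32-bit literal constants, e.g. the K value in
  // v_madmk_f32 v0, v1, 0x41200000, v2; they are never inline constants.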
  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSOPPBrTarget() const { return isExpr() || isImm(); }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayALU() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isWaitEvent() const;
  bool isSplitBarrier() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
    return [this, P]() { return P(*this); };
  }
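  // getPredicate binds a check to this operand so it can be invoked later
  // as a nullary closure, e.g.:
  //   auto IsOff = Op.getPredicate(
  //       [](const AMDGPUOperand &O) { return O.isOff(); });
  //   if (IsOff()) ...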
  StringRef getToken() const {
    assert(isToken());
    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  MCRegister getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  int getMCOpIdx() const { return MCOpIdx; }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }
  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  bool isForcedLit() const {
    return isImmLiteral() && getModifiers().isForcedLit();
  }

  bool isForcedLit64() const {
    return isImmLiteral() && getModifiers().isForcedLit64();
  }
  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N,
                      bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val,
                            bool ApplyModifiers) const;

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N);
  }
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N, false);
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    // clang-format off
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTyIndexKey8bit: OS << "index_key"; break;
    case ImmTyIndexKey16bit: OS << "index_key"; break;
    case ImmTyIndexKey32bit: OS << "index_key"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyIsAsync: OS << "IsAsync"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClamp: OS << "Clamp"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFI: OS << "DppFI"; break;
    case ImmTySDWADstSel: OS << "SDWADstSel"; break;
    case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
    case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
    case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyDone: OS << "Done"; break;
    case ImmTyRowEn: OS << "RowEn"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyWaitEvent: OS << "WaitEvent"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
    case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
    case ImmTyBitOp3: OS << "BitOp3"; break;
    case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
    case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
    case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
    case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
    case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
    case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
    case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
    case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
    case ImmTyScaleSel: OS << "ScaleSel"; break;
    case ImmTyByteSel: OS << "ByteSel"; break;
    }
    // clang-format on
  }
  void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
         << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr ";
      MAI.printExpr(OS, *Expr);
      OS << '>';
      break;
    }
  }
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }
  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }
  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      MCRegister Reg, SMLoc S, SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = Reg;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }
  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};
raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}
#define GET_REGISTER_MATCHER
#include "AMDGPUGenAsmMatcher.inc"
#undef GET_REGISTER_MATCHER
#undef GET_SUBTARGET_FEATURE_NAME
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol *const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(
            MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol *const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instructions using AGPRs error out during matching on targets
    // without accumulation registers.
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol *const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(
            MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count, which depends on agpr_count where VGPRs
        // and AGPRs share the register file.
        MCSymbol *const vSym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI))
      usesAgprAt(AgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};
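// Example: parsing v[4:5] calls usesRegister(IS_VGPR, 4, 64), which marks
// VGPR index 5 as used and raises .kernel.vgpr_count to at least 6.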
class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  const unsigned HwMode;
#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"
  unsigned getRegOperandSize(const MCInstrDesc &Desc, unsigned OpNo) const {
    assert(OpNo < Desc.NumOperands);
    int16_t RCID = MII.getOpRegClassID(Desc.operands()[OpNo], HwMode);
    return getRegBitWidth(RCID) / 8;
  }

  std::optional<AMDGPU::InfoSectionData> InfoData;
  void createConstantSymbol(StringRef Id, int64_t Val);

  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);

  bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
                          const MCExpr *FlatScrUsed, bool XNACKUsed,
                          std::optional<bool> EnableWavefrontSize32,
                          const MCExpr *NextFreeVGPR, SMRange VGPRRange,
                          const MCExpr *NextFreeSGPR, SMRange SGPRRange,
                          const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSACodeObjectVersion();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();
  bool ParseDirectiveAMDGPUInfo();

  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
                             RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                             unsigned &RegWidth,
                             SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                             unsigned &RegWidth,
                             SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                          unsigned &RegWidth,
                          SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
  MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
                           unsigned SubReg, unsigned RegWidth, SMLoc Loc);

  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic);

  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
public:
  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII)
      : MCTargetAsmParser(STI, MII), Parser(_Parser),
        HwMode(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {
    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
      createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
      createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
    } else {
      createConstantSymbol(".option.machine_version_major", ISA.Major);
      createConstantSymbol(".option.machine_version_minor", ISA.Minor);
      createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
    }
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      initializeGprCountSymbol(IS_VGPR);
      initializeGprCountSymbol(IS_SGPR);
    } else {
      KernelScope.initialize(getContext());
    }

    for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
      createConstantSymbol(Symbol, Code);

    createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
    createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
    createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
  }
  bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }

  bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool has64BitLiterals() const {
    return getFeatureBits()[AMDGPU::Feature64BitLiterals];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasTrue16Insts() const {
    return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  bool hasPartialNSAEncoding() const {
    return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
  }

  bool hasGloballyAddressableScratch() const {
    return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
  }
  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  MCContext &getContext() const {
    return const_cast<AMDGPUAsmParser *>(this)
        ->MCTargetAsmParser::getContext();
  }

  const MCRegisterInfo *getMRI() const {
    return getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const { return &MII; }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }
  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;
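  // A mnemonic suffix forces a particular encoding: e.g. "v_add_f32_e64"
  // sets ForcedEncodingSize = 64 (VOP3), while "_dpp" and "_sdwa" suffixes
  // set ForcedDPP / ForcedSDWA; see parseMnemonicSuffix below.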
  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
  ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
                               SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  void onEndOfFile() override;
  ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
                           OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;

  ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);

  ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
  ParseStatus
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     std::function<bool(int64_t &)> ConvertResult = nullptr);

  ParseStatus parseOperandArrayWithPrefix(
      const char *Prefix, OperandVector &Operands,
      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
      bool (*ConvertResult)(int64_t &) = nullptr);
  ParseStatus
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                bool IgnoreNegative = false);
  unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
  ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);

  ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
                                    SMLoc &StringLoc);
  ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
                                         StringRef Name,
                                         ArrayRef<const char *> Ids,
                                         int64_t &IntVal);
  ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
                                         StringRef Name,
                                         ArrayRef<const char *> Ids,
                                         AMDGPUOperand::ImmTy Type);

  bool isOperandModifier(const AsmToken &Token,
                         const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token,
                              const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token,
                              const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token,
                               const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
                       bool HasLit = false);
  ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
                            bool HasLit = false);
  ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                           bool AllowImm = true);
  ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                            bool AllowImm = true);
  ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
  ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
  ParseStatus tryParseIndexKey(OperandVector &Operands,
                               AMDGPUOperand::ImmTy ImmTy);

  ParseStatus tryParseMatrixFMT(OperandVector &Operands, StringRef Name,
                                AMDGPUOperand::ImmTy Type);
  ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
                                  AMDGPUOperand::ImmTy Type);
  ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
                                     AMDGPUOperand::ImmTy Type);

  ParseStatus parseDfmtNfmt(int64_t &Format);
  ParseStatus parseUfmt(int64_t &Format);
  ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
                                       int64_t &Format);
  ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
                                         int64_t &Format);
  ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
  ParseStatus parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr,
                     SMLoc Loc);
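  // These handle MTBUF format modifiers, e.g. the split form
  // format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT] on pre-GFX10 targets,
  // or the unified form format:[BUF_FMT_32_FLOAT] / numeric format:22 on
  // GFX10+.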
  bool parseCnt(int64_t &IntVal);
  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
  bool parseDelay(int64_t &Delay);
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Val;
    bool IsSymbolic = false;
    bool IsDefined = false;

    constexpr OperandInfoTy(int64_t Val) : Val(Val) {}
  };

  struct StructuredOpField : OperandInfoTy {
    StringLiteral Id;
    StringLiteral Desc;
    unsigned Width;
    bool IsDefined = false;

    constexpr StructuredOpField(StringLiteral Id, StringLiteral Desc,
                                unsigned Width, int64_t Default)
        : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
    virtual ~StructuredOpField() = default;
    bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
      Parser.Error(Loc, "invalid " + Desc + ": " + Err);
      return false;
    }

    virtual bool validate(AMDGPUAsmParser &Parser) const {
      if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
        return Error(Parser, "not supported on this GPU");
      if (!isUIntN(Width, Val))
        return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
      return true;
    }
  };
  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op,
                        OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
                             OperandInfoTy &Width);

  const AMDGPUOperand &findMCOperand(const OperandVector &Operands,
                                     int MCOpIdx) const;

  static SMLoc getLaterLoc(SMLoc a, SMLoc b);
  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(const OperandVector &Operands, int MCOpIdx) const;
  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type,
                  const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, SMLoc IDLoc,
                           const OperandVector &Operands);
  bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateConstantBusLimitations(const MCInst &Inst,
                                      const OperandVector &Operands);
  std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
                                                      bool AsVOPD3);
  bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
  bool tryVOPD(const MCInst &Inst);
  bool tryVOPD3(const MCInst &Inst);
  bool tryAnotherVOPDEncoding(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc);
  bool validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
  bool validateTensorR128(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateTrue16OpSel(const MCInst &Inst);
  bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(MCRegister Reg) const;
  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
  bool validateDS(const MCInst &Inst, const OperandVector &Operands);
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             SMLoc IDLoc);
  bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
                              const unsigned CPol);
  bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
  bool validateLdsDirect(const MCInst &Inst, const OperandVector &Operands);
  bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  MCRegister findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);
  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  StringRef getId() const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);

  bool parseString(StringRef &Val,
                   const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  StringRef getTokenStr() const;
  AsmToken peekToken(bool ShouldSkipSpace = true);
  SMLoc getLoc() const;
  void onBeginOfFile() override;
  bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;

  ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
  bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
                           const unsigned MaxVal, const Twine &ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);
  bool parseSwizzleFFT(int64_t &Imm);
  bool parseSwizzleRotate(int64_t &Imm);
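  // These accept the SP3 swizzle macros, e.g.:
  //   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
  //   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pip")
  //   ds_swizzle_b32 v0, v1 offset:swizzle(BROADCAST, 2, 0)
  //   ds_swizzle_b32 v0, v1 offset:swizzle(SWAP, 16)
  //   ds_swizzle_b32 v0, v1 offset:swizzle(REVERSE, 8)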
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) {
    cvtMubufImpl(Inst, Operands, false);
  }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) {
    cvtMubufImpl(Inst, Operands, true);
  }

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
                    OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
  void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
  void cvtOpSelHelper(MCInst &Inst, unsigned OpSel);
  bool parseDimId(unsigned &Encoding);
  bool convertDppBoundCtrl(int64_t &BoundCtrl);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
    cvtDPP(Inst, Operands, true);
  }
  void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
                  bool IsDPP8 = false);
  void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
    cvtVOP3DPP(Inst, Operands, true);
  }

  ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
                           AMDGPUOperand::ImmTy Type);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);
};
bool AMDGPUOperand::isInlinableImm(MVT type) const {
  // Named inline values like shared_base are defined as 32-bit operands
  // but accepted here for both 32-bit and 64-bit operand types.
  if (isInlineValue())
    return true;

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not).
    return false;
  }

  if (getModifiers().Lit != LitModifier::None)
    return false;

  if (Imm.IsFPImm) { // We got fp literal token.
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand.
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    bool Lost;
    FPLiteral.convert(*getFltSemantics(type.getScalarType()),
                      APFloat::rmNearestTiesToEven, &Lost);
    if (Lost)
      return false;

    if (type.getScalarSizeInBits() == 16) {
      uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      return AMDGPU::isInlinableLiteral16(static_cast<int16_t>(ImmVal), type,
                                          AsmParser->hasInv2PiInlineImm());
    }

    return AMDGPU::isInlinableLiteral32(
        static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand.
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  APInt Literal(64, Imm.Val);
  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
}
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal.
  if (!isImmTy(ImmTyNone))
    return false;

  bool Allow64Bit =
      (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();

  if (!Imm.IsFPImm) {
    // We got int literal token.
    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals while preserving the same
      // semantics for VOP1/2/C and VOP3 because of integer truncation.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64) {
      if (Allow64Bit)
        return true;
      Size = 32;
    }

    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token.
  if (type == MVT::f64) // Expected 64-bit fp operand.
    return true;

  if (type == MVT::i64) // Expected 64-bit int operand.
    return Allow64Bit;

  // For packed operands, the literal goes into the lower half; i16x2 literals
  // are encoded as single-precision floats, matching SP3 and hardware.
  MVT ExpectedType = (type == MVT::v2f16)   ? MVT::f16
                     : (type == MVT::v2i16) ? MVT::f32
                     : (type == MVT::v2f32) ? MVT::f32
                                            : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}
bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() &&
         AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isVRegWithInputMods() const {
  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
         // GFX90A allows DPP on 64-bit operands.
         (isRegClass(AMDGPU::VReg_64RegClassID) &&
          AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
}

template <bool IsFake16>
bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
  return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
                             : AMDGPU::VGPR_16_Lo128RegClassID);
}

template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
  return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
                             : AMDGPU::VGPR_16RegClassID);
}
bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  if (AsmParser->isGFX9Plus())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

bool AMDGPUOperand::isBoolReg() const {
  return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
                     (AsmParser->isWave32() && isSCSrc_b32()));
}
uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs)
    Val &= ~FpSignMask;
  if (Imm.Mods.Neg)
    Val ^= FpSignMask;

  return Val;
}
void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N,
                                   bool ApplyModifiers) const {
  if (isExpr()) {
    Inst.addOperand(MCOperand::createExpr(Expr));
    return;
  }

  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &&
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
  }
}
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val,
                                         bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals.
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size =
        Imm.IsFPImm ? sizeof(double) : AMDGPU::getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
  LitModifier Lit = getModifiers().Lit;

  bool CanUse64BitLiterals =
      AsmParser->has64BitLiterals() &&
      !(InstDesc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));

  MCContext &Ctx = AsmParser->getContext();

  if (Imm.IsFPImm) { // We got fp literal token.
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP64: {
      if (Lit == LitModifier::None &&
          AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable.
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) {
        bool HasMandatoryLiteral =
            AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
        // For fp operands we check that the low 32 bits are zeros; otherwise
        // the value cannot be encoded exactly.
        if (Literal.getLoBits(32) != 0 &&
            (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
            !HasMandatoryLiteral) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
              Inst.getLoc(),
              "Can't encode literal as exact 64-bit floating-point operand. "
              "Low 32-bits will be set to zero");
          Val &= 0xffffffff00000000u;
        }

        if (CanUse64BitLiterals && Lit == LitModifier::None &&
            (isInt<32>(Val) || isUInt<32>(Val))) {
          // The FP operand will be verbalized as an integer one. If that
          // integer happens to fit 32 bits, on re-assembling it would be
          // interpreted as the high half of the value, so wrap it in lit64().
          Lit = LitModifier::Lit64;
        } else if (Lit == LitModifier::Lit) {
          // For FP64 operands lit() specifies the high half of the value.
          Val = Hi_32(Val);
        }
      } else if (CanUse64BitLiterals && Lit == LitModifier::None &&
                 !isInt<32>(Val) && !isUInt<32>(Val)) {
        Lit = LitModifier::Lit64;
      }

      if (Lit == LitModifier::None)
        Inst.addOperand(MCOperand::createImm(Val));
      else
        Inst.addOperand(
            MCOperand::createExpr(AMDGPUMCExpr::createLit(Lit, Val, Ctx)));
      return;
    }
    default: {
      // The 1/(2*pi) constant has a dedicated inline encoding.
      if (Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
          Literal == 0x3fc45f306725feed) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Convert the double literal to the operand's floating-point type.
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      bool lost;
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      Val = FPLiteral.bitcastToAPInt().getZExtValue();

      if (Lit != LitModifier::None) {
        Inst.addOperand(
            MCOperand::createExpr(AMDGPUMCExpr::createLit(Lit, Val, Ctx)));
      } else {
        Inst.addOperand(MCOperand::createImm(Val));
      }
      return;
    }
    }
  }

  // We got int literal token.
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    if (Lit == LitModifier::None &&
        AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }
    if (!AsmParser->has64BitLiterals() || Lit == LitModifier::Lit)
      Val = Lo_32(Val);
    break;
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    if (Lit == LitModifier::None &&
        AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }
    // An integer literal for a 64-bit fp operand provides the high half.
    if (!AsmParser->has64BitLiterals()) {
      Val = static_cast<uint64_t>(Val) << 32;
    } else if (Lit == LitModifier::Lit ||
               (Lit == LitModifier::None && isUInt<32>(Val))) {
      Val = static_cast<uint64_t>(Val) << 32;
    }
    break;
  default:
    break;
  }

  if (Lit != LitModifier::None) {
    Inst.addOperand(
        MCOperand::createExpr(AMDGPUMCExpr::createLit(Lit, Val, Ctx)));
  } else {
    Inst.addOperand(MCOperand::createImm(Val));
  }
}
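// For example, a 64-bit literal that is not an inline constant may be emitted
// with an explicit modifier on subtargets with 64-bit literals, e.g.
//   s_mov_b64 s[0:1], lit64(0x1234567812345678)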
void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(),
                                                        AsmParser->getSTI())));
}

bool AMDGPUOperand::isInlineValue() const {
  return isRegKind() && ::isInlineValue(getReg());
}
void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
  MCSymbol *Sym = getContext().getOrCreateSymbol(Id);
  Sym->setVariableValue(MCConstantExpr::create(Val, getContext()));
}
static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
    default: return -1;
    case 32: return AMDGPU::VGPR_32RegClassID;
    case 64: return AMDGPU::VReg_64RegClassID;
    case 96: return AMDGPU::VReg_96RegClassID;
    case 128: return AMDGPU::VReg_128RegClassID;
    case 160: return AMDGPU::VReg_160RegClassID;
    case 192: return AMDGPU::VReg_192RegClassID;
    case 224: return AMDGPU::VReg_224RegClassID;
    case 256: return AMDGPU::VReg_256RegClassID;
    case 288: return AMDGPU::VReg_288RegClassID;
    case 320: return AMDGPU::VReg_320RegClassID;
    case 352: return AMDGPU::VReg_352RegClassID;
    case 384: return AMDGPU::VReg_384RegClassID;
    case 512: return AMDGPU::VReg_512RegClassID;
    case 1024: return AMDGPU::VReg_1024RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
    default: return -1;
    case 32: return AMDGPU::TTMP_32RegClassID;
    case 64: return AMDGPU::TTMP_64RegClassID;
    case 128: return AMDGPU::TTMP_128RegClassID;
    case 256: return AMDGPU::TTMP_256RegClassID;
    case 512: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
    default: return -1;
    case 32: return AMDGPU::SGPR_32RegClassID;
    case 64: return AMDGPU::SGPR_64RegClassID;
    case 96: return AMDGPU::SGPR_96RegClassID;
    case 128: return AMDGPU::SGPR_128RegClassID;
    case 160: return AMDGPU::SGPR_160RegClassID;
    case 192: return AMDGPU::SGPR_192RegClassID;
    case 224: return AMDGPU::SGPR_224RegClassID;
    case 256: return AMDGPU::SGPR_256RegClassID;
    case 288: return AMDGPU::SGPR_288RegClassID;
    case 320: return AMDGPU::SGPR_320RegClassID;
    case 352: return AMDGPU::SGPR_352RegClassID;
    case 384: return AMDGPU::SGPR_384RegClassID;
    case 512: return AMDGPU::SGPR_512RegClassID;
    }
  } else if (Is == IS_AGPR) {
    switch (RegWidth) {
    default: return -1;
    case 32: return AMDGPU::AGPR_32RegClassID;
    case 64: return AMDGPU::AReg_64RegClassID;
    case 96: return AMDGPU::AReg_96RegClassID;
    case 128: return AMDGPU::AReg_128RegClassID;
    case 160: return AMDGPU::AReg_160RegClassID;
    case 192: return AMDGPU::AReg_192RegClassID;
    case 224: return AMDGPU::AReg_224RegClassID;
    case 256: return AMDGPU::AReg_256RegClassID;
    case 288: return AMDGPU::AReg_288RegClassID;
    case 320: return AMDGPU::AReg_320RegClassID;
    case 352: return AMDGPU::AReg_352RegClassID;
    case 384: return AMDGPU::AReg_384RegClassID;
    case 512: return AMDGPU::AReg_512RegClassID;
    case 1024: return AMDGPU::AReg_1024RegClassID;
    }
  }
  return -1;
}
static MCRegister getSpecialRegForName(StringRef RegName) {
  return StringSwitch<MCRegister>(RegName)
      .Case("exec", AMDGPU::EXEC)
      .Case("vcc", AMDGPU::VCC)
      .Case("flat_scratch", AMDGPU::FLAT_SCR)
      .Case("xnack_mask", AMDGPU::XNACK_MASK)
      .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
      .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
      .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("lds_direct", AMDGPU::LDS_DIRECT)
      .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
      .Case("m0", AMDGPU::M0)
      .Case("vccz", AMDGPU::SRC_VCCZ)
      .Case("src_vccz", AMDGPU::SRC_VCCZ)
      .Case("execz", AMDGPU::SRC_EXECZ)
      .Case("src_execz", AMDGPU::SRC_EXECZ)
      .Case("scc", AMDGPU::SRC_SCC)
      .Case("src_scc", AMDGPU::SRC_SCC)
      .Case("tba", AMDGPU::TBA)
      .Case("tma", AMDGPU::TMA)
      .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
      .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
      .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
      .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
      .Case("vcc_lo", AMDGPU::VCC_LO)
      .Case("vcc_hi", AMDGPU::VCC_HI)
      .Case("exec_lo", AMDGPU::EXEC_LO)
      .Case("exec_hi", AMDGPU::EXEC_HI)
      .Case("tma_lo", AMDGPU::TMA_LO)
      .Case("tma_hi", AMDGPU::TMA_HI)
      .Case("tba_lo", AMDGPU::TBA_LO)
      .Case("tba_hi", AMDGPU::TBA_HI)
      .Case("pc", AMDGPU::PC_REG)
      .Case("null", AMDGPU::SGPR_NULL)
      .Default(MCRegister());
}
bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc, bool RestoreOnFailure) {
  auto R = parseRegister();
  if (!R)
    return true;
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  return ParseRegister(Reg, StartLoc, EndLoc, false);

ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
                                              SMLoc &EndLoc) {
  bool Result = ParseRegister(Reg, StartLoc, EndLoc, true);
  bool PendingErrors = getParser().hasPendingError();
  getParser().clearPendingErrors();
bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind,
                                            MCRegister Reg1, SMLoc Loc) {
  if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
  if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
    Reg = AMDGPU::FLAT_SCR;
  if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
    Reg = AMDGPU::XNACK_MASK;
  if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
  if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
  if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
  Error(Loc, "register does not fit in the list");
  if (Reg1 != Reg + RegWidth / 32) {
    Error(Loc, "registers in a list must have consecutive indices");
  {{"ttmp"}, IS_TTMP},

  return Kind == IS_VGPR ||

  if (Str.starts_with(Reg.Name))

  return !Str.getAsInteger(10, Num);
AMDGPUAsmParser::isRegister(const AsmToken &Token,
                            const AsmToken &NextToken) const {
  StringRef RegSuffix = Str.substr(RegName.size());
  if (!RegSuffix.empty()) {

AMDGPUAsmParser::isRegister()
  return isRegister(getToken(), peekToken());
MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
                                          unsigned SubReg, unsigned RegWidth,
  unsigned AlignSize = 1;
  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
  if (RegNum % AlignSize != 0) {
    Error(Loc, "invalid register alignment");
    return MCRegister();
  unsigned RegIdx = RegNum / AlignSize;
    Error(Loc, "invalid or unsupported register size");
    return MCRegister();
  const MCRegisterClass RC = TRI->getRegClass(RCID);
  if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
    Error(Loc, "register index is out of range");
    return AMDGPU::NoRegister;
  if (RegKind == IS_VGPR && !isGFX1250Plus() && RegIdx + RegWidth / 32 > 256) {
    Error(Loc, "register index is out of range");
    return MCRegister();
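// Example: SGPR and TTMP tuples must start on an index that is a multiple of
// AlignSize, so a 64-bit pair presumably needs an even base index: s[2:3] is
// accepted while s[3:4] fails with "invalid register alignment".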
bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
  int64_t RegLo, RegHi;
  SMLoc FirstIdxLoc = getLoc();
  SecondIdxLoc = getLoc();
  Error(FirstIdxLoc, "invalid register index");
  Error(SecondIdxLoc, "invalid register index");
  if (RegLo > RegHi) {
    Error(FirstIdxLoc, "first register index should not exceed second index");
  if (RegHi == RegLo) {
    StringRef RegSuffix = getTokenStr();
    if (RegSuffix == ".l") {
      SubReg = AMDGPU::lo16;
    } else if (RegSuffix == ".h") {
      SubReg = AMDGPU::hi16;
  Num = static_cast<unsigned>(RegLo);
  RegWidth = 32 * ((RegHi - RegLo) + 1);
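// Example: for v[4:7], RegLo == 4 and RegHi == 7, giving Num == 4 and
// RegWidth == 32 * ((7 - 4) + 1) == 128; only a single-register range such as
// v[5:5] may carry a trailing .l or .h 16-bit half selector.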
MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
                                            SmallVectorImpl<AsmToken> &Tokens) {
  RegKind = IS_SPECIAL;
MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
                                            SmallVectorImpl<AsmToken> &Tokens) {
  StringRef RegName = getTokenStr();
  auto Loc = getLoc();
    Error(Loc, "invalid register name");
    return MCRegister();
  unsigned SubReg = NoSubRegister;
  if (!RegSuffix.empty()) {
      SubReg = AMDGPU::lo16;
      SubReg = AMDGPU::hi16;
      Error(Loc, "invalid register index");
      return MCRegister();
  if (!ParseRegRange(RegNum, RegWidth, SubReg))
    return MCRegister();
  return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
                                         unsigned &RegNum, unsigned &RegWidth,
                                         SmallVectorImpl<AsmToken> &Tokens) {
  auto ListLoc = getLoc();
                 "expected a register or a list of registers")) {
    return MCRegister();
  auto Loc = getLoc();
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
    return MCRegister();
  if (RegWidth != 32) {
    Error(Loc, "expected a single 32-bit register");
    return MCRegister();
  RegisterKind NextRegKind;
  unsigned NextRegNum, NextRegWidth;
    if (!ParseAMDGPURegister(NextRegKind, NextReg,
                             NextRegNum, NextRegWidth,
      return MCRegister();
    if (NextRegWidth != 32) {
      Error(Loc, "expected a single 32-bit register");
      return MCRegister();
    if (NextRegKind != RegKind) {
      Error(Loc, "registers in a list must be of the same kind");
      return MCRegister();
    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
      return MCRegister();
                 "expected a comma or a closing square bracket")) {
    return MCRegister();
  Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
                                          MCRegister &Reg, unsigned &RegNum,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  auto Loc = getLoc();
    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
    Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
    assert(Parser.hasPendingError());
  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");
             " register not available on this GPU");
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
                                          MCRegister &Reg, unsigned &RegNum,
                                          bool RestoreOnFailure) {
  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {
std::optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
    return StringRef(".amdgcn.next_free_vgpr");
    return StringRef(".amdgcn.next_free_sgpr");
  return std::nullopt;

void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");

bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
    return !Error(getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
    ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
  if (OldCount <= NewMax)
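// Example: a reference to v[8:9] yields DwordRegIndex == 8 and RegWidth == 64,
// so NewMax == 8 + divideCeil(64, 32) - 1 == 9; when the current value of
// .amdgcn.next_free_vgpr is <= NewMax, the symbol is presumably raised to
// NewMax + 1 so it stays one past the highest register used.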
std::unique_ptr<AMDGPUOperand>
AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned RegNum, RegWidth;
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
  if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
  KernelScope.usesRegister(RegKind, RegNum, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
  if (isRegister() || isModifier())
  if (Lit == LitModifier::None) {
    if (trySkipId("lit"))
      Lit = LitModifier::Lit;
    else if (trySkipId("lit64"))
      Lit = LitModifier::Lit64;
    if (Lit != LitModifier::None) {
      ParseStatus S = parseImm(Operands, HasSP3AbsModifier, Lit);
  const auto &NextTok = peekToken();
  bool Negate = false;
  AMDGPUOperand::Modifiers Mods;
    StringRef Num = getTokenStr();
    APFloat RealVal(APFloat::IEEEdouble());
    auto roundMode = APFloat::rmNearestTiesToEven;
    if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
      RealVal.changeSign();
        AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(),
                                 S, AMDGPUOperand::ImmTyNone, true));
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);
    if (HasSP3AbsModifier) {
      if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
      if (Parser.parseExpression(Expr))
    if (Expr->evaluateAsAbsolute(IntVal)) {
      Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
      Op.setModifiers(Mods);
      if (Lit != LitModifier::None)
      Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
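// Example: an operand written with a lit64(...) wrapper forces 64-bit literal
// encoding; the parser strips the lit/lit64 keyword, recurses into parseImm
// for the wrapped value, and records the literal modifier on the result.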
ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
  if (auto R = parseRegister()) {

ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
  ParseStatus Res = parseReg(Operands);
  return parseImm(Operands, HasSP3AbsMod, Lit);
AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  return str == "abs" || str == "neg" || str == "sext";

AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token,
                                         const AsmToken &NextToken) const {

AMDGPUAsmParser::isOperandModifier(const AsmToken &Token,
                                   const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);

AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token,
                                        const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);

AMDGPUAsmParser::isModifier() {
  AsmToken NextToken[2];
  peekTokens(NextToken);
  return isOperandModifier(Tok, NextToken[0]) ||
         (Tok.is(AsmToken::Minus) &&
          isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
         isOpcodeModifierWithVal(Tok, NextToken[0]);
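// Example: a leading "-" is only treated as a modifier when it is followed by
// a register or another modifier, so "-v0" and "-|v0|" parse as modified
// operands while "-5" is still parsed as a plain immediate.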
AMDGPUAsmParser::parseSP3NegModifier() {
  AsmToken NextToken[2];
  peekTokens(NextToken);
      (isRegister(NextToken[0], NextToken[1]) ||
       isId(NextToken[0], "abs"))) {
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
    return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
  SP3Neg = parseSP3NegModifier();
  Neg = trySkipId("neg");
    return Error(Loc, "expected register or immediate");
  Abs = trySkipId("abs");
  if (trySkipId("lit")) {
    Lit = LitModifier::Lit;
  } else if (trySkipId("lit64")) {
    Lit = LitModifier::Lit64;
    if (!has64BitLiterals())
      return Error(Loc, "lit64 is not supported on this GPU");
    return Error(Loc, "expected register or immediate");
    Res = parseRegOrImm(Operands, SP3Abs, Lit);
    Res = parseReg(Operands);
    return (SP3Neg || Neg || SP3Abs || Abs || Lit != LitModifier::None)
  if (Lit != LitModifier::None && !Operands.back()->isImm())
    Error(Loc, "expected immediate with lit modifier");
  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
  if (Lit != LitModifier::None &&
  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;
  if (Mods.hasFPModifiers() || Lit != LitModifier::None) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
      return Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);
AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
  bool Sext = trySkipId("sext");
  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
    Res = parseRegOrImm(Operands);
    Res = parseReg(Operands);
  AMDGPUOperand::Modifiers Mods;
  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
      return Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);
ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, false);

ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, false);

ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
  auto Loc = getLoc();
  if (trySkipId("off")) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
                                                AMDGPUOperand::ImmTyOff, false));
  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
    return Match_InvalidOperand;
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
  if (tryAnotherVOPDEncoding(Inst))
    return Match_InvalidOperand;
  return Match_Success;
  static const unsigned Variants[] = {

ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (isForcedDPP() && isForcedVOP3()) {
  if (getForcedEncodingSize() == 32) {
  if (isForcedVOP3()) {
  if (isForcedSDWA()) {
  if (isForcedDPP()) {

StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (isForcedDPP() && isForcedVOP3())
  if (getForcedEncodingSize() == 32)
AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  case AMDGPU::FLAT_SCR:
  case AMDGPU::VCC_LO:
  case AMDGPU::VCC_HI:
  return MCRegister();
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {

unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
  case AMDGPU::V_LSHLREV_B64_e32_gfx12:
  case AMDGPU::V_LSHLREV_B64_e64_gfx12:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
  case AMDGPU::V_LSHRREV_B64_e64_gfx12:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
  case AMDGPU::V_ASHRREV_I64_e64_gfx12:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:
                                       bool AddMandatoryLiterals = false) {
      AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
      AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
    return {getNamedOperandIdx(Opcode, OpName::src0X),
            getNamedOperandIdx(Opcode, OpName::vsrc1X),
            getNamedOperandIdx(Opcode, OpName::vsrc2X),
            getNamedOperandIdx(Opcode, OpName::src0Y),
            getNamedOperandIdx(Opcode, OpName::vsrc1Y),
            getNamedOperandIdx(Opcode, OpName::vsrc2Y),
  return {getNamedOperandIdx(Opcode, OpName::src0),
          getNamedOperandIdx(Opcode, OpName::src1),
          getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
  return !isInlineConstant(Inst, OpIdx);

  return isSGPR(PReg, TRI) && PReg != SGPR_NULL;

  const unsigned Opcode = Inst.getOpcode();
  if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
  if (!LaneSelOp.isReg())
  return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
bool AMDGPUAsmParser::validateConstantBusLimitations(
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  MCRegister LastSGPR;
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;
  if (!(Desc.TSFlags &
  SmallDenseSet<MCRegister> SGPRsUsed;
  MCRegister SGPRUsed = findImplicitSGPRReadInVOP(Inst);
    SGPRsUsed.insert(SGPRUsed);
    ++ConstantBusUseCount;
  unsigned ConstantBusLimit = getConstantBusLimit(Opcode);
  for (int OpIdx : OpIndices) {
    if (usesConstantBus(Inst, OpIdx)) {
        if (SGPRsUsed.insert(LastSGPR).second) {
          ++ConstantBusUseCount;
        if (NumLiterals == 0) {
        } else if (LiteralSize != Size) {
  if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
          "invalid operand (violates constant bus restrictions)");
std::optional<unsigned>
AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
  const unsigned Opcode = Inst.getOpcode();
  auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
    const MCOperand &Opr = Inst.getOperand(OperandIdx);
      Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1170 ||
      Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
      Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
      Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx13 ||
      Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250 ||
      Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx13;
  for (auto OpName : {OpName::src0X, OpName::src0Y}) {
    int I = getNamedOperandIdx(Opcode, OpName);
    int64_t Imm = Op.getImm();
  for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
                      OpName::vsrc2Y, OpName::imm}) {
    int I = getNamedOperandIdx(Opcode, OpName);
  auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
      getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
  return InvalidCompOprIdx;
bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
  for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
    AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
    if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
      Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
  auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
  if (!InvalidCompOprIdx.has_value())
  auto CompOprIdx = *InvalidCompOprIdx;
      std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
               InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
  assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
  auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
  if (CompOprIdx == VOPD::Component::DST) {
      Error(Loc, "dst registers must be distinct");
      Error(Loc, "one dst register must be even and the other odd");
    auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
    Error(Loc, Twine("src") + Twine(CompSrcIdx) +
                   " operands must use different VGPR banks");
bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
  auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
  if (!InvalidCompOprIdx.has_value())
  InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
  if (InvalidCompOprIdx.has_value()) {
    if (*InvalidCompOprIdx == VOPD::Component::DST)
bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
      II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
  for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
                      OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
                      OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
    int I = getNamedOperandIdx(Opcode, OpName);
  return !tryVOPD3(Inst);
bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
    return tryVOPD(Inst);
  return tryVOPD3(Inst);
bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
  int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc) {
  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
  unsigned VDataSize = getRegOperandSize(Desc, VDataIdx);
  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
  bool IsPackedD16 = false;
    int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
    IsPackedD16 = D16Idx >= 0;
  if ((VDataSize / 4) == DataSize + TFESize)
    Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
    Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
  Error(IDLoc, Twine("image data size does not match ") + Modifiers);
bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc) {
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
                            ? AMDGPU::OpName::srsrc
                            : AMDGPU::OpName::rsrc;
  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
  assert(SrsrcIdx > VAddr0Idx);
  if (BaseOpcode->BVH) {
    if (IsA16 == BaseOpcode->A16)
    Error(IDLoc, "image address size does not match a16");
  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
  unsigned ActualAddrSize =
      IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(Desc, VAddr0Idx) / 4;
  unsigned ExpectedAddrSize =
  if (hasPartialNSAEncoding() &&
    int VAddrLastIdx = SrsrcIdx - 1;
    unsigned VAddrLastSize = getRegOperandSize(Desc, VAddrLastIdx) / 4;
    ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
  if (ExpectedAddrSize > 12)
    ExpectedAddrSize = 16;
  if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
  if (ActualAddrSize == ExpectedAddrSize)
  Error(IDLoc, "image address size does not match dim and a16");
bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
  if (!Desc.mayLoad() || !Desc.mayStore())
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;

bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
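// Example: image atomics must use a contiguous low dmask (0x1, 0x3, or 0xf
// depending on data size), while gather4 instructions return one component
// per texel and therefore require exactly one of the four dmask bits.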
bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
  if (!BaseOpcode->MSAA)
  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  return DimInfo->MSAA;

  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
  Error(getOperandLoc(Operands, Src0Idx), "source operand must be a VGPR");
bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
  if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
  Error(getOperandLoc(Operands, Src0Idx),
        "source operand must be either a VGPR or an inline constant");
bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
  const MCInstrDesc &Desc = MII.get(Opcode);
      !getFeatureBits()[FeatureMFMAInlineLiteralBug])
  const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
  if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
    Error(getOperandLoc(Operands, Src2Idx),
          "inline constants are not allowed for this operand");
bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
  int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
  if (BlgpIdx != -1) {
    if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
      int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
        int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
        Error(getOperandLoc(Operands, Src0Idx),
              "wrong register tuple size for cbsz value " + Twine(CBSZ));
        int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
        Error(getOperandLoc(Operands, Src1Idx),
              "wrong register tuple size for blgp value " + Twine(BLGP));
  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
  const MCOperand &Src2 = Inst.getOperand(Src2Idx);
  MCRegister Src2Reg = Src2.getReg();
  if (Src2Reg == DstReg)
          .getSizeInBits() <= 128)
  if (TRI->regsOverlap(Src2Reg, DstReg)) {
    Error(getOperandLoc(Operands, Src2Idx),
          "source 2 operand must not partially overlap with dst");
bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
  case V_DIV_SCALE_F32_gfx6_gfx7:
  case V_DIV_SCALE_F32_vi:
  case V_DIV_SCALE_F32_gfx10:
  case V_DIV_SCALE_F64_gfx6_gfx7:
  case V_DIV_SCALE_F64_vi:
  case V_DIV_SCALE_F64_gfx10:
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);

bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
  int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst,
  using namespace SIInstrFlags;
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  if ((Desc.TSFlags & Enc) == 0)
  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
    auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
        Error(getOperandLoc(Operands, SrcIdx),
              "lds_direct is not supported on this GPU");
        Error(getOperandLoc(Operands, SrcIdx),
              "lds_direct cannot be used with this instruction");
      if (SrcName != OpName::src0) {
        Error(getOperandLoc(Operands, SrcIdx),
              "lds_direct may be used as src0 only");
SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isFlatOffset())
      return Op.getStartLoc();
bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
    return validateFlatOffset(Inst, Operands);
    return validateSMEMOffset(Inst, Operands);
  const unsigned OffsetSize = 24;
  if (!isUIntN(OffsetSize - 1, Op.getImm())) {
    Error(getFlatOffsetLoc(Operands),
          Twine("expected a ") + Twine(OffsetSize - 1) +
              "-bit unsigned offset for buffer ops");
  const unsigned OffsetSize = 16;
  if (!isUIntN(OffsetSize, Op.getImm())) {
    Error(getFlatOffsetLoc(Operands),
          Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  if (!hasFlatOffsets() && Op.getImm() != 0) {
    Error(getFlatOffsetLoc(Operands),
          "flat offset modifier is not supported on this GPU");
  bool AllowNegative =
  if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
    Error(getFlatOffsetLoc(Operands),
          Twine("expected a ") +
              (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
                             : Twine(OffsetSize - 1) + "-bit unsigned offset"));
SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
  for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
      return Op.getStartLoc();
bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  Error(getSMEMOffsetLoc(Operands),
            ? "expected a 23-bit unsigned offset for buffer ops"
        : isGFX12Plus()        ? "expected a 24-bit signed offset"
        : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
                               : "expected a 21-bit signed offset");
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
  const MCInstrDesc &Desc = MII.get(Opcode);
  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  const int OpIndices[] = {Src0Idx, Src1Idx};
  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      break;
    std::optional<int64_t> Imm;
    } else if (MO.isExpr()) {
    if (!Imm.has_value()) {
    } else if (!isInlineConstant(Inst, OpIdx)) {
      if (NumLiterals == 0 || LiteralValue != Value) {
  if (NumLiterals + NumExprs <= 1)
  Error(getOperandLoc(Operands, Src1Idx),
        "only one unique literal operand is allowed");
bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    if (OpSelIdx != -1) {
    int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
    if (OpSelHiIdx != -1) {

    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

    int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
    int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
    const MCOperand &Src0 = Inst.getOperand(Src0Idx);
    const MCOperand &Src1 = Inst.getOperand(Src1Idx);
    auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
      return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
    int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
    if (Src2Idx != -1) {
      const MCOperand &Src2 = Inst.getOperand(Src2Idx);
bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
  if (!hasTrue16Insts())
  const MCRegisterInfo *MRI = getMRI();
  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelOpValue == 0)
  unsigned OpCount = 0;
  for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
    bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
    if (OpSelOpIsHi != VGPRSuffixIsHi)
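// Example: with op_sel bit 0 set, a true16 instruction expects its src0 to be
// spelled as a high half (v0.h); writing v0.l there conflicts with the bit and
// is diagnosed as "op_sel operand conflicts with 16-bit operand suffix".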
bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
  assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
  int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
  const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
                                     AMDGPU::OpName::src1_modifiers,
                                     AMDGPU::OpName::src2_modifiers};
  for (unsigned i = 0; i < 3; ++i) {
bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
  int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
  if (DppCtrlIdx >= 0) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
      Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
                         : "DP ALU dpp only supports row_newbcast");
  int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
  bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
  const MCOperand &Src1 = Inst.getOperand(Src1Idx);
      Error(getOperandLoc(Operands, Src1Idx), "invalid operand for instruction");
      Error(getInstLoc(Operands),
            "src1 immediate operand invalid for instruction");
bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
  return (Reg == AMDGPU::VCC && isWave64()) ||
         (Reg == AMDGPU::VCC_LO && isWave32());
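// Example: an instruction naming "vcc" as its condition operand matches in
// wave64 mode, while wave32 code must name vcc_lo instead; the mismatched
// spelling fails operand matching.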
bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
  const MCInstrDesc &Desc = MII.get(Opcode);
  bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
      !HasMandatoryLiteral && !isVOPD(Opcode))
  std::optional<unsigned> LiteralOpIdx;
  for (int OpIdx : OpIndices) {
    std::optional<int64_t> Imm;
    bool IsAnotherLiteral = false;
    bool IsForcedLit = findMCOperand(Operands, OpIdx).isForcedLit();
    bool IsForcedLit64 = findMCOperand(Operands, OpIdx).isForcedLit64();
    if (!Imm.has_value()) {
      IsAnotherLiteral = true;
    } else if (IsForcedLit || IsForcedLit64 || !isInlineConstant(Inst, OpIdx)) {
                                HasMandatoryLiteral);
           (IsForcedLit64 && !HasMandatoryLiteral)) &&
          (!has64BitLiterals() || Desc.getSize() != 4)) {
              "invalid operand for instruction");
      if (!IsForcedFP64 && (IsForcedLit64 || !IsValid32Op) &&
          OpIdx != getNamedOperandIdx(Opcode, OpName::src0)) {
              "invalid operand for instruction");
      if (IsFP64 && IsValid32Op && !IsForcedFP64)
    if (IsAnotherLiteral && !HasMandatoryLiteral &&
        !getFeatureBits()[FeatureVOP3Literal]) {
            "literal operands are not supported");
    if (LiteralOpIdx && IsAnotherLiteral) {
      Error(getLaterLoc(getOperandLoc(Operands, OpIdx),
                        getOperandLoc(Operands, *LiteralOpIdx)),
            "only one unique literal operand is allowed");
    if (IsAnotherLiteral)
      LiteralOpIdx = OpIdx;
bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
                             ? AMDGPU::OpName::data0
                             : AMDGPU::OpName::vdata;
  const MCRegisterInfo *MRI = getMRI();
  int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
  int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
  if (Data2Areg >= 0 && Data2Areg != DataAreg)
  auto FB = getFeatureBits();
  if (FB[AMDGPU::FeatureGFX90AInsts]) {
    if (DataAreg < 0 || DstAreg < 0)
    return DstAreg == DataAreg;
  return DstAreg < 1 && DataAreg < 1;
bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
  auto FB = getFeatureBits();
  if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
  const MCRegisterInfo *MRI = getMRI();
  if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
  if (FB[AMDGPU::FeatureGFX1250Insts]) {
    case AMDGPU::DS_LOAD_TR6_B96:
    case AMDGPU::DS_LOAD_TR6_B96_gfx12:
    case AMDGPU::GLOBAL_LOAD_TR6_B96:
    case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
      int VAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
      if (VAddrIdx != -1) {
        if ((Sub - AMDGPU::VGPR0) & 1)
    case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
    case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
      return Op.getStartLoc();
bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
  int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
  SMLoc BLGPLoc = getBLGPLoc(Operands);
  bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
  auto FB = getFeatureBits();
  bool UsesNeg = false;
  if (FB[AMDGPU::FeatureGFX940Insts]) {
    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
  if (IsNeg == UsesNeg)
        UsesNeg ? "invalid modifier: blgp is not supported"
                : "invalid modifier: neg is not supported");
bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
  if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
  if (Reg == AMDGPU::SGPR_NULL)
  Error(getOperandLoc(Operands, Src0Idx), "src0 must be null");
bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
    return validateGWS(Inst, Operands);
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
    Error(S, "gds modifier is not supported on this GPU");
bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
  if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
  if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
      Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
  const MCRegisterInfo *MRI = getMRI();
  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
  auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
    Error(getOperandLoc(Operands, Data0Pos), "vgpr must be even aligned");
bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                           AMDGPU::OpName::cpol);
    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
    Error(S, "scale_offset is not supported on this GPU");
    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
    Error(S, "nv is not supported on this GPU");
    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
    Error(S, "scale_offset is not supported for this instruction");
    return validateTHAndScopeBits(Inst, Operands, CPol);
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      Error(S, "cache policy is not supported for SMRD instructions");
      Error(IDLoc, "invalid cache policy for SMEM instruction");
    if (!(TSFlags & AllowSCCModifier)) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
          "scc modifier is not supported for this instruction on this GPU");
            : "instruction must use glc");
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
          &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
            : "instruction must not use glc");
bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
                                             const unsigned CPol) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &TID = MII.get(Opcode);
  SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
    return PrintError("th:TH_ATOMIC_RETURN requires a destination operand");
    return PrintError("instruction must use th:TH_ATOMIC_RETURN");
    return PrintError("invalid th value for SMEM instruction");
    return PrintError("scope and th combination is not valid");
    return PrintError("invalid th value for atomic instructions");
    return PrintError("invalid th value for store instructions");
    return PrintError("invalid th value for load instructions");
bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
  if (Desc.mayStore() &&
    SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
    if (Loc != getInstLoc(Operands)) {
      Error(Loc, "TFE modifier has no meaning for store instructions");
bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
  auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) -> bool {
    int FmtIdx = AMDGPU::getNamedOperandIdx(Opc, FmtOp);
    int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp);
    Error(getOperandLoc(Operands, SrcIdx),
          "wrong register tuple size for " +
  return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
         validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, SMLoc IDLoc,
  if (!validateLdsDirect(Inst, Operands))
  if (!validateTrue16OpSel(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
          "op_sel operand conflicts with 16-bit operand suffix");
  if (!validateSOPLiteral(Inst, Operands))
  if (!validateVOPLiteral(Inst, Operands)) {
  if (!validateConstantBusLimitations(Inst, Operands)) {
  if (!validateVOPD(Inst, Operands)) {
  if (!validateIntClampSupported(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
          "integer clamping is not supported on this GPU");
  if (!validateOpSel(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
          "invalid op_sel operand");
  if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
          "invalid neg_lo operand");
  if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
          "invalid neg_hi operand");
  if (!validateDPP(Inst, Operands)) {
  if (!validateMIMGD16(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
          "d16 modifier is not supported on this GPU");
  if (!validateMIMGDim(Inst, Operands)) {
    Error(IDLoc, "missing dim operand");
  if (!validateTensorR128(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
          "instruction must set modifier r128=0");
  if (!validateMIMGMSAA(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
          "invalid dim; must be MSAA type");
  if (!validateMIMGDataSize(Inst, IDLoc)) {
  if (!validateMIMGAddrSize(Inst, IDLoc))
  if (!validateMIMGAtomicDMask(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
          "invalid atomic image dmask");
  if (!validateMIMGGatherDMask(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
          "invalid image_gather dmask: only one bit must be set");
  if (!validateMovrels(Inst, Operands)) {
  if (!validateOffset(Inst, Operands)) {
  if (!validateMAIAccWrite(Inst, Operands)) {
  if (!validateMAISrc2(Inst, Operands)) {
  if (!validateMFMA(Inst, Operands)) {
  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
  if (!validateAGPRLdSt(Inst)) {
    Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
            ? "invalid register class: data and dst should be all VGPR or AGPR"
            : "invalid register class: agpr loads and stores not supported on this GPU"
  if (!validateVGPRAlign(Inst)) {
          "invalid register class: vgpr tuples must be 64 bit aligned");
  if (!validateDS(Inst, Operands)) {
  if (!validateBLGP(Inst, Operands)) {
  if (!validateDivScale(Inst)) {
    Error(IDLoc, "ABS not allowed in VOP3B instructions");
  if (!validateWaitCnt(Inst, Operands)) {
  if (!validateTFE(Inst, Operands)) {
  if (!validateWMMA(Inst, Operands)) {
                          unsigned VariantID = 0);
                          unsigned VariantID);

bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,

bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS,
                                       ArrayRef<unsigned> Variants) {
  for (auto Variant : Variants) {
bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
  FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
  getParser().clearPendingErrors();
  StringRef VariantName = getMatchedVariantName();
  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
                   " variant of this instruction is not supported"));
  if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
      !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
    FeatureBitset FeaturesWS32 = getFeatureBits();
    FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
        .flip(AMDGPU::FeatureWavefrontSize32);
    FeatureBitset AvailableFeaturesWS32 =
        ComputeAvailableFeatures(FeaturesWS32);
    if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
      return Error(IDLoc, "instruction requires wavesize=32");
  if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
    return Error(IDLoc, "instruction not supported on this GPU (" +
                            getSTI().getCPU() + ")" + ": " + Mnemo);
  return Error(IDLoc, "invalid instruction" + Suggestion);
  const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
  if (Op.isToken() && InvalidOprIdx > 1) {
    const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
    return PrevOp.isToken() && PrevOp.getToken() == "::";
bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
    if (R == Match_Success || R == Match_MissingFeature ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
        (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
         Result != Match_MissingFeature)) {
      if (R == Match_Success)
  if (Result == Match_Success) {
    if (!validateInstruction(Inst, IDLoc, Operands)) {
  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
  case Match_MissingFeature:
    return Error(IDLoc, "operands are not valid for this GPU or mode");
  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
      return Error(ErrorLoc, "invalid VOPDY instruction");
    return Error(ErrorLoc, "invalid operand for instruction");
  case Match_MnemonicFail:
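// Example: each encoding variant (e.g. VOP3, SDWA, DPP) is matched in turn,
// and the most informative result wins: Match_Success beats
// Match_MissingFeature, which beats Match_InvalidOperand, which beats
// Match_MnemonicFail, so the user sees the diagnostic from the closest match.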
bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
  if (getParser().parseAbsoluteExpression(Tmp)) {
  Ret = static_cast<uint32_t>(Tmp);
bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
  if (!getSTI().getTargetTriple().isAMDGCN())
    return TokError("directive only supported for amdgcn architecture");
  std::string TargetIDDirective;
  SMLoc TargetStart = getTok().getLoc();
  if (getParser().parseEscapedString(TargetIDDirective))
  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return getParser().Error(
        TargetRange.Start,
        (Twine(".amdgcn_target directive's target id ") +
         Twine(TargetIDDirective) +
         Twine(" does not match the specified target id ") +
         Twine(getTargetStreamer().getTargetID()->toString())).str());
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {

bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, const MCExpr *VCCUsed,
    const MCExpr *FlatScrUsed, bool XNACKUsed,
    std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
    SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
    const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
  const MCExpr *NumSGPRs = NextFreeSGPR;
  int64_t EvaluatedSGPRs;
  unsigned MaxAddressableNumSGPRs =
  if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
      !Features.test(FeatureSGPRInitBug) &&
      static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);
  const MCExpr *ExtraSGPRs =
  if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
      (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
      static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);
  if (Features.test(FeatureSGPRInitBug))
  auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
                                unsigned Granule) -> const MCExpr * {
    const MCExpr *AlignToGPR =
    const MCExpr *DivGPR =
  VGPRBlocks = GetNumGPRBlocks(
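// Example: GetNumGPRBlocks aligns the register count up to the allocation
// granule, divides by the granule, and presumably subtracts one, so 70 VGPRs
// with a granule of 4 would encode as ceil(70 / 4) - 1 == 17.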
bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
  if (!getSTI().getTargetTriple().isAMDGCN())
    return TokError("directive only supported for amdgcn architecture");
    return TokError("directive only supported for amdhsa OS");
  StringRef KernelName;
  if (getParser().parseIdentifier(KernelName))
  AMDGPU::MCKernelDescriptor KD =
  const MCExpr *NextFreeVGPR = ZeroExpr;
  const MCExpr *NamedBarCnt = ZeroExpr;
  uint64_t SharedVGPRCount = 0;
  uint64_t PreloadLength = 0;
  uint64_t PreloadOffset = 0;
  const MCExpr *NextFreeSGPR = ZeroExpr;
  unsigned ImpliedUserSGPRCount = 0;
  std::optional<unsigned> ExplicitUserSGPRCount;
  const MCExpr *ReserveVCC = OneExpr;
  const MCExpr *ReserveFlatScr = OneExpr;
  std::optional<bool> EnableWavefrontSize32;
    SMRange IDRange = getTok().getLocRange();
    if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
    if (ID == ".end_amdhsa_kernel")
      return TokError(".amdhsa_ directives cannot be repeated");
    SMLoc ValStart = getLoc();
    const MCExpr *ExprVal;
    if (getParser().parseExpression(ExprVal))
    SMLoc ValEnd = getLoc();
    SMRange ValRange = SMRange(ValStart, ValEnd);
    uint64_t Val = IVal;
    bool EvaluatableExpr;
    if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
        return OutOfRangeError(ValRange);

#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
  if (!isUInt<ENTRY##_WIDTH>(Val))                                             \
    return OutOfRangeError(RANGE);                                             \
  AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY,     \

#define EXPR_RESOLVE_OR_ERROR(RESOLVED)                                        \
    return Error(IDRange.Start, "directive should have resolvable expression", \
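// Example of the directive syntax this loop accepts (a minimal sketch; the
// exact set of mandatory directives depends on the target):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 32
//     .amdhsa_next_free_sgpr 16
//     .amdhsa_user_sgpr_kernarg_segment_ptr 1  // implies 2 user SGPRs
//   .end_amdhsa_kernel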
    if (ID == ".amdhsa_group_segment_fixed_size") {
      if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
                  CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.group_segment_fixed_size = ExprVal;
    } else if (ID == ".amdhsa_private_segment_fixed_size") {
      if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
                  CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.private_segment_fixed_size = ExprVal;
    } else if (ID == ".amdhsa_kernarg_size") {
      if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.kernarg_size = ExprVal;
    } else if (ID == ".amdhsa_user_sgpr_count") {
      EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
      ExplicitUserSGPRCount = Val;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
      EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
      if (hasArchitectedFlatScratch())
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
                       ExprVal, ValRange);
      if (Val)
        ImpliedUserSGPRCount += 4;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
      EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
      if (!hasKernargPreload())
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      if (Val > getMaxNumUserSGPRs())
        return OutOfRangeError(ValRange);
      PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
                       ValRange);
      if (Val) {
        ImpliedUserSGPRCount += Val;
        PreloadLength = Val;
      }
    } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
      EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
      if (!hasKernargPreload())
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
                       ValRange);
      if (Val)
        PreloadOffset = Val;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
      EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
                       ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
      EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
                       ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
      EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
                       ExprVal, ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
      EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
                       ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
      EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
      if (hasArchitectedFlatScratch())
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
                       ExprVal, ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
      EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
                       ExprVal, ValRange);
      if (Val)
        ImpliedUserSGPRCount += 1;
    } else if (ID == ".amdhsa_wavefront_size32") {
      EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      EnableWavefrontSize32 = Val;
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_uses_dynamic_stack") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
      if (hasArchitectedFlatScratch())
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_enable_private_segment") {
      if (!hasArchitectedFlatScratch())
        return Error(
            IDRange.Start,
            "directive is not supported without architected flat scratch",
            IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_next_free_vgpr") {
      VGPRRange = ValRange;
      NextFreeVGPR = ExprVal;
    } else if (ID == ".amdhsa_next_free_sgpr") {
      SGPRRange = ValRange;
      NextFreeSGPR = ExprVal;
    } else if (ID == ".amdhsa_accum_offset") {
      if (!isGFX90A())
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      AccumOffset = ExprVal;
    } else if (ID == ".amdhsa_named_barrier_count") {
      if (!isGFX1250())
        return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
      NamedBarCnt = ExprVal;
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (EvaluatableExpr && !isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveVCC = ExprVal;
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      if (IVersion.Major < 7)
        return Error(IDRange.Start, "directive requires gfx7+", IDRange);
      if (hasArchitectedFlatScratch())
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
      if (EvaluatableExpr && !isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = ExprVal;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      if (IVersion.Major < 8)
        return Error(IDRange.Start, "directive requires gfx8+", IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
        return getParser().Error(IDRange.Start,
                                 ".amdhsa_reserve_xnack_mask does not match "
                                 "target id",
                                 IDRange);
    } else if (ID == ".amdhsa_float_round_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_float_round_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_dx10_clamp") {
      if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
        return Error(IDRange.Start, "directive unsupported on gfx1170+",
                     IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_ieee_mode") {
      if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
        return Error(IDRange.Start, "directive unsupported on gfx1170+",
                     IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_fp16_overflow") {
      if (IVersion.Major < 9)
        return Error(IDRange.Start, "directive requires gfx9+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_tg_split") {
      if (!isGFX90A())
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
                       ExprVal, ValRange);
    } else if (ID == ".amdhsa_workgroup_processor_mode") {
      if (!supportsWGP())
        return Error(IDRange.Start,
                     "directive unsupported on " + getSTI().getCPU(), IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_memory_ordered") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_forward_progress") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_shared_vgpr_count") {
      EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
      if (IVersion.Major < 10 || IVersion.Major >= 12)
        return Error(IDRange.Start, "directive requires gfx10 or gfx11",
                     IDRange);
      SharedVGPRCount = Val;
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
                       COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_inst_pref_size") {
      if (IVersion.Major < 11)
        return Error(IDRange.Start, "directive requires gfx11+", IDRange);
      if (IVersion.Major == 11) {
        PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
                         COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
                         ValRange);
      } else {
        PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
                         COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
                         ValRange);
      }
    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
          ExprVal, ValRange);
    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
                       ExprVal, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
          ExprVal, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
                       ExprVal, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
                       ExprVal, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
                       ExprVal, ValRange);
    } else if (ID == ".amdhsa_exception_int_div_zero") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
                       ExprVal, ValRange);
    } else if (ID == ".amdhsa_round_robin_scheduling") {
      if (IVersion.Major < 12)
        return Error(IDRange.Start, "directive requires gfx12+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
                       ValRange);
    } else {
      return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
    }
#undef PARSE_BITS_ENTRY
  }

  if (!Seen.contains(".amdhsa_next_free_vgpr"))
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (!Seen.contains(".amdhsa_next_free_sgpr"))
    return TokError(".amdhsa_next_free_sgpr directive is required");

  unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);

  if (UserSGPRCount > getMaxNumUserSGPRs())
    return TokError("too many user SGPRs enabled, found " +
                    Twine(UserSGPRCount) + ", but only " +
                    Twine(getMaxNumUserSGPRs()) + " are supported");

  if (PreloadLength) {
    // ... (kernarg-preload user-SGPR bookkeeping elided in this excerpt)
  }

  const MCExpr *VGPRBlocks;
  const MCExpr *SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         getTargetStreamer().getTargetID()->isXnackOnOrAny(),
                         EnableWavefrontSize32, NextFreeVGPR,
                         VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
                         SGPRBlocks))
    return true;

  int64_t EvaluatedVGPRBlocks;
  bool VGPRBlocksEvaluatable =
      VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
  if (VGPRBlocksEvaluatable &&
      !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
    return OutOfRangeError(VGPRRange);
  }
  AMDGPU::MCKernelDescriptor::bits_set(
      KD.compute_pgm_rsrc1, VGPRBlocks,
      COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
      COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());

  int64_t EvaluatedSGPRBlocks;
  if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
      !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          static_cast<uint64_t>(EvaluatedSGPRBlocks)))
    return OutOfRangeError(SGPRRange);
  AMDGPU::MCKernelDescriptor::bits_set(
      KD.compute_pgm_rsrc1, SGPRBlocks,
      COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
      COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());

  if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
    return TokError("amdgpu_user_sgpr_count smaller than implied by "
                    "enabled user SGPRs");

  if (isGFX1250()) {
    AMDGPU::MCKernelDescriptor::bits_set(
        KD.compute_pgm_rsrc2,
        MCConstantExpr::create(UserSGPRCount, getContext()),
        COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
        COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
  } else {
    AMDGPU::MCKernelDescriptor::bits_set(
        KD.compute_pgm_rsrc2,
        MCConstantExpr::create(UserSGPRCount, getContext()),
        COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
        COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());
  }

  if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
    return TokError("Kernarg size should be resolvable");
  uint64_t kernarg_size = IVal;
  if (PreloadLength && kernarg_size &&
      (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
    return TokError("Kernarg preload length + offset is larger than the "
                    "kernarg segment size");

  if (isGFX90A()) {
    if (!Seen.contains(".amdhsa_accum_offset"))
      return TokError(".amdhsa_accum_offset directive is required");
    int64_t EvaluatedAccum;
    bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
    uint64_t UEvaluatedAccum = EvaluatedAccum;
    if (AccumEvaluatable &&
        (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
      return TokError("accum_offset should be in range [4..256] in "
                      "increments of 4");

    int64_t EvaluatedNumVGPR;
    if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
        AccumEvaluatable &&
        UEvaluatedAccum >
            alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
      return TokError("accum_offset exceeds total VGPR allocation");
    const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
        MCBinaryExpr::createDiv(AccumOffset,
                                MCConstantExpr::create(4, getContext()),
                                getContext()),
        MCConstantExpr::create(1, getContext()), getContext());
    AMDGPU::MCKernelDescriptor::bits_set(
        KD.compute_pgm_rsrc3, AdjustedAccum,
        COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
        COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
  }

  if (isGFX1250())
    AMDGPU::MCKernelDescriptor::bits_set(
        KD.compute_pgm_rsrc3, NamedBarCnt,
        COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
        COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT, getContext());

  if (IVersion.Major >= 10 && IVersion.Major < 12) {
    // SharedVGPRCount < 16 is checked by PARSE_BITS_ENTRY above.
    if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
      return TokError("shared_vgpr_count directive not valid on "
                      "wavefront size 32");
    }

    if (VGPRBlocksEvaluatable &&
        (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
         63)) {
      return TokError("shared_vgpr_count*2 + "
                      "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
                      "exceed 63");
    }
  }

  getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
                                                 NextFreeVGPR, NextFreeSGPR,
                                                 ReserveVCC, ReserveFlatScr);
  return false;
}
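// A minimal sketch of the input this directive parser accepts. The kernel
// name and register counts below are illustrative, not taken from this file:
//
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8          ; required
//     .amdhsa_next_free_sgpr 16         ; required
//     .amdhsa_user_sgpr_dispatch_ptr 1
//   .end_amdhsa_kernel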
bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
  uint32_t Version;
  if (ParseAsAbsoluteExpression(Version))
    return true;

  getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
  return false;
}
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               AMDGPUMCKernelCodeT &C) {
  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while
  // parsing kernel code object for backwards compatibility.
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();
    return false;
  }

  SmallString<40> ErrStr;
  raw_svector_ostream Err(ErrStr);
  if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
    return TokError(Err.str());
  }
  Lex();

  if (ID == "enable_wavefront_size32") {
    if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
      if (!isGFX10Plus())
        return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
    } else {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
    }
  }

  if (ID == "wavefront_size") {
    if (C.wavefront_size == 5) {
      if (!isGFX10Plus())
        return TokError("wavefront_size=5 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("wavefront_size=5 requires +WavefrontSize32");
    } else if (C.wavefront_size == 6) {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("wavefront_size=6 requires +WavefrontSize64");
    }
  }

  return false;
}
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
  AMDGPUMCKernelCodeT KernelCode;
  KernelCode.initDefault(&getSTI(), getContext());

  while (true) {
    // Lex EndOfStatement.  This is in a while loop, because lexing a comment
    // will set the current token to EndOfStatement.
    while (trySkipToken(AsmToken::EndOfStatement))
      ;

    StringRef ID;
    if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
      return true;

    if (ID == ".end_amd_kernel_code_t")
      break;

    if (ParseAMDKernelCodeTValue(ID, KernelCode))
      return true;
  }

  KernelCode.validate(&getSTI(), getContext());
  getTargetStreamer().EmitAMDKernelCodeT(KernelCode);

  return false;
}
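// Sketch of the legacy (non-HSA-ABI) form this directive handles; the field
// values below are illustrative:
//
//   .amd_kernel_code_t
//     wavefront_size = 6
//     enable_wavefront_size32 = 0
//   .end_amd_kernel_code_t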
bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
  StringRef KernelName;
  if (!parseId(KernelName, "expected symbol name"))
    return true;

  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
                                           ELF::STT_AMDGPU_HSA_KERNEL);
  return false;
}
bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  if (!getSTI().getTargetTriple().isAMDGCN()) {
    return Error(getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
                 "architectures");
  }

  auto TargetIDDirective = getLexer().getTok().getStringContents();
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return Error(getParser().getTok().getLoc(),
                 "target id must match options");

  getTargetStreamer().EmitISAVersion();
  Lex();

  return false;
}
bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
  std::string HSAMetadataString;
  if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
                          HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
    return true;

  if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
    return Error(getLoc(), "invalid HSA metadata");

  return false;
}
/// Common code to parse out a block of text (typically YAML) between start and
/// end directives.
bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
                                          const char *AssemblerDirectiveEnd,
                                          std::string &CollectString) {
  raw_string_ostream CollectStream(CollectString);

  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!isToken(AsmToken::Eof)) {
    while (isToken(AsmToken::Space)) {
      CollectStream << getTokenStr();
      Lex();
    }

    if (trySkipId(AssemblerDirectiveEnd)) {
      FoundEnd = true;
      break;
    }

    CollectStream << Parser.parseStringToEndOfStatement()
                  << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  getLexer().setSkipSpace(true);

  if (isToken(AsmToken::Eof) && !FoundEnd) {
    return TokError(Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
  }

  return false;
}
bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
  std::string String;
  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
                          AMDGPU::PALMD::AssemblerDirectiveEnd, String))
    return true;

  auto *PALMetadata = getTargetStreamer().getPALMetadata();
  if (!PALMetadata->setFromString(String))
    return Error(getLoc(), "invalid PAL metadata");
  return false;
}
/// Parse the assembler directive for old linear-format PAL metadata.
bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
    return Error(getLoc(),
                 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
                  "not available on non-amdpal OSes")).str());
  }

  auto *PALMetadata = getTargetStreamer().getPALMetadata();
  PALMetadata->setLegacy();
  for (;;) {
    uint32_t Key, Value;
    if (ParseAsAbsoluteExpression(Key)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    if (!trySkipToken(AsmToken::Comma)) {
      return TokError(Twine("expected an even number of values in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    if (ParseAsAbsoluteExpression(Value)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    PALMetadata->setRegister(Key, Value);
    if (!trySkipToken(AsmToken::Comma))
      break;
  }
  return false;
}
/// ParseDirectiveAMDGPULDS
///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
  if (getParser().checkForValidSection())
    return true;

  StringRef Name;
  SMLoc NameLoc = getLoc();
  if (getParser().parseIdentifier(Name))
    return TokError("expected identifier in directive");

  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
  if (getParser().parseComma())
    return true;

  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());

  int64_t Size;
  SMLoc SizeLoc = getLoc();
  if (getParser().parseAbsoluteExpression(Size))
    return true;
  if (Size < 0)
    return Error(SizeLoc, "size must be non-negative");
  if (Size > LocalMemorySize)
    return Error(SizeLoc, "size is too large");

  int64_t Alignment = 4;
  if (trySkipToken(AsmToken::Comma)) {
    SMLoc AlignLoc = getLoc();
    if (getParser().parseAbsoluteExpression(Alignment))
      return true;
    if (Alignment < 0 || !isPowerOf2_64(Alignment))
      return Error(AlignLoc, "alignment must be a power of two");

    // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
    // to make sure the alignment fits nicely into a 32-bit integer.
    if (Alignment >= 1u << 31)
      return Error(AlignLoc, "alignment is too large");
  }

  if (parseEOL())
    return true;

  Symbol->redefineIfPossible();
  if (!Symbol->isUndefined())
    return Error(NameLoc, "invalid symbol redefinition");

  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
  return false;
}
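// Example of the accepted syntax (symbol name, size and alignment are
// illustrative); the alignment operand is optional and defaults to 4:
//
//   .amdgpu_lds my_buffer, 512, 16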
bool AMDGPUAsmParser::ParseDirectiveAMDGPUInfo() {
  if (getParser().checkForValidSection())
    return true;

  StringRef FuncName;
  if (getParser().parseIdentifier(FuncName))
    return TokError("expected symbol name after .amdgpu_info");

  MCSymbol *FuncSym = getContext().getOrCreateSymbol(FuncName);

  AMDGPU::InfoSectionData ParsedInfoData;
  AMDGPU::FuncInfo FI;
  // ... (recording of FuncSym into FI elided in this excerpt)
  bool HasScalarAttrs = false;

  while (true) {
    while (trySkipToken(AsmToken::EndOfStatement))
      ;

    StringRef ID;
    SMLoc IDLoc = getLoc();
    if (!parseId(ID, "expected directive or .end_amdgpu_info"))
      return true;

    if (ID == ".end_amdgpu_info")
      break;

    StringRef Dir = ID;
    if (!Dir.consume_front(".amdgpu_"))
      return Error(IDLoc, "unknown .amdgpu_info directive '" + ID + "'");

    if (Dir == "flags") {
      int64_t Val;
      if (getParser().parseAbsoluteExpression(Val))
        return true;
      uint64_t Flags = Val;
      FI.UsesVCC = !!(Flags & AMDGPU::FuncInfoFlags::FUNC_USES_VCC);
      FI.UsesFlatScratch =
          !!(Flags & AMDGPU::FuncInfoFlags::FUNC_USES_FLAT_SCRATCH);
      HasScalarAttrs = true;
    } else if (Dir == "num_sgpr") {
      int64_t Val;
      if (getParser().parseAbsoluteExpression(Val))
        return true;
      FI.NumSGPR = static_cast<uint32_t>(Val);
      HasScalarAttrs = true;
    } else if (Dir == "num_vgpr") {
      int64_t Val;
      if (getParser().parseAbsoluteExpression(Val))
        return true;
      FI.NumVGPR = static_cast<uint32_t>(Val);
      HasScalarAttrs = true;
    } else if (Dir == "num_agpr") {
      int64_t Val;
      if (getParser().parseAbsoluteExpression(Val))
        return true;
      FI.NumAGPR = static_cast<uint32_t>(Val);
      HasScalarAttrs = true;
    } else if (Dir == "private_segment_size") {
      int64_t Val;
      if (getParser().parseAbsoluteExpression(Val))
        return true;
      FI.PrivateSegmentSize = static_cast<uint32_t>(Val);
      HasScalarAttrs = true;
    } else if (Dir == "use") {
      StringRef ResName;
      if (getParser().parseIdentifier(ResName))
        return TokError("expected resource symbol for .amdgpu_use");
      ParsedInfoData.Uses.push_back(
          {FuncSym, getContext().getOrCreateSymbol(ResName)});
    } else if (Dir == "call") {
      StringRef DstName;
      if (getParser().parseIdentifier(DstName))
        return TokError("expected callee symbol for .amdgpu_call");
      ParsedInfoData.Calls.push_back(
          {FuncSym, getContext().getOrCreateSymbol(DstName)});
    } else if (Dir == "indirect_call") {
      std::string TypeId;
      if (getParser().parseEscapedString(TypeId))
        return TokError("expected type ID string for .amdgpu_indirect_call");
      ParsedInfoData.IndirectCalls.push_back({FuncSym, std::move(TypeId)});
    } else if (Dir == "typeid") {
      std::string TypeId;
      if (getParser().parseEscapedString(TypeId))
        return TokError("expected type ID string for .amdgpu_typeid");
      ParsedInfoData.TypeIds.push_back({FuncSym, std::move(TypeId)});
    } else {
      return Error(IDLoc, "unknown .amdgpu_info directive '" + ID + "'");
    }
  }

  if (HasScalarAttrs)
    ParsedInfoData.Funcs.push_back(std::move(FI));

  AMDGPU::InfoSectionData &Data = InfoData ? *InfoData : InfoData.emplace();
  for (AMDGPU::FuncInfo &Func : ParsedInfoData.Funcs)
    Data.Funcs.push_back(std::move(Func));
  for (std::pair<MCSymbol *, MCSymbol *> &Use : ParsedInfoData.Uses)
    Data.Uses.push_back(Use);
  for (std::pair<MCSymbol *, MCSymbol *> &Call : ParsedInfoData.Calls)
    Data.Calls.push_back(Call);
  for (std::pair<MCSymbol *, std::string> &IndirectCall :
       ParsedInfoData.IndirectCalls)
    Data.IndirectCalls.push_back(std::move(IndirectCall));
  for (std::pair<MCSymbol *, std::string> &TypeId : ParsedInfoData.TypeIds)
    Data.TypeIds.push_back(std::move(TypeId));

  return false;
}
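// Illustrative sketch of an .amdgpu_info block as accepted above; the symbol
// names and values are made up for this example:
//
//   .amdgpu_info my_func
//     .amdgpu_num_sgpr 32
//     .amdgpu_num_vgpr 64
//     .amdgpu_call callee_func
//   .end_amdgpu_info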
void AMDGPUAsmParser::onEndOfFile() {
  if (InfoData)
    getTargetStreamer().emitAMDGPUInfo(*InfoData);
}
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();

  if (isHsaAbi(getSTI())) {
    if (IDVal == ".amdhsa_kernel")
      return ParseDirectiveAMDHSAKernel();

    if (IDVal == ".amdhsa_code_object_version")
      return ParseDirectiveAMDHSACodeObjectVersion();

    // TODO: Restructure/combine with PAL metadata directive.
    if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  } else {
    if (IDVal == ".amd_kernel_code_t")
      return ParseDirectiveAMDKernelCodeT();

    if (IDVal == ".amdgpu_hsa_kernel")
      return ParseDirectiveAMDGPUHsaKernel();

    if (IDVal == ".amd_amdgpu_isa")
      return ParseDirectiveISAVersion();

    if (IDVal == HSAMD::AssemblerDirectiveBegin) {
      return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
                              Twine(" directive is "
                                    "not available on non-amdhsa OSes"))
                                 .str());
    }
  }

  if (IDVal == ".amdgcn_target")
    return ParseDirectiveAMDGCNTarget();

  if (IDVal == ".amdgpu_lds")
    return ParseDirectiveAMDGPULDS();

  if (IDVal == ".amdgpu_info")
    return ParseDirectiveAMDGPUInfo();

  if (IDVal == PALMD::AssemblerDirectiveBegin)
    return ParseDirectivePALMetadataBegin();

  if (IDVal == PALMD::AssemblerDirective)
    return ParseDirectivePALMetadata();

  return true;
}
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           MCRegister Reg) {
  // GFX10+ has 2 more SGPRs 104 and 105.
  if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
    return hasSGPR104_SGPR105();

  switch (Reg.id()) {
  case SRC_SHARED_BASE_LO:
  case SRC_SHARED_BASE:
  case SRC_SHARED_LIMIT_LO:
  case SRC_SHARED_LIMIT:
  case SRC_PRIVATE_BASE_LO:
  case SRC_PRIVATE_BASE:
  case SRC_PRIVATE_LIMIT_LO:
  case SRC_PRIVATE_LIMIT:
    return isGFX9Plus();
  case SRC_FLAT_SCRATCH_BASE_LO:
  case SRC_FLAT_SCRATCH_BASE_HI:
    return hasGloballyAddressableScratch();
  case SRC_POPS_EXITING_WAVE_ID:
    return isGFX9Plus() && !isGFX10Plus();
  case XNACK_MASK:
  case XNACK_MASK_LO:
  case XNACK_MASK_HI:
    return (isVI() || isGFX9()) &&
           getTargetStreamer().getTargetID()->isXnackSupported();
  default:
    break;
  }
  // ... (additional generation-specific cases elided in this excerpt)

  if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
    return hasSGPR102_SGPR103();

  return true;
}
ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
                                          StringRef Mnemonic,
                                          OperandMode Mode) {
  ParseStatus Res = parseVOPD(Operands);
  if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
    return Res;

  // Try to parse with a custom parser
  Res = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list.  This is only done
  // by custom parser, so we shouldn't continue on to the generic parsing.
  if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
    return Res;

  SMLoc RBraceLoc;
  SMLoc LBraceLoc = getLoc();
  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
    unsigned Prefix = Operands.size();

    for (;;) {
      auto Loc = getLoc();
      Res = parseReg(Operands);
      if (Res.isNoMatch())
        Error(Loc, "expected a register");
      if (!Res.isSuccess())
        return ParseStatus::Failure;

      RBraceLoc = getLoc();
      if (trySkipToken(AsmToken::RBrac))
        break;

      if (!skipToken(AsmToken::Comma,
                     "expected a comma or a closing square bracket"))
        return ParseStatus::Failure;
    }

    if (Operands.size() - Prefix > 1) {
      Operands.insert(Operands.begin() + Prefix,
                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
      Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
    }

    return ParseStatus::Success;
  }

  return parseRegOrImm(Operands);
}
StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
  // Clear any forced encodings from the previous instruction.
  setForcedEncodingSize(0);
  setForcedDPP(false);
  setForcedSDWA(false);

  if (Name.consume_back("_e64_dpp")) {
    setForcedDPP(true);
    setForcedEncodingSize(64);
    return Name;
  }
  if (Name.consume_back("_e64")) {
    setForcedEncodingSize(64);
    return Name;
  }
  if (Name.consume_back("_e32")) {
    setForcedEncodingSize(32);
    return Name;
  }
  if (Name.consume_back("_dpp")) {
    setForcedDPP(true);
    return Name;
  }
  if (Name.consume_back("_sdwa")) {
    setForcedSDWA(true);
    return Name;
  }
  return Name;
}
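// For example, "v_add_f32_e64" is reduced to "v_add_f32" with a forced 64-bit
// encoding, and "v_mov_b32_dpp" to "v_mov_b32" with DPP forced.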
static void applyMnemonicAliases(StringRef &Mnemonic,
                                 const FeatureBitset &Features,
                                 unsigned VariantID);

bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
                                       StringRef Name, SMLoc NameLoc,
                                       OperandVector &Operands) {
  // Add the instruction mnemonic
  Name = parseMnemonicSuffix(Name);

  // If the target architecture uses MnemonicAlias, call it here.
  applyMnemonicAliases(Name, getAvailableFeatures(), 0);

  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));

  bool IsMIMG = Name.starts_with("image_");

  while (!trySkipToken(AsmToken::EndOfStatement)) {
    OperandMode Mode = OperandMode_Default;
    if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
      Mode = OperandMode_NSA;
    ParseStatus Res = parseOperand(Operands, Name, Mode);

    if (!Res.isSuccess()) {
      checkUnsupportedInstruction(Name, NameLoc);
      if (!Parser.hasPendingError()) {
        // FIXME: use real operand location rather than the current location.
        StringRef Msg = Res.isFailure() ? "failed parsing operand."
                                        : "not a valid operand.";
        Error(getLoc(), Msg);
      }
      while (!trySkipToken(AsmToken::EndOfStatement)) {
        lex();
      }
      return true;
    }

    // Eat the comma or space if there is one.
    trySkipToken(AsmToken::Comma);
  }

  return false;
}
ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
                                          OperandVector &Operands) {
  SMLoc S = getLoc();
  if (!trySkipId(Name))
    return ParseStatus::NoMatch;

  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
  return ParseStatus::Success;
}
ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
                                                int64_t &IntVal) {
  if (!trySkipId(Prefix, AsmToken::Colon))
    return ParseStatus::NoMatch;

  return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
}

ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
    const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
    std::function<bool(int64_t &)> ConvertResult) {
  SMLoc S = getLoc();
  int64_t Value = 0;

  ParseStatus Res = parseIntWithPrefix(Prefix, Value);
  if (!Res.isSuccess())
    return Res;

  if (ConvertResult && !ConvertResult(Value)) {
    Error(S, "invalid " + StringRef(Prefix) + " value.");
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
  return ParseStatus::Success;
}
ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
    const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
    bool (*ConvertResult)(int64_t &)) {
  SMLoc S = getLoc();
  if (!trySkipId(Prefix, AsmToken::Colon))
    return ParseStatus::NoMatch;

  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
    return ParseStatus::Failure;

  unsigned Val = 0;
  const unsigned MaxSize = 4;

  // FIXME: How to verify the number of elements matches the number of src
  // operands?
  for (int I = 0; ; ++I) {
    int64_t Op;
    SMLoc Loc = getLoc();
    if (!parseExpr(Op))
      return ParseStatus::Failure;

    if (Op != 0 && Op != 1)
      return Error(Loc, "invalid " + StringRef(Prefix) + " value.");

    Val |= (Op << I);

    if (trySkipToken(AsmToken::RBrac))
      break;

    if (I + 1 == MaxSize)
      return Error(getLoc(), "expected a closing square bracket");

    if (!skipToken(AsmToken::Comma, "expected a comma"))
      return ParseStatus::Failure;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
  return ParseStatus::Success;
}
ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
                                           OperandVector &Operands,
                                           AMDGPUOperand::ImmTy ImmTy,
                                           bool IgnoreNegative) {
  int64_t Bit;
  SMLoc S = getLoc();

  if (trySkipId(Name)) {
    Bit = 1;
  } else if (trySkipId("no", Name)) {
    if (IgnoreNegative)
      return ParseStatus::NoMatch;
    Bit = 0;
  } else {
    return ParseStatus::NoMatch;
  }

  if (Name == "r128" && !hasMIMG_R128())
    return Error(S, "r128 modifier is not supported on this GPU");
  if (Name == "a16" && !hasA16())
    return Error(S, "a16 modifier is not supported on this GPU");

  if (Bit == 0 && Name == "gds") {
    StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
    if (Mnemo.starts_with("ds_gws"))
      return Error(S, "nogds is not allowed");
  }

  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
    ImmTy = AMDGPUOperand::ImmTyR128A16;

  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
  return ParseStatus::Success;
}
unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
                                      bool &Disabling) const {
  Disabling = Id.consume_front("no");

  if (isGFX940() && !Mnemo.starts_with("s_")) {
    return StringSwitch<unsigned>(Id)
        .Case("nt", AMDGPU::CPol::NT)
        .Case("sc0", AMDGPU::CPol::SC0)
        .Case("sc1", AMDGPU::CPol::SC1)
        .Default(0);
  }

  return StringSwitch<unsigned>(Id)
      .Case("dlc", AMDGPU::CPol::DLC)
      .Case("glc", AMDGPU::CPol::GLC)
      .Case("scc", AMDGPU::CPol::SCC)
      .Case("slc", AMDGPU::CPol::SLC)
      .Default(0);
}
ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
  if (isGFX12Plus()) {
    SMLoc StringLoc = getLoc();

    int64_t CPolVal = 0;
    ParseStatus ResTH = ParseStatus::NoMatch;
    ParseStatus ResScope = ParseStatus::NoMatch;
    ParseStatus ResNV = ParseStatus::NoMatch;
    ParseStatus ResScal = ParseStatus::NoMatch;

    for (;;) {
      if (ResTH.isNoMatch()) {
        int64_t TH;
        ResTH = parseTH(Operands, TH);
        if (ResTH.isFailure())
          return ResTH;
        if (ResTH.isSuccess()) {
          CPolVal |= TH;
          continue;
        }
      }

      if (ResScope.isNoMatch()) {
        int64_t Scope;
        ResScope = parseScope(Operands, Scope);
        if (ResScope.isFailure())
          return ResScope;
        if (ResScope.isSuccess()) {
          CPolVal |= Scope;
          continue;
        }
      }

      if (ResNV.isNoMatch()) {
        if (trySkipId("nv")) {
          ResNV = ParseStatus::Success;
          CPolVal |= CPol::NV;
          continue;
        } else if (trySkipId("no", "nv")) {
          ResNV = ParseStatus::Success;
          continue;
        }
      }

      if (ResScal.isNoMatch()) {
        if (trySkipId("scale_offset")) {
          ResScal = ParseStatus::Success;
          CPolVal |= CPol::SCAL;
          continue;
        } else if (trySkipId("no", "scale_offset")) {
          ResScal = ParseStatus::Success;
          continue;
        }
      }

      break;
    }

    if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
        ResScal.isNoMatch())
      return ParseStatus::NoMatch;

    Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
                                                AMDGPUOperand::ImmTyCPol));
    return ParseStatus::Success;
  }

  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  SMLoc OpLoc = getLoc();
  unsigned Enabled = 0, Seen = 0;
  for (;;) {
    SMLoc S = getLoc();
    bool Disabling;
    unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
    if (!CPol)
      break;

    lex();

    if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
      return Error(S, "dlc modifier is not supported on this GPU");

    if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
      return Error(S, "scc modifier is not supported on this GPU");

    if (Seen & CPol)
      return Error(S, "duplicate cache policy modifier");

    if (!Disabling)
      Enabled |= CPol;

    Seen |= CPol;
  }

  if (!Seen)
    return ParseStatus::NoMatch;

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
  return ParseStatus::Success;
}
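// Example cache-policy operands this accepts: on pre-GFX12 targets "glc slc"
// enables two policies and "noglc" explicitly disables one, while on GFX12+
// the syntax is "th:TH_LOAD_NT scope:SCOPE_SYS" instead.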
ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
                                        int64_t &Scope) {
  static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
                                    CPol::SCOPE_DEV, CPol::SCOPE_SYS};

  ParseStatus Res = parseStringOrIntWithPrefix(
      Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
      Scope);

  if (Res.isSuccess())
    Scope = Scopes[Scope];

  return Res;
}
ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
  StringRef Value;
  SMLoc StringLoc;
  ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
  if (!Res.isSuccess())
    return Res;

  if (Value == "TH_DEFAULT")
    TH = AMDGPU::CPol::TH_RT;
  else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
           Value == "TH_LOAD_NT_WB") {
    return Error(StringLoc, "invalid th value");
  } else if (Value.consume_front("TH_ATOMIC_")) {
    TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
  } else if (Value.consume_front("TH_LOAD_")) {
    TH = AMDGPU::CPol::TH_TYPE_LOAD;
  } else if (Value.consume_front("TH_STORE_")) {
    TH = AMDGPU::CPol::TH_TYPE_STORE;
  } else {
    return Error(StringLoc, "invalid th value");
  }

  if (Value == "BYPASS")
    TH |= AMDGPU::CPol::TH_REAL_BYPASS;

  if (TH != 0) {
    if (TH == AMDGPU::CPol::TH_TYPE_ATOMIC)
      TH |= StringSwitch<int64_t>(Value)
                // ... (atomic temporal-hint cases elided in this excerpt)
                .Default(0xffffffff);
    else
      TH |= StringSwitch<int64_t>(Value)
                // ... (load/store temporal-hint cases elided in this excerpt)
                .Default(0xffffffff);
  }

  if (TH == 0xffffffff)
    return Error(StringLoc, "invalid th value");

  return ParseStatus::Success;
}
static void addOptionalImmOperand(
    MCInst &Inst, const OperandVector &Operands,
    AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
    AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
    std::optional<unsigned> InsertAt = std::nullopt) {
  auto i = OptionalIdx.find(ImmT);
  if (i != OptionalIdx.end()) {
    unsigned Idx = i->second;
    const AMDGPUOperand &Op =
        static_cast<const AMDGPUOperand &>(*Operands[Idx]);
    if (InsertAt.has_value())
      Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
    else
      Op.addImmOperands(Inst, 1);
  } else {
    if (InsertAt.has_value())
      Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
    else
      Inst.addOperand(MCOperand::createImm(Default));
  }
}
ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
                                                   StringRef &Value,
                                                   SMLoc &StringLoc) {
  if (!trySkipId(Prefix, AsmToken::Colon))
    return ParseStatus::NoMatch;

  StringLoc = getLoc();
  return parseId(Value, "expected an identifier") ? ParseStatus::Success
                                                  : ParseStatus::Failure;
}

ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
    OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
    int64_t &IntVal) {
  if (!trySkipId(Name, AsmToken::Colon))
    return ParseStatus::NoMatch;

  SMLoc StringLoc = getLoc();

  StringRef Value;
  if (isToken(AsmToken::Identifier)) {
    Value = getTokenStr();
    lex();

    for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
      if (Value == Ids[IntVal])
        break;
  } else if (!parseExpr(IntVal))
    return ParseStatus::Failure;

  if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
    return Error(StringLoc, "invalid " + Twine(Name) + " value");

  return ParseStatus::Success;
}

ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
    OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
    AMDGPUOperand::ImmTy Type) {
  SMLoc S = getLoc();
  int64_t IntVal;

  ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
  if (Res.isSuccess())
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));

  return Res;
}
bool AMDGPUAsmParser::tryParseFmt(const char *Pref, int64_t MaxVal,
                                  int64_t &Fmt) {
  int64_t Val;
  SMLoc Loc = getLoc();

  auto Res = parseIntWithPrefix(Pref, Val);
  if (Res.isFailure())
    return false;
  if (Res.isNoMatch())
    return true;

  if (Val < 0 || Val > MaxVal) {
    Error(Loc, Twine("out of range ", StringRef(Pref)));
    return false;
  }

  Fmt = Val;
  return true;
}
ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
                                              AMDGPUOperand::ImmTy ImmTy) {
  const char *Pref = "index_key";
  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();
  auto Res = parseIntWithPrefix(Pref, ImmVal);
  if (!Res.isSuccess())
    return Res;

  if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
       ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
      (ImmVal < 0 || ImmVal > 1))
    return Error(Loc, Twine("out of range ", StringRef(Pref)));

  if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
    return Error(Loc, Twine("out of range ", StringRef(Pref)));

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
  return ParseStatus::Success;
}

ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
}

ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
}

ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
}

ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
                                               StringRef Name,
                                               AMDGPUOperand::ImmTy Type) {
  // ... (body elided in this excerpt; dispatches to
  // parseStringOrIntWithPrefix with the symbolic MATRIX_FMT_* names)
}

ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
  return tryParseMatrixFMT(Operands, "matrix_a_fmt",
                           AMDGPUOperand::ImmTyMatrixAFMT);
}

ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
  return tryParseMatrixFMT(Operands, "matrix_b_fmt",
                           AMDGPUOperand::ImmTyMatrixBFMT);
}

ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
                                                 StringRef Name,
                                                 AMDGPUOperand::ImmTy Type) {
  // ... (body elided in this excerpt; dispatches to
  // parseStringOrIntWithPrefix with the symbolic MATRIX_SCALE_* names)
}

ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
  return tryParseMatrixScale(Operands, "matrix_a_scale",
                             AMDGPUOperand::ImmTyMatrixAScale);
}

ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
  return tryParseMatrixScale(Operands, "matrix_b_scale",
                             AMDGPUOperand::ImmTyMatrixBScale);
}

ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
                                                    StringRef Name,
                                                    AMDGPUOperand::ImmTy Type) {
  // ... (body elided in this excerpt; dispatches to
  // parseStringOrIntWithPrefix with the symbolic MATRIX_SCALE_FMT_* names)
}

ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
  return tryParseMatrixScaleFmt(Operands, "matrix_a_scale_fmt",
                                AMDGPUOperand::ImmTyMatrixAScaleFmt);
}

ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
  return tryParseMatrixScaleFmt(Operands, "matrix_b_scale_fmt",
                                AMDGPUOperand::ImmTyMatrixBScaleFmt);
}
ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;

  // dfmt and nfmt can appear in either order, and each is optional.
  for (int I = 0; I < 2; ++I) {
    if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
      return ParseStatus::Failure;

    if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
      return ParseStatus::Failure;

    // Skip optional comma between dfmt/nfmt
    // but guard against 2 commas following each other.
    if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
        !peekToken().is(AsmToken::Comma)) {
      trySkipToken(AsmToken::Comma);
    }
  }

  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
    return ParseStatus::NoMatch;

  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  Format = encodeDfmtNfmt(Dfmt, Nfmt);
  return ParseStatus::Success;
}

ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Fmt = UFMT_UNDEF;

  if (!tryParseFmt("format", UFMT_MAX, Fmt))
    return ParseStatus::Failure;

  if (Fmt == UFMT_UNDEF)
    return ParseStatus::NoMatch;

  Format = Fmt;
  return ParseStatus::Success;
}

bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt,
                                    StringRef FormatStr, SMLoc Loc) {
  using namespace llvm::AMDGPU::MTBUFFormat;
  int64_t Format;

  Format = getDfmt(FormatStr);
  if (Format != DFMT_UNDEF) {
    Dfmt = Format;
    return true;
  }

  Format = getNfmt(FormatStr, getSTI());
  if (Format != NFMT_UNDEF) {
    Nfmt = Format;
    return true;
  }

  Error(Loc, "unsupported format");
  return false;
}

ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
                                                      SMLoc FormatLoc,
                                                      int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;
  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
    return ParseStatus::Failure;

  if (trySkipToken(AsmToken::Comma)) {
    StringRef Str;
    SMLoc Loc = getLoc();
    if (!parseId(Str, "expected a format string") ||
        !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
      return ParseStatus::Failure;
    if (Dfmt == DFMT_UNDEF)
      return Error(Loc, "duplicate numeric format");
    if (Nfmt == NFMT_UNDEF)
      return Error(Loc, "duplicate data format");
  }

  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  if (isGFX10Plus()) {
    auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
    if (Ufmt == UFMT_UNDEF)
      return Error(FormatLoc, "unsupported format");
    Format = Ufmt;
  } else {
    Format = encodeDfmtNfmt(Dfmt, Nfmt);
  }

  return ParseStatus::Success;
}

ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
                                                        SMLoc Loc,
                                                        int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  auto Id = getUnifiedFormat(FormatStr, getSTI());
  if (Id == UFMT_UNDEF)
    return ParseStatus::NoMatch;

  if (!isGFX10Plus())
    return Error(Loc, "unified format is not supported on this GPU");

  Format = Id;
  return ParseStatus::Success;
}

ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;
  SMLoc Loc = getLoc();

  if (!parseExpr(Format))
    return ParseStatus::Failure;
  if (!isValidFormatEncoding(Format, getSTI()))
    return Error(Loc, "out of range format");

  return ParseStatus::Success;
}

ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  if (!trySkipId("format", AsmToken::Colon))
    return ParseStatus::NoMatch;

  if (trySkipToken(AsmToken::LBrac)) {
    StringRef FormatStr;
    SMLoc Loc = getLoc();
    if (!parseId(FormatStr, "expected a format string"))
      return ParseStatus::Failure;

    auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
    if (Res.isNoMatch())
      Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
    if (!Res.isSuccess())
      return Res;

    if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
      return ParseStatus::Failure;

    return ParseStatus::Success;
  }

  return parseNumericFormat(Format);
}

ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Format = getDefaultFormatEncoding(getSTI());
  ParseStatus Res;
  SMLoc Loc = getLoc();

  // Parse legacy format syntax.
  Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
  if (Res.isFailure())
    return Res;

  bool FormatFound = Res.isSuccess();

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));

  if (FormatFound)
    trySkipToken(AsmToken::Comma);

  if (isToken(AsmToken::EndOfStatement)) {
    // We are expecting an soffset operand,
    // but let matcher handle the error.
    return ParseStatus::Success;
  }

  // Parse soffset.
  Res = parseRegOrImm(Operands);
  if (!Res.isSuccess())
    return Res;

  trySkipToken(AsmToken::Comma);

  if (!FormatFound) {
    Res = parseSymbolicOrNumericFormat(Format);
    if (Res.isFailure())
      return Res;
    if (Res.isSuccess()) {
      auto Size = Operands.size();
      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
      assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
      Op.setImm(Format);
    }
    return ParseStatus::Success;
  }

  if (isId("format") && peekToken().is(AsmToken::Colon))
    return Error(getLoc(), "duplicate format");
  return ParseStatus::Success;
}
ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
  ParseStatus Res =
      parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
  if (Res.isNoMatch()) {
    Res = parseIntWithPrefix("inst_offset", Operands,
                             AMDGPUOperand::ImmTyInstOffset);
  }
  return Res;
}

ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
  ParseStatus Res =
      parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
  if (Res.isNoMatch())
    Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
  return Res;
}

ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
  ParseStatus Res =
      parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
  if (Res.isNoMatch()) {
    Res =
        parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
  }
  return Res;
}
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned OperandIdx[4];
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(MCRegister()));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
    Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
  }

  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg()) {
      EnMask |= Compr ? (0x3 << i * 2) : (0x1 << i);
    }
  }

  // ... (the vm/compr flags and the computed EnMask are appended here;
  // elided in this excerpt)
}
static bool
encodeCnt(const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal,
          bool Saturate,
          unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
          unsigned (*decode)(const IsaVersion &Version, unsigned)) {
  bool Failed = false;

  IntVal = encode(ISA, IntVal, CntVal);
  if (CntVal != decode(ISA, IntVal)) {
    if (Saturate) {
      IntVal = encode(ISA, IntVal, -1);
    } else {
      Failed = true;
    }
  }
  return Failed;
}

bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
  SMLoc CntLoc = getLoc();
  StringRef CntName = getTokenStr();

  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  int64_t CntVal;
  SMLoc ValLoc = getLoc();
  if (!parseExpr(CntVal))
    return false;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());

  bool Failed = true;
  bool Sat = CntName.ends_with("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  } else {
    Error(CntLoc, "invalid counter name " + CntName);
    return false;
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return false;
  }

  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return false;

  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
    if (isToken(AsmToken::EndOfStatement)) {
      Error(getLoc(), "expected a counter name");
      return false;
    }
  }

  return true;
}

ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
  int64_t Waitcnt = getWaitcntBitMask(ISA);
  SMLoc S = getLoc();

  if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
    while (!isToken(AsmToken::EndOfStatement)) {
      if (!parseCnt(Waitcnt))
        return ParseStatus::Failure;
    }
  } else {
    if (!parseExpr(Waitcnt))
      return ParseStatus::Failure;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
  return ParseStatus::Success;
}
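// Both forms are accepted: named counters that are folded into one encoded
// value, or a raw immediate, e.g.
//
//   s_waitcnt vmcnt(0) lgkmcnt(0)
//   s_waitcnt 0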
bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
  SMLoc FieldLoc = getLoc();
  StringRef FieldName = getTokenStr();
  if (!skipToken(AsmToken::Identifier, "expected a field name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  SMLoc ValueLoc = getLoc();
  StringRef ValueName = getTokenStr();
  if (!skipToken(AsmToken::Identifier, "expected a value name") ||
      !skipToken(AsmToken::RParen, "expected a right parenthesis"))
    return false;

  unsigned Shift;
  if (FieldName == "instid0") {
    Shift = 0;
  } else if (FieldName == "instskip") {
    Shift = 4;
  } else if (FieldName == "instid1") {
    Shift = 7;
  } else {
    Error(FieldLoc, "invalid field name " + FieldName);
    return false;
  }

  int Value;
  if (Shift == 4) {
    // Parse values for instskip.
    Value = StringSwitch<int>(ValueName)
                .Case("SAME", 0)
                .Case("NEXT", 1)
                .Case("SKIP_1", 2)
                .Case("SKIP_2", 3)
                .Case("SKIP_3", 4)
                .Case("SKIP_4", 5)
                .Default(-1);
  } else {
    // Parse values for instid0 and instid1.
    Value = StringSwitch<int>(ValueName)
                .Case("NO_DEP", 0)
                .Case("VALU_DEP_1", 1)
                .Case("VALU_DEP_2", 2)
                .Case("VALU_DEP_3", 3)
                .Case("VALU_DEP_4", 4)
                .Case("TRANS32_DEP_1", 5)
                .Case("TRANS32_DEP_2", 6)
                .Case("TRANS32_DEP_3", 7)
                .Case("FMA_ACCUM_CYCLE_1", 8)
                .Case("SALU_CYCLE_1", 9)
                .Case("SALU_CYCLE_2", 10)
                .Case("SALU_CYCLE_3", 11)
                .Default(-1);
  }
  if (Value < 0) {
    Error(ValueLoc, "invalid value name " + ValueName);
    return false;
  }

  Delay |= Value << Shift;
  return true;
}

ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
  int64_t Delay = 0;
  SMLoc S = getLoc();

  if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
    do {
      if (!parseDelay(Delay))
        return ParseStatus::Failure;
    } while (trySkipToken(AsmToken::Pipe));
  } else {
    if (!parseExpr(Delay))
      return ParseStatus::Failure;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
  return ParseStatus::Success;
}

bool AMDGPUOperand::isSWaitCnt() const { return isImm(); }

bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
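// Example s_delay_alu operands combined via '|', as parsed above:
//
//   s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)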
void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
                                  StringRef DepCtrName) {
  switch (ErrorId) {
  case OPR_ID_UNKNOWN:
    Error(Loc, Twine("invalid counter name ", DepCtrName));
    return;
  case OPR_ID_UNSUPPORTED:
    Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
    return;
  case OPR_ID_DUPLICATE:
    Error(Loc, Twine("duplicate counter name ", DepCtrName));
    return;
  case OPR_VAL_INVALID:
    Error(Loc, Twine("invalid value for ", DepCtrName));
    return;
  default:
    assert(false);
  }
}

bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
  using namespace llvm::AMDGPU::DepCtr;

  SMLoc DepCtrLoc = getLoc();
  StringRef DepCtrName = getTokenStr();

  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  int64_t ExprVal;
  if (!parseExpr(ExprVal))
    return false;

  unsigned PrevOprMask = UsedOprMask;
  int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());

  if (CntVal < 0) {
    depCtrError(DepCtrLoc, CntVal, DepCtrName);
    return false;
  }

  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return false;

  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
    if (isToken(AsmToken::EndOfStatement)) {
      Error(getLoc(), "expected a counter name");
      return false;
    }
  }

  unsigned CntValMask = PrevOprMask ^ UsedOprMask;
  DepCtr = (DepCtr & ~CntValMask) | CntVal;
  return true;
}

ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
  using namespace llvm::AMDGPU::DepCtr;

  int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
  SMLoc Loc = getLoc();

  if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
    unsigned UsedOprMask = 0;
    while (!isToken(AsmToken::EndOfStatement)) {
      if (!parseDepCtr(DepCtr, UsedOprMask))
        return ParseStatus::Failure;
    }
  } else {
    if (!parseExpr(DepCtr))
      return ParseStatus::Failure;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
  return ParseStatus::Success;
}

bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
                                            OperandInfoTy &Offset,
                                            OperandInfoTy &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  if (!trySkipId("hwreg", AsmToken::LParen))
    return ParseStatus::NoMatch;

  // The register may be specified by name or using a numeric code
  HwReg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) &&
      (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
    HwReg.IsSymbolic = true;
    lex(); // skip register name
  } else if (!parseExpr(HwReg.Val, "a register name")) {
    return ParseStatus::Failure;
  }

  if (trySkipToken(AsmToken::RParen))
    return ParseStatus::Success;

  // Parse optional params
  if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
    return ParseStatus::Failure;

  Offset.Loc = getLoc();
  if (!parseExpr(Offset.Val))
    return ParseStatus::Failure;

  if (!skipToken(AsmToken::Comma, "expected a comma"))
    return ParseStatus::Failure;

  Width.Loc = getLoc();
  if (!parseExpr(Width.Val) ||
      !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return ParseStatus::Failure;

  return ParseStatus::Success;
}

ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  StructuredOpField HwReg("id", "hardware register", HwregId::Width,
                          HwregId::Default);
  StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
                           HwregOffset::Default);
  struct : StructuredOpField {
    using StructuredOpField::StructuredOpField;
    bool validate(AMDGPUAsmParser &Parser) const override {
      if (!isUIntN(Width, Val - 1))
        return Error(Parser, "only values from 1 to 32 are legal");
      return true;
    }
  } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
  ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});

  if (Res.isNoMatch())
    Res = parseHwregFunc(HwReg, Offset, Width);

  if (Res.isSuccess()) {
    if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
      return ParseStatus::Failure;
    ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
  }

  if (Res.isNoMatch() &&
      parseExpr(ImmVal, "a hwreg macro, structured immediate"))
    Res = ParseStatus::Success;

  if (!Res.isSuccess())
    return ParseStatus::Failure;

  if (!isUInt<16>(ImmVal))
    return Error(Loc, "invalid immediate: only 16-bit values are legal");
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
  return ParseStatus::Success;
}

bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}
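// The hwreg operand accepts a macro, a structured immediate, or a plain
// 16-bit value, e.g. (register names illustrative):
//
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 16)
//   s_getreg_b32 s0, {id: 1, offset: 0, size: 16}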
bool
AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
                                  OperandInfoTy &Op,
                                  OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  Msg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) &&
      (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
    Msg.IsSymbolic = true;
    lex(); // skip message name
  } else if (!parseExpr(Msg.Val, "a message name")) {
    return false;
  }

  if (trySkipToken(AsmToken::Comma)) {
    Op.IsDefined = true;
    Op.Loc = getLoc();
    if (isToken(AsmToken::Identifier) &&
        (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
            OPR_ID_UNKNOWN) {
      lex(); // skip operation name
    } else if (!parseExpr(Op.Val, "an operation name")) {
      return false;
    }

    if (trySkipToken(AsmToken::Comma)) {
      Stream.IsDefined = true;
      Stream.Loc = getLoc();
      if (!parseExpr(Stream.Val))
        return false;
    }
  }

  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
}

bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  // Validation strictness depends on whether message is specified
  // in a symbolic or in a numeric form. In the latter case
  // only encoding possibility is checked.
  bool Strict = Msg.IsSymbolic;

  if (Strict) {
    if (Msg.Val == OPR_ID_UNSUPPORTED) {
      Error(Msg.Loc, "specified message id is not supported on this GPU");
      return false;
    }
  } else {
    if (!isValidMsgId(Msg.Val, getSTI())) {
      Error(Msg.Loc, "invalid message id");
      return false;
    }
  }
  if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
    if (Op.IsDefined) {
      Error(Op.Loc, "message does not support operations");
    } else {
      Error(Msg.Loc, "missing message operation");
    }
    return false;
  }
  if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
    if (Op.Val == OPR_ID_UNSUPPORTED)
      Error(Op.Loc, "specified operation id is not supported on this GPU");
    else
      Error(Op.Loc, "invalid operation id");
    return false;
  }
  if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
      Stream.IsDefined) {
    Error(Stream.Loc, "message operation does not support streams");
    return false;
  }
  if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
    Error(Stream.Loc, "invalid message stream id");
    return false;
  }
  return true;
}

ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("sendmsg", AsmToken::LParen)) {
    OperandInfoTy Msg(OPR_ID_UNKNOWN);
    OperandInfoTy Op(OP_NONE_);
    OperandInfoTy Stream(STREAM_ID_NONE_);
    if (parseSendMsgBody(Msg, Op, Stream) &&
        validateSendMsg(Msg, Op, Stream)) {
      ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
    } else {
      return ParseStatus::Failure;
    }
  } else if (parseExpr(ImmVal, "a sendmsg macro")) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal))
      return Error(Loc, "invalid immediate: only 16-bit values are legal");
  } else {
    return ParseStatus::Failure;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
  return ParseStatus::Success;
}

bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}
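// Example sendmsg operands validated above (the stream id is illustrative):
//
//   s_sendmsg sendmsg(MSG_INTERRUPT)
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)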
ParseStatus AMDGPUAsmParser::parseWaitEvent(OperandVector &Operands) {
  using namespace llvm::AMDGPU::WaitEvent;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  StructuredOpField DontWaitExportReady("dont_wait_export_ready", "bit value",
                                        1, 0);
  StructuredOpField ExportReady("export_ready", "bit value", 1, 0);

  StructuredOpField *TargetBitfield =
      isGFX11() ? &DontWaitExportReady : &ExportReady;

  ParseStatus Res = parseStructuredOpFields({TargetBitfield});

  if (Res.isSuccess()) {
    if (!validateStructuredOpFields({TargetBitfield}))
      return ParseStatus::Failure;
    ImmVal = TargetBitfield->Val;
  }

  if (Res.isNoMatch() && !parseExpr(ImmVal))
    return ParseStatus::Failure;

  if (!isUInt<16>(ImmVal))
    return Error(Loc, "invalid immediate: only 16-bit values are legal");

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc,
                                              AMDGPUOperand::ImmTyWaitEvent));
  return ParseStatus::Success;
}

bool AMDGPUOperand::isWaitEvent() const { return isImmTy(ImmTyWaitEvent); }
ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return ParseStatus::NoMatch;

  int Slot = StringSwitch<int>(Str)
                 .Case("p10", 0)
                 .Case("p20", 1)
                 .Case("p0", 2)
                 .Default(-1);

  if (Slot == -1)
    return Error(S, "invalid interpolation slot");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
                                              AMDGPUOperand::ImmTyInterpSlot));
  return ParseStatus::Success;
}

ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return ParseStatus::NoMatch;

  if (!Str.starts_with("attr"))
    return Error(S, "invalid interpolation attribute");

  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
                     .Case(".x", 0)
                     .Case(".y", 1)
                     .Case(".z", 2)
                     .Case(".w", 3)
                     .Default(-1);
  if (AttrChan == -1)
    return Error(S, "invalid or missing interpolation attribute channel");

  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr))
    return Error(S, "invalid or missing interpolation attribute number");

  if (Attr > 32)
    return Error(S, "out of bounds interpolation attribute number");

  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(
      this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
  return ParseStatus::Success;
}
ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Exp;

  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return ParseStatus::NoMatch;

  unsigned Id = getTgtId(Str);
  if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
    return Error(S, (Id == ET_INVALID)
                        ? "invalid exp target"
                        : "exp target is not supported on this GPU");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
                                              AMDGPUOperand::ImmTyExpTgt));
  return ParseStatus::Success;
}
bool
AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
  return Token.is(AsmToken::Identifier) && Token.getString() == Id;
}

bool
AMDGPUAsmParser::isId(const StringRef Id) const {
  return isId(getToken(), Id);
}

bool
AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
  return getTokenKind() == Kind;
}

StringRef AMDGPUAsmParser::getId() const {
  return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
}

bool
AMDGPUAsmParser::trySkipId(const StringRef Id) {
  if (isId(Id)) {
    lex();
    return true;
  }
  return false;
}

bool
AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
  if (isToken(AsmToken::Identifier)) {
    StringRef Tok = getTokenStr();
    if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
      lex();
      return true;
    }
  }
  return false;
}

bool
AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
  if (isId(Id) && peekToken().is(Kind)) {
    lex();
    lex();
    return true;
  }
  return false;
}

bool
AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
  if (isToken(Kind)) {
    lex();
    return true;
  }
  return false;
}

bool
AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
                           const StringRef ErrMsg) {
  if (!trySkipToken(Kind)) {
    Error(getLoc(), ErrMsg);
    return false;
  }
  return true;
}

bool
AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
  SMLoc S = getLoc();

  const MCExpr *Expr;
  if (Parser.parseExpression(Expr))
    return false;

  if (Expr->evaluateAsAbsolute(Imm))
    return true;

  if (Expected.empty()) {
    Error(S, "expected absolute expression");
  } else {
    Error(S, Twine("expected ", Expected) +
             Twine(" or an absolute expression"));
  }
  return false;
}

bool
AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
  SMLoc S = getLoc();

  const MCExpr *Expr;
  if (Parser.parseExpression(Expr))
    return false;

  int64_t IntVal;
  if (Expr->evaluateAsAbsolute(IntVal)) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
  } else {
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
  }
  return true;
}

bool
AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
  if (isToken(AsmToken::String)) {
    Val = getToken().getStringContents();
    lex();
    return true;
  }
  Error(getLoc(), ErrMsg);
  return false;
}

bool
AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
  if (isToken(AsmToken::Identifier)) {
    Val = getTokenStr();
    lex();
    return true;
  }
  if (!ErrMsg.empty())
    Error(getLoc(), ErrMsg);
  return false;
}

AsmToken
AMDGPUAsmParser::getToken() const {
  return Parser.getTok();
}

AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
  return isToken(AsmToken::EndOfStatement)
             ? getToken()
             : getLexer().peekTok(ShouldSkipSpace);
}

void AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
  auto TokCount = getLexer().peekTokens(Tokens);

  for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
    Tokens[Idx] = AsmToken(AsmToken::Error, "");
}

AsmToken::TokenKind
AMDGPUAsmParser::getTokenKind() const {
  return getLexer().getKind();
}

SMLoc
AMDGPUAsmParser::getLoc() const {
  return getToken().getLoc();
}

StringRef
AMDGPUAsmParser::getTokenStr() const {
  return getToken().getString();
}

void
AMDGPUAsmParser::lex() {
  Parser.Lex();
}
const AMDGPUOperand &
AMDGPUAsmParser::findMCOperand(const OperandVector &Operands,
                               int MCOpIdx) const {
  for (const auto &Op : Operands) {
    const AMDGPUOperand &TargetOp = static_cast<AMDGPUOperand &>(*Op);
    if (TargetOp.getMCOpIdx() == MCOpIdx)
      return TargetOp;
  }
  llvm_unreachable("Failed to find the operand with a given MC index");
}

SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
  return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
}

SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
  return a.getPointer() < b.getPointer() ? b : a;
}

SMLoc AMDGPUAsmParser::getOperandLoc(const OperandVector &Operands,
                                     int MCOpIdx) const {
  return findMCOperand(Operands, MCOpIdx).getStartLoc();
}

SMLoc
AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                               const OperandVector &Operands) const {
  for (unsigned i = Operands.size() - 1; i > 0; --i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Test(Op))
      return Op.getStartLoc();
  }
  return getInstLoc(Operands);
}

SMLoc
AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
                           const OperandVector &Operands) const {
  auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
  return getOperandLoc(Test, Operands);
}
ParseStatus AMDGPUAsmParser::parseStructuredOpFields(
    ArrayRef<StructuredOpField *> Fields) {
  if (!trySkipToken(AsmToken::LCurly))
    return ParseStatus::NoMatch;

  bool First = true;
  while (!trySkipToken(AsmToken::RCurly)) {
    if (!First &&
        !skipToken(AsmToken::Comma, "comma or closing brace expected"))
      return ParseStatus::Failure;

    StringRef Id = getTokenStr();
    SMLoc IdLoc = getLoc();
    if (!skipToken(AsmToken::Identifier, "field name expected") ||
        !skipToken(AsmToken::Colon, "colon expected"))
      return ParseStatus::Failure;

    auto I =
        find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
    if (I == Fields.end())
      return Error(IdLoc, "unknown field");
    if ((*I)->IsDefined)
      return Error(IdLoc, "duplicate field");

    // TODO: Support symbolic values.
    (*I)->Loc = getLoc();
    if (!parseExpr((*I)->Val))
      return ParseStatus::Failure;
    (*I)->IsDefined = true;

    First = false;
  }
  return ParseStatus::Success;
}

bool AMDGPUAsmParser::validateStructuredOpFields(
    ArrayRef<const StructuredOpField *> Fields) {
  return all_of(Fields, [this](const StructuredOpField *F) {
    return F->validate(*this);
  });
}
LLVM_READNONE
static unsigned
encodeBitmaskPerm(const unsigned AndMask,
                  const unsigned OrMask,
                  const unsigned XorMask) {
  using namespace llvm::AMDGPU::Swizzle;

  return BITMASK_PERM_ENC |
         (AndMask << BITMASK_AND_SHIFT) |
         (OrMask  << BITMASK_OR_SHIFT)  |
         (XorMask << BITMASK_XOR_SHIFT);
}

bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
                                          const unsigned MaxVal,
                                          const Twine &ErrMsg, SMLoc &Loc) {
  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }
  Loc = getLoc();
  if (!parseExpr(Op)) {
    return false;
  }
  if (Op < MinVal || Op > MaxVal) {
    Error(Loc, ErrMsg);
    return false;
  }

  return true;
}

bool
AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                                      const unsigned MinVal,
                                      const unsigned MaxVal,
                                      const StringRef ErrMsg) {
  SMLoc Loc;
  for (unsigned i = 0; i < OpNum; ++i) {
    if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
      return false;
  }

  return true;
}

bool
AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  int64_t Lane[LANE_NUM];
  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
                           "expected a 2-bit lane id")) {
    Imm = QUAD_PERM_ENC;
    for (unsigned I = 0; I < LANE_NUM; ++I) {
      Imm |= Lane[I] << (LANE_SHIFT * I);
    }
    return true;
  }
  return false;
}

bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperand(GroupSize,
                           2, 32,
                           "group size must be in the interval [2,32]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }
  if (parseSwizzleOperand(LaneIdx,
                          0, GroupSize - 1,
                          "lane id must be in the interval [0,group size - 1]",
                          Loc)) {
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}

bool
AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize,
                           2, 32,
                           "group size must be in the interval [2,32]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize,
                           1, 16,
                           "group size must be in the interval [1,16]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch (Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}

bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!AMDGPU::isGFX9Plus(getSTI())) {
    Error(getLoc(), "FFT mode swizzle not supported on this GPU");
    return false;
  }

  int64_t Swizzle;
  SMLoc Loc;
  if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
                           "FFT swizzle must be in the interval [0," +
                               Twine(FFT_SWIZZLE_MAX) + Twine(']'),
                           Loc))
    return false;

  Imm = FFT_MODE_ENC | Swizzle;
  return true;
}

bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!AMDGPU::isGFX9Plus(getSTI())) {
    Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
    return false;
  }

  SMLoc Loc;
  int64_t Direction;

  if (!parseSwizzleOperand(Direction, 0, 1,
                           "direction must be 0 (left) or 1 (right)", Loc))
    return false;

  int64_t RotateSize;
  if (!parseSwizzleOperand(
          RotateSize, 0, ROTATE_MAX_SIZE,
          "number of threads to rotate must be in the interval [0," +
              Twine(ROTATE_MAX_SIZE) + Twine(']'),
          Loc))
    return false;

  Imm = ROTATE_MODE_ENC | (Direction << ROTATE_DIR_SHIFT) |
        (RotateSize << ROTATE_SIZE_SHIFT);
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
  SMLoc OffsetLoc = getLoc();

  if (!parseExpr(Imm, "a swizzle macro")) {
    return false;
  }
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
    return false;
  }
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parentheses")) {

    SMLoc ModeLoc = getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else if (trySkipId(IdSymbolic[ID_FFT])) {
      Ok = parseSwizzleFFT(Imm);
    } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
      Ok = parseSwizzleRotate(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
  }

  return false;
}

ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {
    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(
        AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok ? ParseStatus::Success : ParseStatus::Failure;
  }
  return ParseStatus::NoMatch;
}

bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}
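// Examples of the swizzle macros parsed above, from simple offsets to
// bitmask controls:
//
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pi0")
//   ds_swizzle_b32 v0, v1 offset:0x8000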
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
  using namespace llvm::AMDGPU::VGPRIndexMode;

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = getLoc();

    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      Error(S, (Imm == 0) ?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      return UNDEF;
    }

    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      return UNDEF;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      return UNDEF;
  }

  return Imm;
}

ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
  using namespace llvm::AMDGPU::VGPRIndexMode;

  int64_t Imm = 0;
  SMLoc S = getLoc();

  if (trySkipId("gpr_idx", AsmToken::LParen)) {
    Imm = parseGPRIdxMacro();
    if (Imm == UNDEF)
      return ParseStatus::Failure;
  } else {
    if (getParser().parseAbsoluteExpression(Imm))
      return ParseStatus::Failure;
    if (Imm < 0 || !isUInt<4>(Imm))
      return Error(S, "invalid immediate: only 4-bit values are legal");
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
  return ParseStatus::Success;
}

bool AMDGPUOperand::isGPRIdxMode() const {
  return isImmTy(ImmTyGprIdxMode);
}
ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
  // Make sure we are not parsing something
  // that looks like a label or an expression but is not.
  // This will improve error messages.
  if (isRegister() || isModifier())
    return ParseStatus::NoMatch;

  if (!parseExpr(Operands))
    return ParseStatus::Failure;

  AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
  assert(Opr.isImm() || Opr.isExpr());
  SMLoc Loc = Opr.getStartLoc();

  // Currently we do not support arbitrary expressions as branch targets.
  // Only labels and absolute expressions are accepted.
  if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
    Error(Loc, "expected an absolute expression or a label");
  } else if (Opr.isImm() && !Opr.isS16Imm()) {
    Error(Loc, "expected a 16-bit signed jump offset");
  }

  return ParseStatus::Success;
}

ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
  return parseReg(Operands);
}
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                                   const OperandVector &Operands,
                                   bool IsAtomic) {
  OptionalImmIndexMap OptionalIdx;
  unsigned FirstOperandIdx = 1;
  bool IsAtomicReturn = false;

  if (IsAtomic) {
    IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
                     SIInstrFlags::IsAtomicRet;
  }

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on correct number of MC operands.
      if (IsAtomicReturn && i == FirstOperandIdx)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string.  There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
}

bool AMDGPUOperand::isSMRDOffset8() const {
  return isImmLiteral() && isUInt<8>(getImm());
}

bool AMDGPUOperand::isSMEMOffset() const {
  // Offset range is checked later by the validator.
  return isImmLiteral();
}

bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI and we only want to use them
  // when the offset is > 8-bits.
  return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
}
bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl == 0 || BoundCtrl == 1) {
    // DPP_BOUND_OFF and DPP_BOUND_ZERO are encoded the same way.
    BoundCtrl = 1;
    return true;
  }
  return false;
}

void AMDGPUAsmParser::onBeginOfFile() {
  if (!getParser().getStreamer().getTargetStreamer() ||
      getSTI().getTargetTriple().getArch() == Triple::r600)
    return;

  if (!getTargetStreamer().getTargetID())
    getTargetStreamer().initializeTargetID(getSTI(),
                                           getSTI().getFeatureString());

  if (isHsaAbi(getSTI()))
    getTargetStreamer().EmitDirectiveAMDGCNTarget();
}
/// Parse AMDGPU specific expressions.
///
///  expr ::= or(expr, ...) |
///           max(expr, ...)
///
bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
  using AGVK = AMDGPUMCExpr::VariantKind;

  if (isToken(AsmToken::Identifier)) {
    StringRef TokenId = getTokenStr();
    AGVK VK = StringSwitch<AGVK>(TokenId)
                  .Case("max", AGVK::AGVK_Max)
                  .Case("or", AGVK::AGVK_Or)
                  .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
                  .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
                  .Case("alignto", AGVK::AGVK_AlignTo)
                  .Case("occupancy", AGVK::AGVK_Occupancy)
                  .Default(AGVK::AGVK_None);

    if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
      SmallVector<const MCExpr *, 4> Exprs;
      uint64_t CommaCount = 0;
      lex(); // Eat Arg ('or', 'max', ...)
      lex(); // Eat lparen
      while (true) {
        if (trySkipToken(AsmToken::RParen)) {
          if (Exprs.empty()) {
            Error(getToken().getLoc(),
                  "empty " + Twine(TokenId) + " expression");
            return true;
          }
          if (CommaCount + 1 != Exprs.size()) {
            Error(getToken().getLoc(),
                  "mismatch of commas in " + Twine(TokenId) + " expression");
            return true;
          }
          Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
          return false;
        }
        const MCExpr *Expr;
        if (getParser().parseExpression(Expr, EndLoc))
          return true;
        Exprs.push_back(Expr);
        bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
        if (LastTokenWasComma)
          CommaCount++;
        if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
          Error(getToken().getLoc(),
                "unexpected token in " + Twine(TokenId) + " expression");
          return true;
        }
      }
    }
  }
  return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
}
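// These vendor expressions nest like ordinary operators; for example
// "max(a, b, c)" or "or(x, 1)", where the arguments are themselves arbitrary
// MC expressions (the symbol names here are illustrative).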
ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
  StringRef Name = getTokenStr();
  if (Name == "mul") {
    return parseIntWithPrefix("mul", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
  }

  if (Name == "div") {
    return parseIntWithPrefix("div", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
  }

  return ParseStatus::NoMatch;
}
// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
// the number of src operands present, then copies that bit into
// src0_modifiers.
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
  int Opc = Inst.getOpcode();
  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx == -1)
    return;

  const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2};
  int SrcNum;
  for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
       ++SrcNum)
    ;
  assert(SrcNum > 0);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
  if (DstIdx == -1)
    return;

  const MCOperand &DstOp = Inst.getOperand(DstIdx);
  int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
  int ModVal = Inst.getOperand(ModIdx).getImm();
  if (DstOp.isReg() &&
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
    if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
      ModVal |= SISrcMods::DST_OP_SEL;
  } else {
    if ((OpSel & (1 << SrcNum)) != 0)
      ModVal |= SISrcMods::DST_OP_SEL;
  }
  Inst.getOperand(ModIdx).setImm(ModVal);
}

void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
                                   const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);
  cvtVOP3DstOpSelOnly(Inst, *getMRI());
}

void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
                                   OptionalImmIndexMap &OptionalIdx) {
  cvtVOP3P(Inst, Operands, OptionalIdx);
  cvtVOP3DstOpSelOnly(Inst, *getMRI());
}

static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
  return
      // 1. This operand is input modifiers
      Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. Next operand is register class
      && Desc.operands()[OpNum + 1].RegClass != -1
      // 4. Next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1,
                                   MCOI::OperandConstraint::TIED_TO) == -1;
}

void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst, unsigned OpSel) {
  int Opc = Inst.getOpcode();
  constexpr AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                    AMDGPU::OpName::src2};
  constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                       AMDGPU::OpName::src1_modifiers,
                                       AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      // Some instructions, e.g. v_interp_p2_f16 in GFX9, have src0, src2, but
      // no src1. So continue instead of break.
      continue;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    unsigned ModVal = Inst.getOperand(ModIdx).getImm();

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;
    // op_sel[3] is encoded in src0_modifiers.
    if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
      ModVal |= SISrcMods::DST_OP_SEL;

    Inst.getOperand(ModIdx).setImm(ModVal);
  }
}
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
               Op.isInterpAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyHigh);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyClamp);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyOModSI);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyOpSel);
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
    cvtOpSelHelper(Inst, OpSel);
  }
}
void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyClamp);

  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyWaitEXP);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx == -1)
    return;

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  cvtOpSelHelper(Inst, OpSel);
}
void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  // ...
  int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
  // ...
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
    static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
  // ...
  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
    // ...
    if (NumOperands == CbszOpIdx) {
      // ...
    }
    // ...
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    // ...
      Op.addRegOrImmOperands(Inst, 1);
    }
  }
  // ...
  auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
  if (CbszIdx != OptionalIdx.end()) {
    int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
    // ...
  }
  int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
  auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
  if (BlgpIdx != OptionalIdx.end()) {
    int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
    // ...
  }
  // ...
  auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
  if (OpselIdx != OptionalIdx.end()) {
    OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
                .getImm();
  }
  // ...
  unsigned OpSelHi = 0;
  auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
  if (OpselHiIdx != OptionalIdx.end()) {
    OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
                  .getImm();
  }
  // ...
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers};
  // ...
  for (unsigned J = 0; J < 2; ++J) {
    unsigned ModVal = 0;
    // ...
    if (OpSel & (1 << J))
      // ...
    if (OpSelHi & (1 << J))
      // ...
    const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    // ...
  }
}
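// Illustration (not part of the parser): for scaled MFMA the parsed cbsz/blgp
// values select the matrix A/B data formats, which in turn change how many
// registers each matrix source occupies (see mfmaScaleF8F6F4FormatToNumRegs
// in AMDGPUBaseInfo). Comment-only example with a hypothetical opcode and
// format codes, since neither is spelled out in this listing:
//   v_mfma_scale_f32_16x16x128_f8f6f4 ... cbsz:0 blgp:2
// would read matrix A and matrix B in different formats on targets where the
// encodings 0 and 2 denote distinct FP8/FP6-family formats.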
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  // ...
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }
  // ...
  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // ...
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    // ...
      Op.addRegOrImmOperands(Inst, 1);
    }
  }
  // ...
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyScaleSel);
  // ...
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyClamp);
  // ...
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyByteSel);
  // ...
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOModSI);
  // ...
  auto *it = Inst.begin();
  std::advance(it,
               AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
  // ...
}
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}
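// Illustration (not part of the parser): the two-pass shape shared by the
// cvt* converters above. Pass 1 walks the parsed operands, emitting mandatory
// ones immediately and only recording optional immediates by their ImmTy;
// pass 2 appends the optional immediates in the fixed order the MCInst layout
// expects, substituting a default for any the user omitted. A minimal
// standalone sketch with stand-in types (the real code uses AMDGPUOperand,
// MCInst and addOptionalImmOperand):
#include <cstdint>
#include <map>
#include <vector>

using ImmTyExample = int;

static void appendOptionalExample(std::vector<int64_t> &Inst,
                                  const std::map<ImmTyExample, int64_t> &Recorded,
                                  ImmTyExample Ty, int64_t Default) {
  auto It = Recorded.find(Ty);
  Inst.push_back(It == Recorded.end() ? Default : It->second);
}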
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                               OptionalImmIndexMap &OptIdx) {
  // ...
  if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
      Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx11 ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx11 ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12 ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx13 ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx13) {
    // ...
  }
  // ...
  int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
  if (VdstInIdx != -1 && VdstInIdx == static_cast<int>(Inst.getNumOperands()))
    // ...
  int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
  if (BitOp3Idx != -1) {
    // ...
  }
  // ...
  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1) {
    // ...
  }
  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    int DefaultVal = IsPacked ? -1 : 0;
    // ...
  }
  // ...
  int MatrixAFMTIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
  if (MatrixAFMTIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixAFMT, 0);
  }
  // ...
  int MatrixBFMTIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
  if (MatrixBFMTIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixBFMT, 0);
  }
  // ...
  int MatrixAScaleIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale);
  if (MatrixAScaleIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixAScale, 0);
  }
  // ...
  int MatrixBScaleIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale);
  if (MatrixBScaleIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixBScale, 0);
  }
  // ...
  int MatrixAScaleFmtIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
  if (MatrixAScaleFmtIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
  }
  // ...
  int MatrixBScaleFmtIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
  if (MatrixBScaleFmtIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
  }
  // ...
  addOptionalImmOperand(Inst, Operands, OptIdx,
                        AMDGPUOperand::ImmTyMatrixAReuse, 0);
  // ...
  addOptionalImmOperand(Inst, Operands, OptIdx,
                        AMDGPUOperand::ImmTyMatrixBReuse, 0);
  // ...
  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  // ...
  int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
  // ...
  const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2};
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers,
                                   AMDGPU::OpName::src2_modifiers};
  // ...
  unsigned OpSelHi = 0;
  // ...
  if (OpSelHiIdx != -1)
    // ...
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    // ...
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    // ...
    uint32_t ModVal = 0;
    // ...
    if (SrcOp.isReg() && getMRI() /* ... */
    // ...
    if ((OpSel & (1 << J)) != 0)
      // ...
    if ((OpSelHi & (1 << J)) != 0)
      // ...
    if ((NegLo & (1 << J)) != 0)
      // ...
    if ((NegHi & (1 << J)) != 0)
      // ...
  }
}
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  cvtVOP3(Inst, Operands, OptIdx);
  cvtVOP3P(Inst, Operands, OptIdx);
}
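// Illustration (not part of the parser): how the packed-math modifier masks
// consumed above look in assembly. Comment-only example:
//   v_pk_add_f16 v0, v1, v2 op_sel:[1,0] op_sel_hi:[0,1] neg_lo:[0,1] neg_hi:[1,0]
// Bit J of each mask refers to srcJ; the loop over Ops[]/ModOps[] above folds
// every mask bit into the corresponding srcJ_modifiers operand of the MCInst.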
static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
                                  unsigned i, unsigned Opc,
                                  AMDGPU::OpName OpName) {
  if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
    ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
  else
    ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
}
void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
  // ...
  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
  // ...
  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
  ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1);
  // ...
  OptionalImmIndexMap OptIdx;
  for (unsigned i = 5; i < Operands.size(); ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    OptIdx[Op.getImmTy()] = i;
  }
  // ...
  addOptionalImmOperand(Inst, Operands, OptIdx,
                        AMDGPUOperand::ImmTyIndexKey8bit);
  // ...
  addOptionalImmOperand(Inst, Operands, OptIdx,
                        AMDGPUOperand::ImmTyIndexKey16bit);
  // ...
  addOptionalImmOperand(Inst, Operands, OptIdx,
                        AMDGPUOperand::ImmTyIndexKey32bit);
  // ...
  cvtVOP3P(Inst, Operands, OptIdx);
}
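// Illustration (not part of the parser): SWMMAC instructions accept an
// optional index_key modifier selecting a sub-word slice of the sparsity
// index operand. Schematic, comment-only example with placeholder operand
// names (vDst/vA/vB/vIndex are not from this listing):
//   v_swmmac_f32_16x16x32_f16 vDst, vA, vB, vIndex index_key:1
// The three ImmTyIndexKey* defaults above cover the 8-, 16- and 32-bit
// variants; whichever one the opcode actually carries is appended.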
ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
  // ...
  Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
  SMLoc OpYLoc = getLoc();
  // ...
  Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
  // ...
  return Error(OpYLoc, "expected a VOPDY instruction after ::");
}
void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
  // ...
  auto addOp = [&](uint16_t ParsedOprIdx) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
    // ...
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    // ...
      Op.addRegOperands(Inst, 1);
    // ...
      Op.addImmOperands(Inst, 1);
    // ...
  };
  // ...
  addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
  // ...
  const auto &CInfo = InstInfo[CompIdx];
  auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
  for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
    addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
  if (CInfo.hasSrc2Acc())
    addOp(CInfo.getIndexOfDstInParsedOperands());
  // ...
  int BitOp3Idx =
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
  if (BitOp3Idx != -1) {
    OptionalImmIndexMap OptIdx;
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
    // ...
    OptIdx[Op.getImmTy()] = Operands.size() - 1;
    // ...
  }
}
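// Illustration (not part of the parser): VOPD pairs two VALU operations in a
// single dual-issue instruction. The "::" token pushed by parseVOPD separates
// the X and Y halves, e.g. (comment-only example):
//   v_dual_mov_b32 v0, v1 :: v_dual_add_f32 v2, v3, v4
// cvtVOPD then emits the destination and sources of each component in
// component order through the addOp lambda above.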
bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}

bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  // ...
  return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
         (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
         (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
         (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
         (Imm == DppCtrl::WAVE_SHL1) ||
         (Imm == DppCtrl::WAVE_ROL1) ||
         (Imm == DppCtrl::WAVE_SHR1) ||
         (Imm == DppCtrl::WAVE_ROR1) ||
         (Imm == DppCtrl::ROW_MIRROR) ||
         (Imm == DppCtrl::ROW_HALF_MIRROR) ||
         (Imm == DppCtrl::BCAST15) ||
         (Imm == DppCtrl::BCAST31) ||
         (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
         (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
}
bool AMDGPUOperand::isBLGP() const {
  // ...
}

bool AMDGPUOperand::isS16Imm() const {
  // ...
}

bool AMDGPUOperand::isU16Imm() const {
  // ...
}

bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
  // ...
  SMLoc Loc = getToken().getEndLoc();
  Token = std::string(getTokenStr());
  // ...
  if (getLoc() != Loc)
    // ...
  if (!parseId(Suffix))
    // ...
  StringRef DimId = Token;
  // ...
}
ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  // ...
  SMLoc S = getLoc();
  // ...
  SMLoc Loc = getLoc();
  if (!parseDimId(Encoding))
    return Error(Loc, "invalid dim value");
  // ...
  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  // ...
}
ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = getLoc();
  // ...
  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    // ...
  for (size_t i = 0; i < 8; ++i) {
    // ...
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      // ...
    if (0 > Sels[i] || 7 < Sels[i])
      return Error(Loc, "expected a 3-bit value");
  }
  // ...
  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    // ...
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));
  // ...
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  // ...
}
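// Illustration (not part of the parser): the packing performed above turns
// "dpp8:[7,6,5,4,3,2,1,0]" into one immediate in which 3-bit field i holds
// the lane select for lane i. A minimal standalone sketch, assuming only
// <cstdint>:
#include <cstdint>

static uint32_t packDpp8Example() {
  const int64_t Sels[8] = {7, 6, 5, 4, 3, 2, 1, 0};
  uint32_t DPP8 = 0;
  for (unsigned i = 0; i < 8; ++i)
    DPP8 |= static_cast<uint32_t>(Sels[i]) << (i * 3);
  return DPP8; // 0x53977 for this selector list
}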
bool AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
                                         const OperandVector &Operands) {
  if (Ctrl == "row_newbcast")
    // ...
  if (Ctrl == "row_share" || Ctrl == "row_xmask")
    // ...
  if (Ctrl == "wave_shl" || Ctrl == "wave_shr" || Ctrl == "wave_rol" ||
      Ctrl == "wave_ror" || Ctrl == "row_bcast")
    // ...
  return Ctrl == "row_mirror" ||
         Ctrl == "row_half_mirror" ||
         Ctrl == "quad_perm" ||
         Ctrl == "row_shl" ||
         Ctrl == "row_shr" ||
         /* ... */;
}
int64_t
AMDGPUAsmParser::parseDPPCtrlPerm() {
  // ...
  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    // ...
  for (int i = 0; i < 4; ++i) {
    // ...
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Temp))
      // ...
    if (Temp < 0 || Temp > 3) {
      Error(Loc, "expected a 2-bit value");
      // ...
    }
    // ...
    Val += (Temp << i * 2);
  }
  // ...
  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    // ...
}
int64_t
AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  // ...
  SMLoc Loc = getLoc();
  // ...
  if (getParser().parseAbsoluteExpression(Val))
    // ...
  struct DppCtrlCheck {
    // ...
  };

  DppCtrlCheck Check =
      StringSwitch<DppCtrlCheck>(Ctrl)
          .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
          .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
          .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
          .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
          .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
          .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
          .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
          .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
          .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
          .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
          // ...

  if (Check.Ctrl == -1) {
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    // ...
  }
  // ...
  Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
  // ...
}
ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  // ...
      !isSupportedDPPCtrl(getTokenStr(), Operands))
    // ...
  SMLoc S = getLoc();
  // ...
  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
  }
  // ...
  if (Ctrl == "quad_perm") {
    Val = parseDPPCtrlPerm();
  } else {
    Val = parseDPPCtrlSel(Ctrl);
  }
  // ...
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
  // ...
}
void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
                                 bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;
  // ...
  int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
  int Src2ModIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
  bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
               /* ... */;
  // ...
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }
  // ...
  int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
  bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
                        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx13 ||
                        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
                        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx13 ||
                        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
                        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx13 ||
                        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
                        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx13;
  // ...
  for (unsigned E = Operands.size(); I != E; ++I) {
    // ...
    if (OldIdx == NumOperands) {
      // ...
      constexpr int DST_IDX = 0;
      // ...
    } else if (Src2ModIdx == NumOperands) {
      // ...
    }
    // ...
    if (IsVOP3CvtSrDpp) {
      // ...
    }
    // ...
    if (TiedTo != -1) {
      // ...
    }
    // ...
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // ...
    if (IsDPP8 && Op.isDppFI()) {
      // ...
    }
    // ...
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImm() &&
               /* ... */) {
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      // ...
      OptionalIdx[Op.getImmTy()] = I;
    }
    // ...
  }
  // ...
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyClamp);
  // ...
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyByteSel);
  // ...
  cvtVOP3P(Inst, Operands, OptionalIdx);
  // ...
  cvtVOP3OpSel(Inst, Operands, OptionalIdx);
  // ...
  using namespace llvm::AMDGPU::DPP;
  // ...
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyDppFI);
  // ...
}
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands,
                             bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;
  // ...
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }
  // ...
  for (unsigned E = Operands.size(); I != E; ++I) {
    // ...
    if (TiedTo != -1) {
      // ...
    }
    // ...
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // ...
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // ...
    }
    // ...
      Op.addImmOperands(Inst, 1);
    // ...
      Op.addRegWithFPInputModsOperands(Inst, 2);
    } else if (Op.isDppFI()) {
      // ...
    } else if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    }
    // ...
      Op.addRegWithFPInputModsOperands(Inst, 2);
    } else if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isDPPCtrl()) {
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      // ...
      OptionalIdx[Op.getImmTy()] = I;
    }
    // ...
  }
  // ...
  using namespace llvm::AMDGPU::DPP;
  // ...
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyDppFI);
  // ...
}
ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
                                          StringRef Prefix,
                                          AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(
      Operands, Prefix,
      {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
      Type);
}

ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  return parseStringOrIntWithPrefix(
      Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
      AMDGPUOperand::ImmTySDWADstUnused);
}
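// Illustration (not part of the parser): the selector keywords above appear
// in SDWA assembly as, e.g. (comment-only example):
//   v_add_f32_sdwa v0, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:WORD_1
// parseStringOrIntWithPrefix accepts either the keyword or its integer index
// into the keyword list.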
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  // ...
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  // ...
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  // ...
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  // ...
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  // ...
}
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc, bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;
  // ...
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }
  // ...
  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // ...
    }
    // ...
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // ...
      OptionalIdx[Op.getImmTy()] = I;
    }
    // ...
    SkippedVcc = false;
  }
  // ...
  if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
      Opc != AMDGPU::V_NOP_sdwa_vi) {
    // ...
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyClamp, 0);
      // ...
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyOModSI, 0);
      // ...
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
      // ...
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySDWADstUnused,
                            DstUnused::UNUSED_PRESERVE);
      // ...
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyClamp, 0);
      // ...
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySDWADstUnused,
                            DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      // ...
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyClamp, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }
  // ...
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto *it = Inst.begin();
    // ...
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    // ...
  }
}
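// Illustration (not part of the parser): V_MAC_F32_sdwa_vi and
// V_MAC_F16_sdwa_vi keep src2 tied to the destination, so after conversion
// the code above positions an iterator at the src2 slot; the surrounding
// (elided) lines then splice the tied destination operand into that slot so
// the MCInst carries an explicit src2. Comment-only note.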
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"
  // ...
    return parseTokenOp("addr64", Operands);
  // ...
    return parseNamedBit("done", Operands, AMDGPUOperand::ImmTyDone, true);
  // ...
    return parseTokenOp("idxen", Operands);
  // ...
    return parseNamedBit("lds", Operands, AMDGPUOperand::ImmTyLDS, /* ... */
  // ...
    return parseTokenOp("offen", Operands);
  // ...
    return parseTokenOp("off", Operands);
  case MCK_row_95_en:
    return parseNamedBit("row_en", Operands, AMDGPUOperand::ImmTyRowEn, true);
  // ...
    return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
  // ...
    return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
  // ...
    return tryCustomParseOperand(Operands, MCK);
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // ...
  AMDGPUOperand &Operand = (AMDGPUOperand &)Op;
  // ...
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  // ...
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  // ...
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  // ...
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  // ...
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  // ...
    return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
  // ...
    return Operand.isDone() ? Match_Success : Match_InvalidOperand;
  case MCK_row_95_en:
    return Operand.isRowEn() ? Match_Success : Match_InvalidOperand;
  // ...
    return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
  // ...
    return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
  case MCK_SOPPBrTarget:
    return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttrChan:
    return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
  // ...
  case MCK_SReg_64_XEXEC:
    // ...
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  // ...
    return Match_InvalidOperand;
}
ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
  SMLoc S = getLoc();
  // ...
    return Error(S, "expected a 16-bit value");
  // ...
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  // ...
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }

bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }