511 : ST(&_ST), MRI(&_MRI) {
513 addRulesForGOpcs({G_ADD, G_SUB},
Standard)
523 addRulesForGOpcs({G_UADDO, G_USUBO},
Standard)
527 addRulesForGOpcs({G_UADDE, G_USUBE, G_SADDE, G_SSUBE},
Standard)
531 addRulesForGOpcs({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT},
Standard)
539 bool HasVecMulU64 = ST->hasVectorMulU64();
551 bool hasMulHi = ST->hasScalarMulHiInsts();
552 addRulesForGOpcs({G_UMULH, G_SMULH},
Standard)
557 addRulesForGOpcs({G_AMDGPU_MAD_U64_U32},
Standard)
561 bool HasScalarSMulU64 = ST->hasScalarSMulU64();
562 addRulesForGOpcs({G_AMDGPU_S_MUL_U64_U32, G_AMDGPU_S_MUL_I64_I32},
Standard)
566 addRulesForGOpcs({G_XOR, G_OR, G_AND},
StandardB)
586 addRulesForGOpcs({G_LSHR},
Standard)
596 addRulesForGOpcs({G_ASHR},
Standard)
606 addRulesForGOpcs({G_FSHR},
Standard)
610 addRulesForGOpcs({G_BSWAP},
Standard)
618 addRulesForGOpcs({G_AMDGPU_CVT_F32_UBYTE0, G_AMDGPU_CVT_F32_UBYTE1,
619 G_AMDGPU_CVT_F32_UBYTE2, G_AMDGPU_CVT_F32_UBYTE3,
627 addRulesForGOpcs({G_UBFX, G_SBFX},
Standard)
633 addRulesForGOpcs({G_SMIN, G_SMAX},
Standard)
641 addRulesForGOpcs({G_UMIN, G_UMAX},
Standard)
652 addRulesForGOpcs({G_CONSTANT})
655 addRulesForGOpcs({G_FREEZE})
662 addRulesForGOpcs({G_UNMERGE_VALUES})
667 addRulesForGOpcs({G_PHI})
677 addRulesForGOpcs({G_AMDGPU_INTRIN_IMAGE_LOAD, G_AMDGPU_INTRIN_IMAGE_LOAD_D16,
678 G_AMDGPU_INTRIN_IMAGE_LOAD_NORET,
679 G_AMDGPU_INTRIN_IMAGE_STORE,
680 G_AMDGPU_INTRIN_IMAGE_STORE_D16})
695 bool HasScalarCompareEq64 = ST->hasScalarCompareEq64();
697 addRulesForGOpcs({G_ICMP})
716 addRulesForGOpcs({G_BRCOND})
720 addRulesForGOpcs({G_BR}).
Any({{
_}, {{}, {
None}}});
730 addRulesForGOpcs({G_ANYEXT})
742 bool Has16bitCmp = ST->has16BitInsts();
746 addRulesForGOpcs({G_TRUNC})
763 addRulesForGOpcs({G_ZEXT})
778 addRulesForGOpcs({G_SEXT})
793 addRulesForGOpcs({G_SEXT_INREG})
799 addRulesForGOpcs({G_ASSERT_ZEXT, G_ASSERT_SEXT},
Standard)
805 addRulesForGOpcs({G_ASSERT_ALIGN},
Standard)
817 addRulesForGOpcs({G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_XCHG,
818 G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
819 G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN,
820 G_ATOMICRMW_UMAX, G_ATOMICRMW_UINC_WRAP,
821 G_ATOMICRMW_UDEC_WRAP, G_ATOMICRMW_FMIN, G_ATOMICRMW_FMAX})
829 bool HasAtomicFlatPkAdd16Insts = ST->hasAtomicFlatPkAdd16Insts();
830 bool HasAtomicBufferGlobalPkAddF16Insts =
831 ST->hasAtomicBufferGlobalPkAddF16NoRtnInsts() ||
832 ST->hasAtomicBufferGlobalPkAddF16Insts();
833 bool HasAtomicDsPkAdd16Insts = ST->hasAtomicDsPkAdd16Insts();
834 addRulesForGOpcs({G_ATOMICRMW_FADD})
842 HasAtomicFlatPkAdd16Insts)
844 HasAtomicBufferGlobalPkAddF16Insts)
846 HasAtomicDsPkAdd16Insts);
848 addRulesForGOpcs({G_ATOMIC_CMPXCHG})
854 addRulesForGOpcs({G_AMDGPU_ATOMIC_CMPXCHG})
860 addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_CMPSWAP},
Standard)
866 addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_ADD, G_AMDGPU_BUFFER_ATOMIC_AND,
867 G_AMDGPU_BUFFER_ATOMIC_DEC, G_AMDGPU_BUFFER_ATOMIC_FMAX,
868 G_AMDGPU_BUFFER_ATOMIC_FMIN, G_AMDGPU_BUFFER_ATOMIC_INC,
869 G_AMDGPU_BUFFER_ATOMIC_OR, G_AMDGPU_BUFFER_ATOMIC_SMAX,
870 G_AMDGPU_BUFFER_ATOMIC_SMIN, G_AMDGPU_BUFFER_ATOMIC_SUB,
871 G_AMDGPU_BUFFER_ATOMIC_SWAP, G_AMDGPU_BUFFER_ATOMIC_UMAX,
872 G_AMDGPU_BUFFER_ATOMIC_UMIN, G_AMDGPU_BUFFER_ATOMIC_XOR},
877 bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
878 bool hasSMRDSmall = ST->hasScalarSubwordLoads();
879 bool usesTrue16 = ST->useRealTrue16Insts();
882 return (*
MI.memoperands_begin())->getAlign() >=
Align(16);
886 return (*
MI.memoperands_begin())->getAlign() >=
Align(4);
890 return (*
MI.memoperands_begin())->isAtomic();
906 return (*
MI.memoperands_begin())->isVolatile();
910 return (*
MI.memoperands_begin())->isInvariant();
925 return MemSize == 16 || MemSize == 8;
933 auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
934 (isConst || isInvMMO || isNoClobberMMO);
938 addRulesForGOpcs({G_LOAD})
1045 addRulesForGOpcs({G_ZEXTLOAD, G_SEXTLOAD})
1065 addRulesForGOpcs({G_STORE})
1101 addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
1102 G_AMDGPU_TBUFFER_LOAD_FORMAT},
1113 addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_USHORT, G_AMDGPU_BUFFER_LOAD_UBYTE,
1114 G_AMDGPU_BUFFER_LOAD_SSHORT, G_AMDGPU_BUFFER_LOAD_SBYTE},
1120 {G_AMDGPU_BUFFER_LOAD_UBYTE_TFE, G_AMDGPU_BUFFER_LOAD_USHORT_TFE},
1125 addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_TFE, G_AMDGPU_BUFFER_LOAD_FORMAT_TFE},
1138 {G_AMDGPU_BUFFER_LOAD_FORMAT_D16, G_AMDGPU_TBUFFER_LOAD_FORMAT_D16},
1147 addRulesForGOpcs({G_AMDGPU_BUFFER_STORE, G_AMDGPU_BUFFER_STORE_BYTE,
1148 G_AMDGPU_BUFFER_STORE_SHORT, G_AMDGPU_BUFFER_STORE_FORMAT,
1149 G_AMDGPU_BUFFER_STORE_FORMAT_D16,
1150 G_AMDGPU_TBUFFER_STORE_FORMAT,
1151 G_AMDGPU_TBUFFER_STORE_FORMAT_D16})
1163 addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_FADD})
1172 addRulesForGOpcs({G_PTR_ADD})
1178 addRulesForGOpcs({G_INTTOPTR})
1186 addRulesForGOpcs({G_PTRTOINT})
1196 addRulesForGOpcs({G_PTRMASK})
1204 addRulesForGOpcs({G_BITREVERSE},
Standard)
1210 addRulesForGOpcs({G_AMDGPU_FFBH_U32, G_AMDGPU_FFBL_B32, G_CTLZ_ZERO_UNDEF,
1217 addRulesForGOpcs({G_FENCE}).
Any({{{}}, {{}, {}}});
1219 addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER},
Standard)
1224 addRulesForGOpcs({G_GLOBAL_VALUE})
1231 addRulesForGOpcs({G_AMDGPU_WAVE_ADDRESS}).
Any({{
UniP5}, {{
SgprP5}, {}}});
1233 addRulesForGOpcs({G_SI_CALL})
1239 bool hasSALUFloat = ST->hasSALUFloatInsts();
1241 addRulesForGOpcs({G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL},
Standard)
1255 addRulesForGOpcs({G_FSUB, G_STRICT_FSUB},
Standard)
1263 addRulesForGOpcs({G_FMAD},
Standard)
1269 addRulesForGOpcs({G_FLDEXP, G_STRICT_FLDEXP},
Standard)
1277 addRulesForGOpcs({G_FMA, G_STRICT_FMA},
Standard)
1295 addRulesForGOpcs({G_AMDGPU_FMED3},
Standard)
1304 addRulesForGOpcs({G_AMDGPU_SMED3},
Standard)
1312 addRulesForGOpcs({G_FNEG, G_FABS},
Standard)
1327 addRulesForGOpcs({G_FCANONICALIZE},
Standard)
1339 bool hasPST = ST->hasPseudoScalarTrans();
1340 addRulesForGOpcs({G_FSQRT},
Standard)
1345 addRulesForGOpcs({G_FPTOUI, G_FPTOSI})
1357 addRulesForGOpcs({G_UITOFP, G_SITOFP})
1369 addRulesForGOpcs({G_FPEXT})
1376 addRulesForGOpcs({G_AMDGPU_CVT_PK_I16_I32},
Standard)
1380 addRulesForGOpcs({G_AMDGPU_FMIN_LEGACY, G_AMDGPU_FMAX_LEGACY},
Standard)
1384 bool hasSALUMinimumMaximumInsts = ST->hasSALUMinimumMaximumInsts();
1386 addRulesForGOpcs({G_FMINIMUM, G_FMAXIMUM},
Standard)
1398 addRulesForGOpcs({G_FMINNUM_IEEE, G_FMAXNUM_IEEE, G_FMINNUM, G_FMAXNUM},
1411 addRulesForGOpcs({G_FPTRUNC})
1420 addRulesForGOpcs({G_IS_FPCLASS})
1428 addRulesForGOpcs({G_FCMP},
Standard)
1442 addRulesForGOpcs({G_INTRINSIC_TRUNC, G_INTRINSIC_ROUNDEVEN, G_FFLOOR, G_FCEIL,
1458 addRulesForIOpcs({amdgcn_s_setreg})
1461 addRulesForIOpcs({amdgcn_s_sendmsg, amdgcn_s_sendmsghalt})
1464 addRulesForIOpcs({amdgcn_s_sendmsg_rtn})
1468 addRulesForIOpcs({amdgcn_s_memrealtime},
Standard)
1471 addRulesForIOpcs({amdgcn_groupstaticsize, amdgcn_pops_exiting_wave_id,
1472 amdgcn_reloc_constant},
1477 addRulesForIOpcs({amdgcn_endpgm,
1479 amdgcn_s_barrier_signal,
1480 amdgcn_s_barrier_wait,
1485 amdgcn_s_ttracedata_imm,
1486 amdgcn_s_wait_asynccnt,
1487 amdgcn_s_wait_bvhcnt,
1488 amdgcn_s_wait_dscnt,
1489 amdgcn_s_wait_event,
1490 amdgcn_s_wait_event_export_ready,
1491 amdgcn_s_wait_expcnt,
1492 amdgcn_s_wait_kmcnt,
1493 amdgcn_s_wait_loadcnt,
1494 amdgcn_s_wait_samplecnt,
1495 amdgcn_s_wait_storecnt,
1496 amdgcn_s_wait_tensorcnt,
1498 amdgcn_wave_barrier})
1499 .
Any({{}, {{}, {}}});
1503 addRulesForIOpcs({amdgcn_s_sleep_var})
1506 addRulesForIOpcs({amdgcn_s_prefetch_data})
1509 addRulesForIOpcs({amdgcn_class})
1518 addRulesForIOpcs({amdgcn_end_cf})
1522 addRulesForIOpcs({amdgcn_if_break},
Standard)
1526 addRulesForIOpcs({amdgcn_exp})
1530 addRulesForIOpcs({amdgcn_exp_compr})
1533 addRulesForIOpcs({amdgcn_exp_row})
1539 addRulesForIOpcs({amdgcn_lds_direct_load},
StandardB)
1542 addRulesForIOpcs({amdgcn_lds_param_load},
Standard)
1545 addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi},
Standard)
1548 addRulesForIOpcs({amdgcn_readfirstlane})
1554 addRulesForIOpcs({amdgcn_readlane},
StandardB)
1557 addRulesForIOpcs({amdgcn_writelane},
StandardB)
1562 addRulesForIOpcs({amdgcn_permlane16, amdgcn_permlanex16},
Standard)
1567 addRulesForIOpcs({amdgcn_perm},
Standard)
1571 addRulesForIOpcs({amdgcn_wave_reduce_umax, amdgcn_wave_reduce_umin},
Standard)
1577 addRulesForIOpcs({amdgcn_bitop3, amdgcn_fmad_ftz},
Standard)
1583 addRulesForIOpcs({amdgcn_udot4, amdgcn_sdot4, amdgcn_udot8, amdgcn_sdot8,
1584 amdgcn_dot4_f32_bf8_bf8, amdgcn_dot4_f32_bf8_fp8,
1585 amdgcn_dot4_f32_fp8_fp8, amdgcn_dot4_f32_fp8_bf8},
1590 addRulesForIOpcs({amdgcn_mul_u24, amdgcn_mul_i24},
Standard)
1596 addRulesForIOpcs({amdgcn_ds_bpermute, amdgcn_ds_bpermute_fi_b32,
1597 amdgcn_ds_permute, amdgcn_fmul_legacy, amdgcn_mulhi_i24,
1603 addRulesForIOpcs({amdgcn_cubesc, amdgcn_cubetc, amdgcn_cubema, amdgcn_cubeid,
1609 addRulesForIOpcs({amdgcn_frexp_mant, amdgcn_fract},
Standard)
1617 addRulesForIOpcs({amdgcn_prng_b32})
1621 addRulesForIOpcs({amdgcn_sffbh},
Standard)
1625 addRulesForIOpcs({amdgcn_ubfe, amdgcn_sbfe},
Standard)
1631 addRulesForIOpcs({amdgcn_cvt_pk_i16, amdgcn_cvt_pk_u16, amdgcn_cvt_pknorm_i16,
1632 amdgcn_cvt_pknorm_u16, amdgcn_cvt_pkrtz},
1637 addRulesForIOpcs({amdgcn_global_load_tr_b64})
1641 addRulesForIOpcs({amdgcn_global_load_tr_b128})
1645 addRulesForIOpcs({amdgcn_global_atomic_ordered_add_b64})
1649 {amdgcn_global_atomic_fmin_num, amdgcn_global_atomic_fmax_num},
Standard)
1652 addRulesForIOpcs({amdgcn_flat_atomic_fmin_num, amdgcn_flat_atomic_fmax_num},
1656 addRulesForIOpcs({amdgcn_raw_buffer_load_lds})
1659 addRulesForIOpcs({amdgcn_struct_buffer_load_lds})
1663 addRulesForIOpcs({amdgcn_raw_ptr_buffer_load_lds})
1666 addRulesForIOpcs({amdgcn_struct_ptr_buffer_load_lds})
1669 addRulesForIOpcs({amdgcn_global_load_lds})
1672 addRulesForIOpcs({amdgcn_wwm, amdgcn_strict_wwm, amdgcn_wqm, amdgcn_softwqm,
1688 addRulesForIOpcs({amdgcn_wqm_demote}).
Any({{}, {{}, {
IntrId,
Vcc}}});
1690 addRulesForIOpcs({amdgcn_live_mask, amdgcn_ps_live})
1693 addRulesForIOpcs({amdgcn_mov_dpp, amdgcn_mov_dpp8},
StandardB)
1697 addRulesForIOpcs({amdgcn_sin, amdgcn_cos},
Standard)
1703 addRulesForIOpcs({amdgcn_trig_preop},
Standard)
1707 addRulesForIOpcs({amdgcn_ds_add_gs_reg_rtn, amdgcn_ds_sub_gs_reg_rtn},
1712 addRulesForIOpcs({amdgcn_ds_append, amdgcn_ds_consume},
Standard)
1717 {amdgcn_ds_bvh_stack_rtn, amdgcn_ds_bvh_stack_push4_pop1_rtn},
Standard)
1720 addRulesForIOpcs({amdgcn_ds_bvh_stack_push8_pop1_rtn},
Standard)
1723 addRulesForIOpcs({amdgcn_ds_bvh_stack_push8_pop2_rtn},
Standard)
1726 addRulesForIOpcs({amdgcn_ds_ordered_add, amdgcn_ds_ordered_swap},
Standard)
1729 addRulesForIOpcs({amdgcn_ds_swizzle},
Standard)
1733 addRulesForIOpcs({amdgcn_permlane16_var, amdgcn_permlanex16_var},
Standard)
1736 addRulesForIOpcs({amdgcn_permlane16_swap, amdgcn_permlane32_swap},
Standard)
1739 addRulesForIOpcs({amdgcn_permlane64},
StandardB)
1742 addRulesForIOpcs({amdgcn_ds_read_tr4_b64, amdgcn_ds_read_tr8_b64})
1745 addRulesForIOpcs({amdgcn_ds_read_tr6_b96})
1748 addRulesForIOpcs({amdgcn_ds_read_tr16_b64})
1751 addRulesForIOpcs({amdgcn_interp_inreg_p10, amdgcn_interp_inreg_p2,
1752 amdgcn_interp_inreg_p10_f16, amdgcn_interp_p10_rtz_f16},
1757 addRulesForIOpcs({amdgcn_interp_inreg_p2_f16, amdgcn_interp_p2_rtz_f16},
1762 addRulesForIOpcs({amdgcn_div_fmas},
Standard)
1768 addRulesForIOpcs({amdgcn_div_fixup},
Standard)
1776 addRulesForIOpcs({amdgcn_div_scale},
Standard)
1782 addRulesForIOpcs({amdgcn_udot2, amdgcn_sdot2},
Standard)
1786 addRulesForIOpcs({amdgcn_sudot4, amdgcn_sudot8},
Standard)