AMDGPURegBankLegalizeRules.cpp
//===-- AMDGPURegBankLegalizeRules.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Definitions of RegBankLegalize Rules for all opcodes.
/// Implementation of container for all the Rules and search.
/// Fast search for most common case when Rule.Predicate checks LLT and
/// uniformity of register in operand 0.
//
//===----------------------------------------------------------------------===//
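//
// Illustrative example (taken from the rules defined below): a rule set is
// declared per opcode and split by result uniformity, e.g.
//   addRulesForGOpcs({G_ADD, G_SUB}, Standard)
//       .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
//       .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});
// A uniform s32 add/sub keeps all operands in SGPRs, a divergent one in
// VGPRs; RegBankLegalize applies whichever mapping matches a given MI.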

#include "AMDGPURegBankLegalizeRules.h"
#include "AMDGPUInstrInfo.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Support/AMDGPUAddrSpace.h"

#define DEBUG_TYPE "amdgpu-regbanklegalize"

using namespace llvm;
using namespace AMDGPU;

bool AMDGPU::isAnyPtr(LLT Ty, unsigned Width) {
  return Ty.isPointer() && Ty.getSizeInBits() == Width;
}

RegBankLLTMapping::RegBankLLTMapping(
    std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
    std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
    LoweringMethodID LoweringMethod)
    : DstOpMapping(DstOpMappingList), SrcOpMapping(SrcOpMappingList),
      LoweringMethod(LoweringMethod) {}

PredicateMapping::PredicateMapping(
    std::initializer_list<UniformityLLTOpPredicateID> OpList,
    std::function<bool(const MachineInstr &)> TestFunc)
    : OpUniformityAndTypes(OpList), TestFunc(TestFunc) {}

bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
                           const MachineUniformityInfo &MUI,
                           const MachineRegisterInfo &MRI) {
  switch (UniID) {
  case S1:
    return MRI.getType(Reg) == LLT::scalar(1);
  case S16:
    return MRI.getType(Reg) == LLT::scalar(16);
  case S32:
    return MRI.getType(Reg) == LLT::scalar(32);
  case S64:
    return MRI.getType(Reg) == LLT::scalar(64);
  case S128:
    return MRI.getType(Reg) == LLT::scalar(128);
  case P0:
    return MRI.getType(Reg) == LLT::pointer(0, 64);
  case P1:
    return MRI.getType(Reg) == LLT::pointer(1, 64);
  case P2:
    return MRI.getType(Reg) == LLT::pointer(2, 32);
  case P3:
    return MRI.getType(Reg) == LLT::pointer(3, 32);
  case P4:
    return MRI.getType(Reg) == LLT::pointer(4, 64);
  case P5:
    return MRI.getType(Reg) == LLT::pointer(5, 32);
  case P8:
    return MRI.getType(Reg) == LLT::pointer(8, 128);
  case Ptr32:
    return isAnyPtr(MRI.getType(Reg), 32);
  case Ptr64:
    return isAnyPtr(MRI.getType(Reg), 64);
  case Ptr128:
    return isAnyPtr(MRI.getType(Reg), 128);
  case V2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16);
  case V2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32);
  case V3S32:
    return MRI.getType(Reg) == LLT::fixed_vector(3, 32);
  case V4S32:
    return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
  case B32:
    return MRI.getType(Reg).getSizeInBits() == 32;
  case B64:
    return MRI.getType(Reg).getSizeInBits() == 64;
  case B96:
    return MRI.getType(Reg).getSizeInBits() == 96;
  case B128:
    return MRI.getType(Reg).getSizeInBits() == 128;
  case B160:
    return MRI.getType(Reg).getSizeInBits() == 160;
  case B256:
    return MRI.getType(Reg).getSizeInBits() == 256;
  case B512:
    return MRI.getType(Reg).getSizeInBits() == 512;
  case DivAnyTy:
    return MUI.isDivergentAtDef(Reg);
  case UniS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isUniformAtDef(Reg);
  case UniS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isUniformAtDef(Reg);
  case UniS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isUniformAtDef(Reg);
  case UniS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isUniformAtDef(Reg);
  case UniS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isUniformAtDef(Reg);
  case UniP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isUniformAtDef(Reg);
  case UniP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isUniformAtDef(Reg);
  case UniP2:
    return MRI.getType(Reg) == LLT::pointer(2, 32) && MUI.isUniformAtDef(Reg);
  case UniP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isUniformAtDef(Reg);
  case UniP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniformAtDef(Reg);
  case UniP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniformAtDef(Reg);
  case UniP8:
    return MRI.getType(Reg) == LLT::pointer(8, 128) && MUI.isUniformAtDef(Reg);
  case UniPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isUniformAtDef(Reg);
  case UniPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isUniformAtDef(Reg);
  case UniPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isUniformAtDef(Reg);
  case UniV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) &&
           MUI.isUniformAtDef(Reg);
  case UniV2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32) &&
           MUI.isUniformAtDef(Reg);
  case UniB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniformAtDef(Reg);
  case UniB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isUniformAtDef(Reg);
  case UniB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isUniformAtDef(Reg);
  case UniB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isUniformAtDef(Reg);
  case UniB160:
    return MRI.getType(Reg).getSizeInBits() == 160 && MUI.isUniformAtDef(Reg);
  case UniB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isUniformAtDef(Reg);
  case UniB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniformAtDef(Reg);
  case UniBRC: {
    if (!MUI.isUniformAtDef(Reg))
      return false;
    // Check if there is an SGPR register class of the same size as the LLT.
    const SIRegisterInfo *TRI =
        static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
    // There is no 16-bit SGPR register class. The extra size check is
    // required since getSGPRClassForBitWidth returns SReg_32RegClass for
    // size 16.
    unsigned LLTSize = MRI.getType(Reg).getSizeInBits();
    return LLTSize >= 32 && TRI->getSGPRClassForBitWidth(LLTSize);
  }
  case DivS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isDivergentAtDef(Reg);
  case DivS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isDivergentAtDef(Reg);
  case DivS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergentAtDef(Reg);
  case DivS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isDivergentAtDef(Reg);
  case DivS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isDivergentAtDef(Reg);
  case DivP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isDivergentAtDef(Reg);
  case DivP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isDivergentAtDef(Reg);
  case DivP2:
    return MRI.getType(Reg) == LLT::pointer(2, 32) && MUI.isDivergentAtDef(Reg);
  case DivP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isDivergentAtDef(Reg);
  case DivP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isDivergentAtDef(Reg);
  case DivP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isDivergentAtDef(Reg);
  case DivPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isDivergentAtDef(Reg);
  case DivPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isDivergentAtDef(Reg);
  case DivPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isDivergentAtDef(Reg);
  case DivV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) &&
           MUI.isDivergentAtDef(Reg);
  case DivV2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32) &&
           MUI.isDivergentAtDef(Reg);
  case DivV3S32:
    return MRI.getType(Reg) == LLT::fixed_vector(3, 32) &&
           MUI.isDivergentAtDef(Reg);
  case DivV4S16:
    return MRI.getType(Reg) == LLT::fixed_vector(4, 16) &&
           MUI.isDivergentAtDef(Reg);
  case DivV6S32:
    return MRI.getType(Reg) == LLT::fixed_vector(6, 32) &&
           MUI.isDivergentAtDef(Reg);
  case DivB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergentAtDef(Reg);
  case DivB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isDivergentAtDef(Reg);
  case DivB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isDivergentAtDef(Reg);
  case DivB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isDivergentAtDef(Reg);
  case DivB160:
    return MRI.getType(Reg).getSizeInBits() == 160 && MUI.isDivergentAtDef(Reg);
  case DivB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isDivergentAtDef(Reg);
  case DivB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergentAtDef(Reg);
  case DivBRC: {
    if (!MUI.isDivergentAtDef(Reg))
      return false;
    // Check if there is a VGPR register class of the same size as the LLT.
    const SIRegisterInfo *TRI =
        static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
    return TRI->getVGPRClassForBitWidth(MRI.getType(Reg).getSizeInBits());
  }
  case BRC: {
    // Check if there are SGPR and VGPR register classes of the same size as
    // the LLT.
    const SIRegisterInfo *TRI =
        static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
    unsigned LLTSize = MRI.getType(Reg).getSizeInBits();
    return LLTSize >= 32 && TRI->getSGPRClassForBitWidth(LLTSize) &&
           TRI->getVGPRClassForBitWidth(LLTSize);
  }
  case _:
    return true;
  default:
    llvm_unreachable("missing matchUniformityAndLLT");
  }
}

bool PredicateMapping::match(const MachineInstr &MI,
                             const MachineUniformityInfo &MUI,
                             const MachineRegisterInfo &MRI) const {
  // Check LLT signature.
  for (unsigned i = 0; i < OpUniformityAndTypes.size(); ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (OpUniformityAndTypes[i] == _) {
      assert((!MI.getOperand(i).isReg() ||
              !MI.getOperand(i).getReg().isVirtual()) &&
             "_ is for non-register and physical register operands only");
      continue;
    }

    // Remaining IDs check registers.
    if (!MO.isReg())
      return false;

    if (!matchUniformityAndLLT(MO.getReg(), OpUniformityAndTypes[i], MUI, MRI))
      return false;
  }

  // More complex check.
  if (TestFunc)
    return TestFunc(MI);

  return true;
}
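
// For example, the signature {DivS1, _, S64} used by the G_ICMP rules below
// matches a compare whose s1 result is divergent, whose operand 1 is a
// non-register (the predicate immediate, matched by '_'), and whose two
// source registers are 64-bit scalars.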

SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)
    : FastTypes(FastTypes) {}

UniformityLLTOpPredicateID LLTToId(LLT Ty) {
  if (Ty == LLT::scalar(16))
    return S16;
  if (Ty == LLT::scalar(32))
    return S32;
  if (Ty == LLT::scalar(64))
    return S64;
  if (Ty == LLT::fixed_vector(2, 16))
    return V2S16;
  if (Ty == LLT::fixed_vector(2, 32))
    return V2S32;
  if (Ty == LLT::fixed_vector(3, 32))
    return V3S32;
  if (Ty == LLT::fixed_vector(4, 32))
    return V4S32;
  return _;
}

UniformityLLTOpPredicateID LLTToBId(LLT Ty) {
  if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) ||
      isAnyPtr(Ty, 32))
    return B32;
  if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
      Ty == LLT::fixed_vector(4, 16) || isAnyPtr(Ty, 64))
    return B64;
  if (Ty == LLT::fixed_vector(3, 32))
    return B96;
  if (Ty == LLT::fixed_vector(4, 32) || Ty == LLT::fixed_vector(2, 64) ||
      Ty == LLT::fixed_vector(8, 16) || isAnyPtr(Ty, 128))
    return B128;
  return _;
}

const RegBankLLTMapping *
SetOfRulesForOpcode::findMappingForMI(const MachineInstr &MI,
                                      const MachineRegisterInfo &MRI,
                                      const MachineUniformityInfo &MUI) const {
  // Search in "Fast Rules".
  // Note: if fast rules are enabled, a RegBankLLTMapping must be added in each
  // slot that could "match the fast Predicate". If not, InvalidMapping is
  // returned, which results in failure; the "Slow Rules" are not searched.
  if (FastTypes != NoFastRules) {
    Register Reg = MI.getOperand(0).getReg();
    int Slot;
    if (FastTypes == StandardB)
      Slot = getFastPredicateSlot(LLTToBId(MRI.getType(Reg)));
    else
      Slot = getFastPredicateSlot(LLTToId(MRI.getType(Reg)));

    if (Slot != -1)
      return MUI.isUniformAtDef(Reg) ? &Uni[Slot] : &Div[Slot];
  }

  // Slow search for more complex rules.
  for (const RegBankLegalizeRule &Rule : Rules) {
    if (Rule.Predicate.match(MI, MUI, MRI))
      return &Rule.OperandMapping;
  }

  return nullptr;
}

void SetOfRulesForOpcode::addRule(RegBankLegalizeRule Rule) {
  Rules.push_back(Rule);
}

void SetOfRulesForOpcode::addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
                                               RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Div[Slot] = std::move(RuleApplyIDs);
}

void SetOfRulesForOpcode::addFastRuleUniform(UniformityLLTOpPredicateID Ty,
                                             RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Uni[Slot] = std::move(RuleApplyIDs);
}

int SetOfRulesForOpcode::getFastPredicateSlot(
    UniformityLLTOpPredicateID Ty) const {
  switch (FastTypes) {
  case Standard: {
    switch (Ty) {
    case S32:
      return 0;
    case S16:
      return 1;
    case S64:
      return 2;
    case V2S16:
      return 3;
    default:
      return -1;
    }
  }
  case StandardB: {
    switch (Ty) {
    case B32:
      return 0;
    case B64:
      return 1;
    case B96:
      return 2;
    case B128:
      return 3;
    default:
      return -1;
    }
  }
  case Vector: {
    switch (Ty) {
    case S32:
      return 0;
    case V2S32:
      return 1;
    case V3S32:
      return 2;
    case V4S32:
      return 3;
    default:
      return -1;
    }
  }
  default:
    return -1;
  }
}
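
// E.g. with Standard fast rules, a uniform def of type s32 is served from
// Uni[0] and a divergent one from Div[0]; types without a slot (-1) fall
// back to the slow-rule search in findMappingForMI.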

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, GRulesAlias, GRules, FastTypes);
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, IRulesAlias, IRules, FastTypes);
}

const SetOfRulesForOpcode *
RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();
  if (Opc == AMDGPU::G_INTRINSIC || Opc == AMDGPU::G_INTRINSIC_CONVERGENT ||
      Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS ||
      Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
    unsigned IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
    auto IRAIt = IRulesAlias.find(IntrID);
    if (IRAIt == IRulesAlias.end())
      return nullptr;
    return &IRules.at(IRAIt->second);
  }

  auto GRAIt = GRulesAlias.find(Opc);
  if (GRAIt == GRulesAlias.end())
    return nullptr;
  return &GRules.at(GRAIt->second);
}

// Syntactic sugar wrapper for predicate lambda that enables '&&', '||' and '!'.
class Predicate {
private:
  struct Elt {
    // Save formula composed of Pred, '&&', '||' and '!' as a jump table.
    // Sink ! to Pred. For example !((A && !B) || C) -> (!A || B) && !C
    // Sequences of && and || will be represented by jumps, for example:
    // (A && B && ... X) or (A && B && ... X) || Y
    //   A == true jump to B
    //   A == false jump to end or Y, result is A(false) or Y
    // (A || B || ... X) or (A || B || ... X) && Y
    //   A == true jump to end or Y, result is A(true) or Y
    //   A == false jump to B
    // Notice that when negating an expression, we simply flip Neg on each
    // Pred and swap TJumpOffset and FJumpOffset (&& becomes ||, || becomes &&).
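    // Illustrative sketch (not in the original comment): (A && B) is stored
    // as two elements,
    //   [0] {Pred: A, Neg: false, TJumpOffset: 1 /*to B*/, FJumpOffset: 2 /*to end*/}
    //   [1] {Pred: B, Neg: false, TJumpOffset: 1, FJumpOffset: 1 /*to end*/}
    // If A is false we jump straight past B and the result is A's value
    // (false); otherwise the result is whatever B evaluates to.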
    std::function<bool(const MachineInstr &)> Pred;
    bool Neg; // Neg of Pred is calculated before jump
    unsigned TJumpOffset;
    unsigned FJumpOffset;
  };

  SmallVector<Elt, 8> Expression;

  Predicate(SmallVectorImpl<Elt> &&Expr) { Expression.swap(Expr); };

public:
  Predicate(std::function<bool(const MachineInstr &)> Pred) {
    Expression.push_back({Pred, false, 1, 1});
  };

  bool operator()(const MachineInstr &MI) const {
    unsigned Idx = 0;
    unsigned ResultIdx = Expression.size();
    bool Result;
    do {
      Result = Expression[Idx].Pred(MI);
      Result = Expression[Idx].Neg ? !Result : Result;
      if (Result) {
        Idx += Expression[Idx].TJumpOffset;
      } else {
        Idx += Expression[Idx].FJumpOffset;
      }
    } while (Idx != ResultIdx);

    return Result;
  };

  Predicate operator!() const {
    SmallVector<Elt, 8> NegExpression;
    for (const Elt &ExprElt : Expression) {
      NegExpression.push_back({ExprElt.Pred, !ExprElt.Neg, ExprElt.FJumpOffset,
                               ExprElt.TJumpOffset});
    }
    return Predicate(std::move(NegExpression));
  };

  Predicate operator&&(const Predicate &RHS) const {
    SmallVector<Elt, 8> AndExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in false, whole expression results in false.
      if (i + AndExpression[i].FJumpOffset == ResultIdx)
        AndExpression[i].FJumpOffset += RHSSize;
    }

    AndExpression.append(RHS.Expression);

    return Predicate(std::move(AndExpression));
  }

  Predicate operator||(const Predicate &RHS) const {
    SmallVector<Elt, 8> OrExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in true, whole expression results in true.
      if (i + OrExpression[i].TJumpOffset == ResultIdx)
        OrExpression[i].TJumpOffset += RHSSize;
    }

    OrExpression.append(RHS.Expression);

    return Predicate(std::move(OrExpression));
  }
};
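
// A minimal usage sketch (illustrative; it mirrors the MMO predicates defined
// inside the constructor below — the names here are hypothetical):
//   Predicate isAtomic([](const MachineInstr &MI) -> bool {
//     return (*MI.memoperands_begin())->isAtomic();
//   });
//   Predicate isUniform([](const MachineInstr &MI) -> bool {
//     return AMDGPU::isUniformMMO(*MI.memoperands_begin());
//   });
//   Predicate SafeSLoad = !isAtomic && isUniform; // composed jump table
//   bool Matches = SafeSLoad(MI); // short-circuit evaluation via jumps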

// Initialize rules
RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
                                           MachineRegisterInfo &_MRI)
    : ST(&_ST), MRI(&_MRI) {

  addRulesForGOpcs({G_ADD, G_SUB}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}});

  addRulesForGOpcs({G_UADDO, G_USUBO}, Standard)
      .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_UADDE, G_USUBE, G_SADDE, G_SSUBE}, Standard)
      .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32, Vcc}});

  addRulesForGOpcs({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  bool HasVecMulU64 = ST->hasVMulU64Inst();
  addRulesForGOpcs({G_MUL}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S64, {{SgprB64}, {SgprB64, SgprB64}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
      .Div(S64, {{VgprB64}, {VgprB64, VgprB64}}, HasVecMulU64)
      .Div(S64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32Mul}, !HasVecMulU64);

  bool hasMulHi = ST->hasScalarMulHiInsts();
  addRulesForGOpcs({G_UMULH, G_SMULH}, Standard)
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasMulHi)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasMulHi);

  addRulesForGOpcs({G_AMDGPU_MAD_U64_U32}, Standard)
      .Div(S64, {{Vgpr64, Vcc}, {Vgpr32, Vgpr32, Vgpr64}});

  bool HasScalarSMulU64 = ST->hasScalarSMulU64();
  addRulesForGOpcs({G_AMDGPU_S_MUL_U64_U32, G_AMDGPU_S_MUL_I64_I32}, Standard)
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}, UniMul64}, HasScalarSMulU64)
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}, DivSMulToMAD});

  addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
      .Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr16, Sgpr16}}})
      .Any({{DivS16}, {{Vgpr16}, {Vgpr16, Vgpr16}}})
      .Uni(B32, {{SgprB32}, {SgprB32, SgprB32}})
      .Div(B32, {{VgprB32}, {VgprB32, VgprB32}})
      .Uni(B64, {{SgprB64}, {SgprB64, SgprB64}})
      .Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});

  addRulesForGOpcs({G_SHL}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_LSHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_ASHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_FSHR}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_BSWAP}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16}});

  addRulesForGOpcs({G_AMDGPU_CVT_F32_UBYTE0, G_AMDGPU_CVT_F32_UBYTE1,
                    G_AMDGPU_CVT_F32_UBYTE2, G_AMDGPU_CVT_F32_UBYTE3,
                    G_AMDGPU_RCP_IFLAG},
                   Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}});

  addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}});

  addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, S_BFE})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, S_BFE})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, V_BFE});

  addRulesForGOpcs({G_SMIN, G_SMAX}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32SExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  addRulesForGOpcs({G_UMIN, G_UMAX}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  addRulesForGOpcs({G_IMPLICIT_DEF})
      .Any({{UniS1}, {{Sgpr32Trunc}, {}}})
      .Any({{UniS16}, {{Sgpr16}, {}}})
      .Any({{UniBRC}, {{SgprBRC}, {}}});

  addRulesForGOpcs({G_CONSTANT}, Standard)
      .Any({{UniS1, _}, {{Sgpr32Trunc}, {}, UniCstExt}})
      .Uni(S16, {{Sgpr16}, {}})
      .Uni(S32, {{Sgpr32}, {}})
      .Uni(S64, {{Sgpr64}, {}})
      .Any({{UniPtr32, _}, {{SgprPtr32}, {}}})
      .Any({{UniPtr64, _}, {{SgprPtr64}, {}}});

  addRulesForGOpcs({G_FCONSTANT}, Standard)
      .Uni(S16, {{Sgpr16}, {}})
      .Uni(S32, {{Sgpr32}, {}})
      .Uni(S64, {{Sgpr64}, {}});

  addRulesForGOpcs({G_FREEZE})
      .Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt}}})
      .Any({{DivS1}, {{Vcc}, {Vcc}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr16}}})
      .Any({{UniBRC}, {{SgprBRC}, {SgprBRC}}})
      .Any({{DivBRC}, {{VgprBRC}, {VgprBRC}}});

  addRulesForGOpcs({G_BITCAST})
      .Any({{UniBRC}, {{SgprBRC}, {SgprBRC}}})
      .Any({{DivBRC}, {{VgprBRC}, {VgprBRC}}});

  addRulesForGOpcs({G_UNMERGE_VALUES})
      .Any({{UniS16}, {{}, {}, UnmergeToShiftTrunc}})
      .Any({{UniBRC}, {{}, {}, VerifyAllSgpr}})
      .Any({{DivBRC}, {{}, {}, ApplyAllVgpr}});

  addRulesForGOpcs({G_BUILD_VECTOR})
      .Any({{UniBRC, S16}, {{}, {}, VerifyAllSgpr}})
      .Any({{UniBRC, BRC}, {{}, {}, VerifyAllSgpr}})
      .Any({{DivBRC, S16}, {{}, {}, ApplyAllVgpr}})
      .Any({{DivBRC, BRC}, {{}, {}, ApplyAllVgpr}});

  addRulesForGOpcs({G_MERGE_VALUES, G_CONCAT_VECTORS})
      .Any({{UniBRC, BRC}, {{}, {}, VerifyAllSgpr}})
      .Any({{DivBRC, BRC}, {{}, {}, ApplyAllVgpr}});

  addRulesForGOpcs({G_PHI})
      .Any({{UniS1}, {{}, {}, AextToS32InIncomingBlockGPHI}})
      .Any({{UniS16}, {{}, {}, VerifyAllSgprGPHI}})
      .Any({{UniBRC}, {{}, {}, VerifyAllSgprGPHI}})
      .Any({{DivBRC}, {{}, {}, VerifyAllSgprOrVgprGPHI}});

  addRulesForGOpcs({G_EXTRACT_VECTOR_ELT})
      .Any({{UniB32, UniBRC, UniS32}, {{SgprB32}, {SgprBRC, Sgpr32}}})
      .Any({{DivB32, DivBRC, UniS32}, {{VgprB32}, {VgprBRC, Sgpr32}}})
      .Any({{DivB32, BRC, DivS32},
            {{VgprB32}, {VgprBRC, Vgpr32}}})
      .Any({{UniB64, UniBRC, UniS32}, {{SgprB64}, {SgprBRC, Sgpr32}}})
      .Any({{DivB64, DivBRC, UniS32},
            {{VgprB64}, {VgprBRC, Sgpr32}}})
      .Any({{DivB64, BRC, DivS32},
            {{VgprB64}, {VgprBRC, Vgpr32}}});

  addRulesForGOpcs({G_INSERT_VECTOR_ELT})
      .Any({{UniBRC, UniBRC, B32, UniS32},
            {{SgprBRC}, {SgprBRC, SgprB32, Sgpr32}}})
      .Any(
          {{DivBRC, BRC, B32, UniS32}, {{VgprBRC}, {VgprBRC, VgprB32, Sgpr32}}})
      .Any({{DivBRC, BRC, B32, DivS32},
            {{VgprBRC}, {VgprBRC, VgprB32, Vgpr32}}})
      .Any({{DivBRC, BRC, B64, UniS32},
            {{VgprBRC}, {VgprBRC, VgprB64, Sgpr32}}})
      .Any({{DivBRC, BRC, B64, DivS32},
            {{VgprBRC}, {VgprBRC, VgprB64, Vgpr32}}});

  // INTERSECT_RAY {Div}, {{VgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
  // INTERSECT_RAY {Uni}, {{UniInVgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
  addRulesForGOpcs({G_AMDGPU_BVH_INTERSECT_RAY, G_AMDGPU_BVH_DUAL_INTERSECT_RAY,
                    G_AMDGPU_BVH8_INTERSECT_RAY})
      .Any({{}, {{}, {}, ApplyBVH_INTERSECT_RAY}});

  // LOAD {Div}, {{VgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
  // LOAD {Uni}, {{UniInVgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
  // LOAD_NORET {}, {{}, {Imm, VgprSrc, ..., Sgpr_WF_RsrcIdx}}
  // STORE {}, {{}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
  addRulesForGOpcs({G_AMDGPU_INTRIN_IMAGE_LOAD, G_AMDGPU_INTRIN_IMAGE_LOAD_D16,
                    G_AMDGPU_INTRIN_IMAGE_LOAD_NORET,
                    G_AMDGPU_INTRIN_IMAGE_STORE,
                    G_AMDGPU_INTRIN_IMAGE_STORE_D16})
      .Any({{}, {{}, {}, ApplyINTRIN_IMAGE}});

  Predicate isSignedICmp([](const MachineInstr &MI) -> bool {
    auto Pred =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
    return CmpInst::isSigned(Pred);
  });

  Predicate isEqualityICmp([](const MachineInstr &MI) -> bool {
    auto Pred =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
    return ICmpInst::isEquality(Pred);
  });

  bool HasScalarCompareEq64 = ST->hasScalarCompareEq64();
  // clang-format off
  addRulesForGOpcs({G_ICMP})
      .Any({{{UniS1, _, S16}, isEqualityICmp}, {{Sgpr32Trunc}, {None, Sgpr32ZExt, Sgpr32ZExt}}})
      .Any({{{UniS1, _, S16}, !isEqualityICmp && isSignedICmp}, {{Sgpr32Trunc}, {None, Sgpr32SExt, Sgpr32SExt}}})
      .Any({{{UniS1, _, S16}, !isEqualityICmp && !isSignedICmp}, {{Sgpr32Trunc}, {None, Sgpr32ZExt, Sgpr32ZExt}}})
      .Any({{{DivS1, _, S16}}, {{Vcc}, {None, Vgpr16, Vgpr16}}})
      .Any({{{UniS1, _, S32}}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
      .Any({{{DivS1, _, S32}}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
      .Any({{{UniS1, _, S64}, isEqualityICmp}, {{Sgpr32Trunc}, {None, Sgpr64, Sgpr64}}}, HasScalarCompareEq64)
      .Any({{{UniS1, _, S64}, isEqualityICmp}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}}, !HasScalarCompareEq64)
      .Any({{{UniS1, _, S64}, !isEqualityICmp}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}})
      .Any({{{DivS1, _, S64}}, {{Vcc}, {None, Vgpr64, Vgpr64}}})
      .Any({{{UniS1, _, Ptr32}}, {{Sgpr32Trunc}, {None, SgprPtr32, SgprPtr32}}})
      .Any({{{DivS1, _, Ptr32}}, {{Vcc}, {None, VgprPtr32, VgprPtr32}}})
      .Any({{{UniS1, _, Ptr64}, isEqualityICmp}, {{Sgpr32Trunc}, {None, SgprPtr64, SgprPtr64}}}, HasScalarCompareEq64)
      .Any({{{UniS1, _, Ptr64}, isEqualityICmp}, {{UniInVcc}, {None, VgprPtr64, VgprPtr64}}}, !HasScalarCompareEq64)
      .Any({{{UniS1, _, Ptr64}, !isEqualityICmp}, {{UniInVcc}, {None, VgprPtr64, VgprPtr64}}})
      .Any({{{DivS1, _, Ptr64}}, {{Vcc}, {None, VgprPtr64, VgprPtr64}}});
  // clang-format on

  addRulesForGOpcs({G_BRCOND})
      .Any({{UniS1}, {{}, {Sgpr32AExtBoolInReg}}})
      .Any({{DivS1}, {{}, {Vcc}}});

  addRulesForGOpcs({G_BR}).Any({{_}, {{}, {None}}});

  addRulesForGOpcs({G_SELECT}, StandardB)
      .Any({{DivS16}, {{Vgpr16}, {Vcc, Vgpr16, Vgpr16}}})
      .Div(B32, {{VgprB32}, {Vcc, VgprB32, VgprB32}});

  addRulesForGOpcs({G_ANYEXT})
      .Any({{UniS16, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS32, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS64, S1}, {{None}, {None}}}) // should be combined away
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  bool Has16bitCmp = ST->has16BitInsts();

  // In global-isel, G_TRUNC in-reg is treated as a no-op and selected into a
  // COPY. It is up to the user to deal with the truncated bits.
  // S1, S16, S32 and S64 results are handled with specific rules. Remaining
  // (result, source) pairs with valid register classes are covered by the
  // generic UniBRC/DivBRC wildcard rules.
  addRulesForGOpcs({G_TRUNC})
      .Any({{UniS1, UniS16}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS32}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS64}, {{None}, {None}}}) // should be combined away
      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}})
      .Any({{UniBRC, UniBRC}, {{SgprBRC}, {SgprBRC}}})
      .Any({{DivBRC, DivBRC}, {{VgprBRC}, {VgprBRC}}})
      .Any({{UniV2S16, V2S32}, {{SgprV2S16}, {SgprV2S32}}})
      .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
      // This is non-trivial. VgprToVccCopy is done using a compare instruction.
      .Any({{DivS1, DivS16}, {{Vcc}, {Vgpr16}, VgprToVccCopy}}, Has16bitCmp)
      .Any({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}})
      .Any({{DivS1, DivS64}, {{Vcc}, {Vgpr64}, VgprToVccCopy}});

  addRulesForGOpcs({G_ZEXT})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // not extending S16 to S32 is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32ZExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32ZExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // not extending S16 to S32 is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32SExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32SExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT_INREG})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS64, S64}, {{Sgpr64}, {Sgpr64}}});

  addRulesForGOpcs({G_ASSERT_ZEXT, G_ASSERT_SEXT}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Imm}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Imm}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Imm}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Imm}});

  addRulesForGOpcs({G_ASSERT_ALIGN}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}})
      .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {SgprPtr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {VgprPtr64}}});

  // Atomic read-modify-write operations: result and value are always VGPR,
  // pointer varies by address space.
  addRulesForGOpcs({G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_XCHG,
                    G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
                    G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN,
                    G_ATOMICRMW_UMAX, G_ATOMICRMW_UINC_WRAP,
                    G_ATOMICRMW_UDEC_WRAP, G_ATOMICRMW_FMIN, G_ATOMICRMW_FMAX})
      .Any({{DivS32, P0, S32}, {{Vgpr32}, {VgprP0, Vgpr32}}})
      .Any({{DivS64, P0, S64}, {{Vgpr64}, {VgprP0, Vgpr64}}})
      .Any({{DivS32, P1, S32}, {{Vgpr32}, {VgprP1, Vgpr32}}})
      .Any({{DivS64, P1, S64}, {{Vgpr64}, {VgprP1, Vgpr64}}})
      .Any({{DivS32, P3, S32}, {{Vgpr32}, {VgprP3, Vgpr32}}})
      .Any({{DivS64, P3, S64}, {{Vgpr64}, {VgprP3, Vgpr64}}});

  addRulesForGOpcs({G_ATOMICRMW_USUB_SAT, G_ATOMICRMW_USUB_COND})
      .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0, Vgpr32}}})
      .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1, Vgpr32}}})
      .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3, Vgpr32}}});

  bool HasAtomicFlatPkAdd16Insts = ST->hasAtomicFlatPkAdd16Insts();
  bool HasAtomicBufferGlobalPkAddF16Insts =
      ST->hasAtomicBufferGlobalPkAddF16NoRtnInsts() ||
      ST->hasAtomicBufferGlobalPkAddF16Insts();
  bool HasAtomicDsPkAdd16Insts = ST->hasAtomicDsPkAdd16Insts();
  addRulesForGOpcs({G_ATOMICRMW_FADD})
      .Any({{DivS32, P0, S32}, {{Vgpr32}, {VgprP0, Vgpr32}}})
      .Any({{DivS64, P0, S64}, {{Vgpr64}, {VgprP0, Vgpr64}}})
      .Any({{DivS32, P1, S32}, {{Vgpr32}, {VgprP1, Vgpr32}}})
      .Any({{DivS64, P1, S64}, {{Vgpr64}, {VgprP1, Vgpr64}}})
      .Any({{DivS32, P3, S32}, {{Vgpr32}, {VgprP3, Vgpr32}}})
      .Any({{DivS64, P3, S64}, {{Vgpr64}, {VgprP3, Vgpr64}}})
      .Any({{DivV2S16, P0, V2S16}, {{VgprV2S16}, {VgprP0, VgprV2S16}}},
           HasAtomicFlatPkAdd16Insts)
      .Any({{DivV2S16, P1, V2S16}, {{VgprV2S16}, {VgprP1, VgprV2S16}}},
           HasAtomicBufferGlobalPkAddF16Insts)
      .Any({{DivV2S16, P3, V2S16}, {{VgprV2S16}, {VgprP3, VgprV2S16}}},
           HasAtomicDsPkAdd16Insts);

  addRulesForGOpcs({G_ATOMIC_CMPXCHG})
      .Any({{DivS32, P2}, {{Vgpr32}, {VgprP2, Vgpr32, Vgpr32}}})
      .Any({{DivS64, P2}, {{Vgpr64}, {VgprP2, Vgpr64, Vgpr64}}})
      .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3, Vgpr32, Vgpr32}}})
      .Any({{DivS64, P3}, {{Vgpr64}, {VgprP3, Vgpr64, Vgpr64}}});

  addRulesForGOpcs({G_AMDGPU_ATOMIC_CMPXCHG})
      .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0, VgprV2S32}}})
      .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1, VgprV2S32}}})
      .Any({{DivS64, P0}, {{Vgpr64}, {VgprP0, VgprV2S64}}})
      .Any({{DivS64, P1}, {{Vgpr64}, {VgprP1, VgprV2S64}}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_CMPSWAP}, Standard)
      .Div(S32, {{Vgpr32},
                 {Vgpr32, Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(S64, {{Vgpr64},
                 {Vgpr64, Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_ADD, G_AMDGPU_BUFFER_ATOMIC_AND,
                    G_AMDGPU_BUFFER_ATOMIC_DEC, G_AMDGPU_BUFFER_ATOMIC_FMAX,
                    G_AMDGPU_BUFFER_ATOMIC_FMIN, G_AMDGPU_BUFFER_ATOMIC_INC,
                    G_AMDGPU_BUFFER_ATOMIC_OR, G_AMDGPU_BUFFER_ATOMIC_SMAX,
                    G_AMDGPU_BUFFER_ATOMIC_SMIN, G_AMDGPU_BUFFER_ATOMIC_SUB,
                    G_AMDGPU_BUFFER_ATOMIC_SWAP, G_AMDGPU_BUFFER_ATOMIC_UMAX,
                    G_AMDGPU_BUFFER_ATOMIC_UMIN, G_AMDGPU_BUFFER_ATOMIC_XOR},
                   Standard)
      .Div(S32, {{Vgpr32}, {Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(S64, {{Vgpr64}, {Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
  bool hasSMRDSmall = ST->hasScalarSubwordLoads();
  bool usesTrue16 = ST->useRealTrue16Insts();

  Predicate isAlign16([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(16);
  });

  Predicate isAlign4([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(4);
  });

  Predicate isAtomicMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isAtomic();
  });

  Predicate isUniMMO([](const MachineInstr &MI) -> bool {
    return AMDGPU::isUniformMMO(*MI.memoperands_begin());
  });

  Predicate isConst([](const MachineInstr &MI) -> bool {
    // The address space in the MMO can be different than the address space of
    // the pointer.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned AS = MMO->getAddrSpace();
    return AS == AMDGPUAS::CONSTANT_ADDRESS ||
           AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
  });

  Predicate isVolatileMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isVolatile();
  });

  Predicate isInvMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isInvariant();
  });

  Predicate isNoClobberMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getFlags() & MONoClobber;
  });

  Predicate isNaturalAligned([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return MMO->getAlign() >= Align(MMO->getSize().getValue());
  });

  Predicate is8Or16BitMMO([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned MemSize = 8 * MMO->getSize().getValue();
    return MemSize == 16 || MemSize == 8;
  });

  Predicate is32BitMMO([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return 8 * MMO->getSize().getValue() == 32;
  });
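
  // "isUL" (below) identifies a load that may be lowered to a scalar load:
  // non-atomic, uniform address, not volatile (volatile is tolerated only in
  // the constant address space), and memory known not to be modified
  // (constant address space, invariant, or tagged MONoClobber).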
  auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
              (isConst || isInvMMO || isNoClobberMMO);

  // clang-format off
  // TODO: S32Dst, 16-bit any-extending load should not appear on True16 targets
  addRulesForGOpcs({G_LOAD})
      // flat, addrspace(0), never uniform - flat_load
      .Any({{DivS16, P0}, {{Vgpr16}, {VgprP0}}}, usesTrue16)
      .Any({{DivB32, P0}, {{VgprB32}, {VgprP0}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P0}, {{VgprB64}, {VgprP0}}})
      .Any({{DivB96, P0}, {{VgprB96}, {VgprP0}}})
      .Any({{DivB128, P0}, {{VgprB128}, {VgprP0}}})

      // global, addrspace(1)
      // divergent - global_load
      .Any({{DivS16, P1}, {{Vgpr16}, {VgprP1}}}, usesTrue16)
      .Any({{DivB32, P1}, {{VgprB32}, {VgprP1}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P1}, {{VgprB64}, {VgprP1}}})
      .Any({{DivB96, P1}, {{VgprB96}, {VgprP1}}})
      .Any({{DivB128, P1}, {{VgprB128}, {VgprP1}}})
      .Any({{DivB256, P1}, {{VgprB256}, {VgprP1}, SplitLoad}})
      .Any({{DivB512, P1}, {{VgprB512}, {VgprP1}, SplitLoad}})

      // uniform - s_load
      .Any({{{UniS16, P1}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P1}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP1}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
      .Any({{{UniB32, P1}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      // TODO: SplitLoad when !isNaturalAligned && isUL and target hasSMRDSmall
      .Any({{{UniB32, P1}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load to 32-bit load
      .Any({{{UniB32, P1}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}}}) // 32-bit load
      .Any({{{UniB64, P1}, isAlign4 && isUL}, {{SgprB64}, {SgprP1}}})
      .Any({{{UniB96, P1}, isAlign16 && isUL}, {{SgprB96}, {SgprP1}, WidenLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P1}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP1}, SplitLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P1}, isAlign4 && isUL}, {{SgprB96}, {SgprP1}}}, hasSMRDx3)
      .Any({{{UniB128, P1}, isAlign4 && isUL}, {{SgprB128}, {SgprP1}}})
      .Any({{{UniB256, P1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
      .Any({{{UniB512, P1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})

      // Uniform via global or buffer load, for example volatile or non-aligned
      // uniform load. Not using standard {{UniInVgprTy}, {VgprP1}} since it is
      // selected as global_load, use SgprP1 for pointer instead to match
      // patterns without flat-for-global, default for GFX7 and older.
      // -> +flat-for-global + {{UniInVgprTy}, {SgprP1}} - global_load
      // -> -flat-for-global + {{UniInVgprTy}, {SgprP1}} - buffer_load
      .Any({{{UniS16, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P1}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && !hasSMRDSmall) // s16 load
      .Any({{{UniB32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}}, !hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB64, P1}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP1}}})
      .Any({{{UniB96, P1}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP1}}})
      .Any({{{UniB128, P1}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP1}}})
      .Any({{{UniB256, P1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP1}, SplitLoad}})
      .Any({{{UniB512, P1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP1}, SplitLoad}})

      // local, addrspace(3) - ds_load
      .Any({{DivS16, P3}, {{Vgpr16}, {VgprP3}}}, usesTrue16)
      .Any({{DivB32, P3}, {{VgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P3}, {{VgprB64}, {VgprP3}}})
      .Any({{DivB96, P3}, {{VgprB96}, {VgprP3}}})
      .Any({{DivB128, P3}, {{VgprB128}, {VgprP3}}})

      .Any({{UniS16, P3}, {{UniInVgprS16}, {SgprP3}}}, usesTrue16) // 16-bit load
      .Any({{UniB32, P3}, {{UniInVgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{UniB64, P3}, {{UniInVgprB64}, {VgprP3}}})
      .Any({{UniB96, P3}, {{UniInVgprB96}, {VgprP3}}})
      .Any({{UniB128, P3}, {{UniInVgprB128}, {VgprP3}}})

      // constant, addrspace(4)
      // divergent - global_load
      .Any({{DivS16, P4}, {{Vgpr16}, {VgprP4}}}, usesTrue16)
      .Any({{DivB32, P4}, {{VgprB32}, {VgprP4}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P4}, {{VgprB64}, {VgprP4}}})
      .Any({{DivB96, P4}, {{VgprB96}, {VgprP4}}})
      .Any({{DivB128, P4}, {{VgprB128}, {VgprP4}}})
      .Any({{DivB256, P4}, {{VgprB256}, {VgprP4}, SplitLoad}})
      .Any({{DivB512, P4}, {{VgprB512}, {VgprP4}, SplitLoad}})

      // uniform - s_load
      .Any({{{UniS16, P4}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P4}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP4}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
      .Any({{{UniB32, P4}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP4}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P4}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load to 32-bit load
      .Any({{{UniB32, P4}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}}}) // 32-bit load
      .Any({{{UniB64, P4}, isAlign4 && isUL}, {{SgprB64}, {SgprP4}}})
      .Any({{{UniB96, P4}, isAlign16 && isUL}, {{SgprB96}, {SgprP4}, WidenLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P4}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP4}, SplitLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P4}, isAlign4 && isUL}, {{SgprB96}, {SgprP4}}}, hasSMRDx3)
      .Any({{{UniB128, P4}, isAlign4 && isUL}, {{SgprB128}, {SgprP4}}})
      .Any({{{UniB256, P4}, isAlign4 && isUL}, {{SgprB256}, {SgprP4}}})
      .Any({{{UniB512, P4}, isAlign4 && isUL}, {{SgprB512}, {SgprP4}}})

      // uniform in vgpr - global_load or buffer_load
      .Any({{{UniS16, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P4}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && !hasSMRDSmall) // s16 load
      .Any({{{UniB32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP4}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP4}}}, !hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB64, P4}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP4}}})
      .Any({{{UniB96, P4}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP4}}})
      .Any({{{UniB128, P4}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP4}}})
      .Any({{{UniB256, P4}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP4}, SplitLoad}})
      .Any({{{UniB512, P4}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP4}, SplitLoad}})

      // private, addrspace(5), never uniform - scratch_load
      .Any({{DivS16, P5}, {{Vgpr16}, {VgprP5}}}, usesTrue16)
      .Any({{DivB32, P5}, {{VgprB32}, {VgprP5}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P5}, {{VgprB64}, {VgprP5}}})
      .Any({{DivB96, P5}, {{VgprB96}, {VgprP5}}})
      .Any({{DivB128, P5}, {{VgprB128}, {VgprP5}}})

      .Any({{DivS32, Ptr128}, {{Vgpr32}, {VgprPtr128}}});

  addRulesForGOpcs({G_ZEXTLOAD, G_SEXTLOAD}) // i8 and i16 zero- and sign-extending loads
      .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0}}})

      .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1}}})
      .Any({{{UniS32, P1}, isAlign4 && isUL}, {{Sgpr32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall)
      .Any({{{UniS32, P1}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP1}}}, hasSMRDSmall)
      .Any({{{UniS32, P1}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP1}}}, !hasSMRDSmall)
      .Any({{{UniS32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP1}}}, hasSMRDSmall)

      .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3}}})
      .Any({{UniS32, P3}, {{UniInVgprS32}, {VgprP3}}})

      .Any({{DivS32, P4}, {{Vgpr32}, {VgprP4}}})
      .Any({{{UniS32, P4}, isAlign4 && isUL}, {{Sgpr32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall)
      .Any({{{UniS32, P4}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP4}}}, hasSMRDSmall)
      .Any({{{UniS32, P4}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP4}}}, !hasSMRDSmall)
      .Any({{{UniS32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP4}}}, hasSMRDSmall)

      .Any({{DivS32, P5}, {{Vgpr32}, {VgprP5}}});

  addRulesForGOpcs({G_STORE})
      // addrspace(0)
      .Any({{S16, P0}, {{}, {Vgpr16, VgprP0}}}, usesTrue16) // 16-bit store
      .Any({{B32, P0}, {{}, {VgprB32, VgprP0}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, P0}, {{}, {VgprB64, VgprP0}}})
      .Any({{B96, P0}, {{}, {VgprB96, VgprP0}}})
      .Any({{B128, P0}, {{}, {VgprB128, VgprP0}}})

      // addrspace(1), there are no stores to addrspace(4)
      // For targets:
      // - with "+flat-for-global" - global_store
      // - without (-flat-for-global) - buffer_store addr64
      .Any({{S16, DivP1}, {{}, {Vgpr16, VgprP1}}}, usesTrue16) // 16-bit store
      .Any({{B32, DivP1}, {{}, {VgprB32, VgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, DivP1}, {{}, {VgprB64, VgprP1}}})
      .Any({{B96, DivP1}, {{}, {VgprB96, VgprP1}}})
      .Any({{B128, DivP1}, {{}, {VgprB128, VgprP1}}})

      // For UniP1, use sgpr ptr to match flat-for-global patterns. Targets:
      // - with "+flat-for-global" - global_store for both sgpr and vgpr ptr
      // - without (-flat-for-global) - need sgpr ptr to select buffer_store
      .Any({{S16, UniP1}, {{}, {Vgpr16, SgprP1}}}, usesTrue16) // 16-bit store
      .Any({{B32, UniP1}, {{}, {VgprB32, SgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, UniP1}, {{}, {VgprB64, SgprP1}}})
      .Any({{B96, UniP1}, {{}, {VgprB96, SgprP1}}})
      .Any({{B128, UniP1}, {{}, {VgprB128, SgprP1}}})

      // addrspace(3) and addrspace(5)
      .Any({{S16, Ptr32}, {{}, {Vgpr16, VgprPtr32}}}, usesTrue16) // 16-bit store
      .Any({{B32, Ptr32}, {{}, {VgprB32, VgprPtr32}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, Ptr32}, {{}, {VgprB64, VgprPtr32}}})
      .Any({{B96, Ptr32}, {{}, {VgprB96, VgprPtr32}}})
      .Any({{B128, Ptr32}, {{}, {VgprB128, VgprPtr32}}});

  // clang-format on

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
                    G_AMDGPU_TBUFFER_LOAD_FORMAT},
                   StandardB)
      .Div(B32, {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B32, {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B64, {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B64, {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B96, {{VgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B96, {{UniInVgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B128, {{VgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B128, {{UniInVgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_USHORT, G_AMDGPU_BUFFER_LOAD_UBYTE,
                    G_AMDGPU_BUFFER_LOAD_SSHORT, G_AMDGPU_BUFFER_LOAD_SBYTE},
                   StandardB)
      .Div(B32, {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B32, {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs(
      {G_AMDGPU_BUFFER_LOAD_UBYTE_TFE, G_AMDGPU_BUFFER_LOAD_USHORT_TFE},
      StandardB)
      .Div(B64, {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B64, {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_TFE, G_AMDGPU_BUFFER_LOAD_FORMAT_TFE},
                   StandardB)
      .Div(B64, {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B64, {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B96, {{VgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B96, {{UniInVgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B128, {{VgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B128, {{UniInVgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Any({{DivB160},
            {{VgprB160}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{UniB160},
            {{UniInVgprB160}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});

  addRulesForGOpcs(
      {G_AMDGPU_BUFFER_LOAD_FORMAT_D16, G_AMDGPU_TBUFFER_LOAD_FORMAT_D16},
      StandardB)
      .Div(B32, {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B32, {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B64, {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B64, {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B96, {{VgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B96, {{UniInVgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_STORE, G_AMDGPU_BUFFER_STORE_BYTE,
                    G_AMDGPU_BUFFER_STORE_SHORT, G_AMDGPU_BUFFER_STORE_FORMAT,
                    G_AMDGPU_BUFFER_STORE_FORMAT_D16,
                    G_AMDGPU_TBUFFER_STORE_FORMAT,
                    G_AMDGPU_TBUFFER_STORE_FORMAT_D16})
      .Any({{B32}, {{}, {VgprB32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{B64}, {{}, {VgprB64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{B96}, {{}, {VgprB96, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{B128}, {{}, {VgprB128, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});

  // Buffer atomics: resource descriptor + scalar offset are SGPR, data and
  // address components are VGPR.
  //
  // Operand order (SIInstructions.td BufferAtomicGenericInstruction):
  // dst = op vdata, rsrc, vindex, voffset, soffset, offset_imm, cachepolicy,
  // idxen_imm
  addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_FADD})
      .Any({{S32, S32, V4S32, S32, S32, S32},
            {{Vgpr32}, {Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{S64, S64, V4S32, S32, S32, S32},
            {{Vgpr64}, {Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{V2S16, V2S16, V4S32, S32, S32, S32},
            {{VgprV2S16},
             {VgprV2S16, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});

  addRulesForGOpcs({G_PTR_ADD})
      .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32, Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32, Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {SgprPtr64, Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {VgprPtr64, Vgpr64}}});

  addRulesForGOpcs({G_INTTOPTR})
      .Any({{UniPtr32}, {{SgprPtr32}, {Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {Vgpr64}}})
      .Any({{UniPtr128}, {{SgprPtr128}, {Sgpr128}}})
      .Any({{DivPtr128}, {{VgprPtr128}, {Vgpr128}}});

  addRulesForGOpcs({G_PTRTOINT})
      .Any({{UniS32}, {{Sgpr32}, {SgprPtr32}}})
      .Any({{DivS32}, {{Vgpr32}, {VgprPtr32}}})
      .Any({{UniS64}, {{Sgpr64}, {SgprPtr64}}})
      .Any({{DivS64}, {{Vgpr64}, {VgprPtr64}}})
      .Any({{UniS128}, {{Sgpr128}, {SgprPtr128}}})
      .Any({{DivS128}, {{Vgpr128}, {VgprPtr128}}});

  // FIXME: Update llvm/test/CodeGen/AMDGPU/ptrmask.ll to use GlobalISel.
  // Currently crashes on P8 (buffer resource) tests due to legalizer issue.
  addRulesForGOpcs({G_PTRMASK})
      .Any({{UniP1}, {{SgprP1}, {SgprP1, Sgpr64}}})
      .Any({{DivP1}, {{VgprP1}, {VgprP1, Vgpr64}}})
      .Any({{UniP3}, {{SgprP3}, {SgprP3, Sgpr32}}})
      .Any({{DivP3}, {{VgprP3}, {VgprP3, Vgpr32}}});

  addRulesForGOpcs({G_ABS}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16}, AbsToNegMax})
      .Uni(S32, {{Sgpr32}, {Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}, AbsToNegMax})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16}, AbsToS32})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16}, AbsToNegMax});

  addRulesForGOpcs({G_BITREVERSE}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}});

  addRulesForGOpcs({G_AMDGPU_FFBH_U32, G_AMDGPU_FFBL_B32, G_CTLZ_ZERO_POISON,
                    G_CTTZ_ZERO_POISON})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}})
      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}});

  addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}});

  addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)
      .Uni(S64, {{Sgpr64}, {}});

  addRulesForGOpcs({G_BLOCK_ADDR}).Any({{UniP0}, {{SgprP0}, {}}});

  addRulesForGOpcs({G_GLOBAL_VALUE})
      .Any({{UniP0}, {{SgprP0}, {}}})
      .Any({{UniP1}, {{SgprP1}, {}}})
      .Any({{UniP3}, {{SgprP3}, {}}})
      .Any({{UniP4}, {{SgprP4}, {}}})
      .Any({{UniP8}, {{SgprP8}, {}}});

  addRulesForGOpcs({G_AMDGPU_WAVE_ADDRESS}).Any({{UniP5}, {{SgprP5}, {}}});

  addRulesForGOpcs({G_SI_CALL})
      .Any({{_, UniP0}, {{None}, {SgprP0}}})
      .Any({{_, DivP0}, {{None}, {SgprP0Call_WF}}})
      .Any({{_, UniP4}, {{None}, {SgprP4}}})
      .Any({{_, DivP4}, {{None}, {SgprP4Call_WF}}});

  bool hasSALUFloat = ST->hasSALUFloatInsts();

  addRulesForGOpcs({G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat)
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}}, !hasSALUFloat)
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}},
           hasSALUFloat)
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  addRulesForGOpcs({G_FSUB, G_STRICT_FSUB}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat);

  addRulesForGOpcs({G_FMAD}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_FLDEXP, G_STRICT_FLDEXP}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_FMA, G_STRICT_FMA}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64, Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64, Vgpr64}})
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16, Sgpr16}}, hasSALUFloat)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}}, !hasSALUFloat)
      .Uni(V2S16,
           {{SgprV2S16}, {SgprV2S16, SgprV2S16, SgprV2S16}},
           hasSALUFloat)
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}},
           !hasSALUFloat);

  addRulesForGOpcs({G_AMDGPU_FMED3}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  // TODO: This opcode is generated from the i64->i16 signed clamped pattern in
  // the PreLegalizerCombiner. Move the combine to RegBankCombiner to keep more
  // instructions on SALU.
  addRulesForGOpcs({G_AMDGPU_SMED3}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  // FNEG and FABS are either folded as source modifiers or can be selected as
  // bitwise XOR and AND with Mask. XOR and AND are available on SALU but for
  // targets without SALU float we still select them as VGPR since there would
  // be no real sgpr use.
  addRulesForGOpcs({G_FNEG, G_FABS}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16}}, !hasSALUFloat)
      .Uni(S16, {{Sgpr16}, {Sgpr16}}, hasSALUFloat)
      .Div(S16, {{Vgpr16}, {Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32}}, hasSALUFloat)
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}}, !hasSALUFloat)
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16}, ScalarizeToS16}, hasSALUFloat)
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16}})
      .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32}}})
      .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}});

  addRulesForGOpcs({G_FCANONICALIZE}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S16, {{UniInVgprS16}, {Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16}})
      .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32}}})
      .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}});

  bool hasPST = ST->hasPseudoScalarTrans();
  addRulesForGOpcs({G_FSQRT}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16}})
      .Uni(S16, {{Sgpr16}, {Sgpr16}}, hasPST)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16}}, !hasPST);
1433
1434 addRulesForGOpcs({G_FPTOUI, G_FPTOSI})
1435 .Any({{UniS16, S16}, {{UniInVgprS16}, {Vgpr16}}})
1436 .Any({{DivS16, S16}, {{Vgpr16}, {Vgpr16}}})
1437 .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}, hasSALUFloat)
1438 .Any({{UniS32, S16}, {{UniInVgprS32}, {Vgpr16}}}, !hasSALUFloat)
1439 .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}})
1440 .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
1441 .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat)
1442 .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
1443 .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
1444 .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}});
1445
1446 addRulesForGOpcs({G_UITOFP, G_SITOFP})
1447 .Any({{UniS16, S16}, {{UniInVgprS16}, {Vgpr16}}})
1448 .Any({{DivS16, S16}, {{Vgpr16}, {Vgpr16}}})
1449 .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}}, hasSALUFloat)
1450 .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}}, !hasSALUFloat)
1451 .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
1452 .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
1453 .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat)
1454 .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
1455 .Any({{UniS64, S32}, {{UniInVgprS64}, {Vgpr32}}})
1456 .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}}});
1457
1458 addRulesForGOpcs({G_AMDGPU_S_BUFFER_PREFETCH})
1460
1461 addRulesForGOpcs({G_FPEXT})
1462 .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}})
1463 .Any({{UniS64, S32}, {{UniInVgprS64}, {Vgpr32}}})
1464 .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}}})
1465 .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}, hasSALUFloat)
1466 .Any({{UniS32, S16}, {{UniInVgprS32}, {Vgpr16}}}, !hasSALUFloat);
1467
1468 addRulesForGOpcs({G_AMDGPU_CVT_PK_I16_I32}, Standard)
1469 .Uni(V2S16, {{UniInVgprV2S16}, {Vgpr32, Vgpr32}})
1470 .Div(V2S16, {{VgprV2S16}, {Vgpr32, Vgpr32}});
1471
1472 addRulesForGOpcs({G_AMDGPU_FMIN_LEGACY, G_AMDGPU_FMAX_LEGACY}, Standard)
1473 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
1474 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});
1475
1476 bool hasSALUMinimumMaximumInsts = ST->hasSALUMinimumMaximumInsts();
1477
1478 addRulesForGOpcs({G_FMINIMUM, G_FMAXIMUM}, Standard)
1479 .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUMinimumMaximumInsts)
1480 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUMinimumMaximumInsts)
1481 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
1482 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUMinimumMaximumInsts)
1483 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUMinimumMaximumInsts)
1484 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
1485 .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
1486 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
1487 .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
1488 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
1489
1490 addRulesForGOpcs({G_FMINNUM_IEEE, G_FMAXNUM_IEEE, G_FMINNUM, G_FMAXNUM,
1491 G_FMINIMUMNUM, G_FMAXIMUMNUM},
1492 Standard)
1493 .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
1494 .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
1495 .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
1496 .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
1497 .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
1498 .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
1499 .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
1500 .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
1501 .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
1502 .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat);
1503
1504 addRulesForGOpcs({G_FPTRUNC})
1505 .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
1506 .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
1507 .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
1508 .Any({{UniV2S16, V2S32}, {{UniInVgprV2S16}, {VgprV2S32}}})
1509 .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
1510 .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}}, hasSALUFloat)
1511 .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}}, !hasSALUFloat);
1512
1513 addRulesForGOpcs({G_IS_FPCLASS})
1514 .Any({{DivS1, S16}, {{Vcc}, {Vgpr16}}})
1515 .Any({{UniS1, S16}, {{UniInVcc}, {Vgpr16}}})
1516 .Any({{DivS1, S32}, {{Vcc}, {Vgpr32}}})
1517 .Any({{UniS1, S32}, {{UniInVcc}, {Vgpr32}}})
1518 .Any({{DivS1, S64}, {{Vcc}, {Vgpr64}}})
1519 .Any({{UniS1, S64}, {{UniInVcc}, {Vgpr64}}});
1520
1521 addRulesForGOpcs({G_FCMP}, Standard)
1522 .Any({{UniS1, _, S16}, {{Sgpr32Trunc}, {None, Sgpr16, Sgpr16}}},
1523 hasSALUFloat)
1524 .Any({{UniS1, _, S16}, {{UniInVcc}, {None, Vgpr16, Vgpr16}}},
1525 !hasSALUFloat)
1526 .Any({{DivS1, _, S16}, {{Vcc}, {None, Vgpr16, Vgpr16}}})
1527 .Any({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}},
1528 hasSALUFloat)
1529 .Any({{UniS1, _, S32}, {{UniInVcc}, {None, Vgpr32, Vgpr32}}},
1530 !hasSALUFloat)
1531 .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
1532 .Any({{UniS1, _, S64}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}})
1533 .Any({{DivS1, _, S64}, {{Vcc}, {None, Vgpr64, Vgpr64}}});
1534
1535 addRulesForGOpcs({G_INTRINSIC_TRUNC, G_INTRINSIC_ROUNDEVEN, G_FFLOOR, G_FCEIL,
1536 G_FEXP2, G_FLOG2},
1537 Standard)
1538 .Uni(S16, {{UniInVgprS16}, {Vgpr16}})
1539 .Div(S16, {{Vgpr16}, {Vgpr16}})
1540 .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
1541 .Div(S32, {{Vgpr32}, {Vgpr32}})
1542 .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
1543 .Div(S64, {{Vgpr64}, {Vgpr64}});
1544
1545 addRulesForGOpcs({G_AMDGPU_GLOBAL_LOAD_MONITOR, G_AMDGPU_FLAT_LOAD_MONITOR},
1546 StandardB)
1547 .Uni(B32, {{UniInVgprB32}, {SgprPtr64}})
1548 .Div(B32, {{VgprB32}, {VgprPtr64}})
1549 .Uni(B64, {{UniInVgprB64}, {SgprPtr64}})
1550 .Div(B64, {{VgprB64}, {VgprPtr64}})
1551 .Uni(B128, {{UniInVgprB128}, {SgprPtr64}})
1552 .Div(B128, {{VgprB128}, {VgprPtr64}});
1553
1554 using namespace Intrinsic;
1555
1556 addRulesForIOpcs({returnaddress}).Any({{UniP0}, {{SgprP0}, {}}});
1557
1558 addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});
1559
1560 addRulesForIOpcs({amdgcn_s_getreg}).Any({{}, {{Sgpr32}, {IntrId, Imm}}});
1561
1562 addRulesForIOpcs({amdgcn_s_setreg})
1563 .Any({{_, _, S32}, {{}, {IntrId, Imm, SgprB32_ReadFirstLane}}});
1564
1565 addRulesForIOpcs({amdgcn_s_sendmsg, amdgcn_s_sendmsghalt})
1566 .Any({{}, {{}, {IntrId, Imm, SgprB32_M0}}});
1567
1568 addRulesForIOpcs({amdgcn_s_sendmsg_rtn})
1569 .Any({{S32}, {{Sgpr32}, {}}})
1570 .Any({{S64}, {{Sgpr64}, {}}});
1571
1572 addRulesForIOpcs({amdgcn_s_memrealtime, amdgcn_s_memtime}, Standard)
1573 .Uni(S64, {{Sgpr64}, {IntrId}});
1574
1575 addRulesForIOpcs({amdgcn_groupstaticsize, amdgcn_pops_exiting_wave_id,
1576 amdgcn_reloc_constant, amdgcn_s_get_waveid_in_workgroup},
1577 Standard)
1578 .Uni(S32, {{Sgpr32}, {IntrId}});
1579
1580 // Intrinsics with no register operands.
1581 addRulesForIOpcs({amdgcn_asyncmark,
1582 amdgcn_endpgm,
1583 amdgcn_init_exec,
1584 amdgcn_s_barrier,
1585 amdgcn_s_barrier_leave,
1586 amdgcn_s_barrier_signal,
1587 amdgcn_s_barrier_wait,
1588 amdgcn_s_monitor_sleep,
1589 amdgcn_s_nop,
1590 amdgcn_s_sethalt,
1591 amdgcn_s_setprio,
1592 amdgcn_s_setprio_inc_wg,
1593 amdgcn_s_sleep,
1594 amdgcn_s_ttracedata_imm,
1595 amdgcn_s_wait_asynccnt,
1596 amdgcn_s_wait_bvhcnt,
1597 amdgcn_s_wait_dscnt,
1598 amdgcn_s_wait_event,
1599 amdgcn_s_wait_event_export_ready,
1600 amdgcn_s_wait_expcnt,
1601 amdgcn_s_wait_kmcnt,
1602 amdgcn_s_wait_loadcnt,
1603 amdgcn_s_wait_samplecnt,
1604 amdgcn_s_wait_storecnt,
1605 amdgcn_s_wait_tensorcnt,
1606 amdgcn_s_waitcnt,
1607 amdgcn_unreachable,
1608 amdgcn_wait_asyncmark,
1609 amdgcn_wave_barrier})
1610 .Any({{}, {{}, {}}});
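// A rule of the form .Any({{}, {{}, {}}}) has an empty predicate and an empty
// mapping: it matches any occurrence of the intrinsic and leaves its
// (immediate-only) operands untouched, which is all these side-effect-only
// intrinsics need.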
1611
1612 addRulesForIOpcs({amdgcn_init_exec_from_input})
1613 .Any({{}, {{}, {IntrId, Sgpr32, Imm}}});
1614
1615 addRulesForIOpcs({amdgcn_s_ttracedata}).Any({{}, {{}, {IntrId, SgprB32_M0}}});
1616
1617 addRulesForIOpcs({amdgcn_s_sleep_var})
1618 .Any({{}, {{}, {IntrId, SgprB32_ReadFirstLane}}});
1619
1620 addRulesForIOpcs({amdgcn_s_barrier_join, amdgcn_s_wakeup_barrier})
1621 .Any({{}, {{}, {IntrId, SgprB32_M0}}});
1622
1623 addRulesForIOpcs({amdgcn_s_barrier_signal_var, amdgcn_s_barrier_init})
1624 .Any({{}, {{}, {IntrId, SgprB32_M0, SgprB32_M0}}});
1625
1626 addRulesForIOpcs({amdgcn_s_barrier_signal_isfirst})
1627 .Any({{UniS1}, {{Sgpr32Trunc}, {}}});
1628
1629 addRulesForIOpcs(
1630 {amdgcn_s_get_named_barrier_state, amdgcn_s_get_barrier_state}, Standard)
1631 .Uni(S32, {{Sgpr32}, {IntrId, SgprB32_M0}});
1632
1633 addRulesForIOpcs({amdgcn_flat_prefetch}).Any({{}, {{}, {IntrId, VgprP0}}});
1634
1635 addRulesForIOpcs({amdgcn_global_prefetch}).Any({{}, {{}, {IntrId, VgprP1}}});
1636
1637 addRulesForIOpcs({amdgcn_s_prefetch_data})
1639
1640 addRulesForIOpcs({amdgcn_class})
1641 .Any({{UniS1, _, S16}, {{UniInVcc}, {IntrId, Vgpr16, Vgpr32}}})
1642 .Any({{DivS1, _, S16}, {{Vcc}, {IntrId, Vgpr16, Vgpr32}}})
1643 .Any({{UniS1, _, S32}, {{UniInVcc}, {IntrId, Vgpr32, Vgpr32}}})
1644 .Any({{DivS1, _, S32}, {{Vcc}, {IntrId, Vgpr32, Vgpr32}}})
1645 .Any({{UniS1, _, S64}, {{UniInVcc}, {IntrId, Vgpr64, Vgpr32}}})
1646 .Any({{DivS1, _, S64}, {{Vcc}, {IntrId, Vgpr64, Vgpr32}}});
1647
1648 // This is an "intrinsic lane mask"; it was given type i32/i64 in LLVM IR.
1649 addRulesForIOpcs({amdgcn_end_cf})
1650 .Any({{_, UniS32}, {{}, {IntrId, Sgpr32}}})
1651 .Any({{_, UniS64}, {{}, {IntrId, Sgpr64}}});
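// Hedged sketch (names are illustrative): the lane mask consumed here is the
// exec mask saved by control-flow lowering at the divergent branch,
//   %saved:sgpr = llvm.amdgcn.if(%cond)   ; saves exec, masks the then-block
//   ...
//   llvm.amdgcn.end.cf(%saved)            ; restores exec at the join point
// hence the operand is always kept in wave-sized SGPRs.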
1652
1653 addRulesForIOpcs({amdgcn_if_break}, Standard)
1654 .Uni(S64, {{Sgpr64}, {IntrId, Vcc, Sgpr64}})
1655 .Uni(S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}});
1656
1657 addRulesForIOpcs({amdgcn_exp})
1658 .Any({{_, _, _, S32, S32, S32, S32},
1659 {{}, {IntrId, Imm, Imm, Vgpr32, Vgpr32, Vgpr32, Vgpr32}}});
1660
1661 addRulesForIOpcs({amdgcn_exp_compr})
1662 .Any({{_, _, _, V2S16}, {{}, {IntrId, Imm, Imm, VgprV2S16, VgprV2S16}}});
1663
1664 addRulesForIOpcs({amdgcn_exp_row})
1665 .Any({{_, _, _, S32, S32, S32, S32, _, S32},
1666 {{},
1667 {IntrId, Imm, Imm, Vgpr32, Vgpr32, Vgpr32, Vgpr32, Imm,
1668 SgprB32_M0}}});
1669
1670 addRulesForIOpcs({amdgcn_lds_direct_load}, StandardB)
1671 .Div(B32, {{VgprB32}, {IntrId, SgprB32_M0}});
1672
1673 addRulesForIOpcs({amdgcn_lds_param_load}, Standard)
1674 .Div(S32, {{Vgpr32}, {IntrId, Imm, Imm, SgprB32_M0}});
1675
1676 addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)
1677 .Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});
1678
1679 addRulesForIOpcs({amdgcn_readfirstlane})
1680 .Any({{UniB32, _, DivB32}, {{}, {SgprB32, None, VgprB32}}})
1681 // This should not exist in the first place; it comes from call lowering,
1682 // which inserts a readfirstlane just in case the register is not in an SGPR.
1683 .Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});
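// Hedged example of the uniform-to-uniform case above, as it appears after
// call lowering (register names are illustrative):
//   %u:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %x(s32)
// Even though %x is already uniform, the mapping routes it through a VGPR so
// the plain readfirstlane selection pattern still applies.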
1684
1685 addRulesForIOpcs({amdgcn_readlane}, StandardB)
1687
1688 addRulesForIOpcs({amdgcn_writelane}, StandardB)
1689 .Div(B32,
1690 {{VgprB32},
1692
1693 addRulesForIOpcs({amdgcn_add_max_i32, amdgcn_add_max_u32, amdgcn_add_min_i32,
1694 amdgcn_add_min_u32},
1695 Standard)
1696 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1697 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1698
1699 addRulesForIOpcs({amdgcn_pk_add_max_i16, amdgcn_pk_add_max_u16,
1700 amdgcn_pk_add_min_i16, amdgcn_pk_add_min_u16},
1701 Standard)
1702 .Uni(V2S16, {{UniInVgprV2S16}, {IntrId, VgprV2S16, VgprV2S16, VgprV2S16}})
1703 .Div(V2S16, {{VgprV2S16}, {IntrId, VgprV2S16, VgprV2S16, VgprV2S16}});
1704
1705 addRulesForIOpcs({amdgcn_permlane16, amdgcn_permlanex16}, Standard)
1706 .Div(S32, {{Vgpr32},
1709
1710 addRulesForIOpcs({amdgcn_permlane_bcast, amdgcn_permlane_up,
1711 amdgcn_permlane_down, amdgcn_permlane_xor},
1712 StandardB)
1713 .Div(B32,
1714 {{VgprB32},
1716
1717 addRulesForIOpcs({amdgcn_permlane_idx_gen}, Standard)
1719
1720 addRulesForIOpcs({amdgcn_perm}, Standard)
1721 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1722 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1723
1724 addRulesForIOpcs(
1725 {amdgcn_wave_reduce_add, amdgcn_wave_reduce_and, amdgcn_wave_reduce_fadd,
1726 amdgcn_wave_reduce_fmax, amdgcn_wave_reduce_fmin,
1727 amdgcn_wave_reduce_fsub, amdgcn_wave_reduce_max, amdgcn_wave_reduce_min,
1728 amdgcn_wave_reduce_or, amdgcn_wave_reduce_sub, amdgcn_wave_reduce_umax,
1729 amdgcn_wave_reduce_umin, amdgcn_wave_reduce_xor},
1730 Standard)
1731 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32}})
1732 .Div(S32, {{Sgpr32ToVgprDst}, {IntrId, VgprB32}})
1733 .Uni(S64, {{Sgpr64}, {IntrId, Sgpr64}})
1734 .Div(S64, {{Sgpr64ToVgprDst}, {IntrId, VgprB64}});
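// Note on Sgpr32ToVgprDst / Sgpr64ToVgprDst (an interpretation, not a source
// comment): a wave reduction of a divergent input still yields a wave-uniform
// value, so it is computed into an SGPR and only copied into the divergent
// VGPR destination afterwards, roughly:
//   %s:sgpr(s32)   = wave.reduce.add %v:vgpr(s32)
//   %dst:vgpr(s32) = COPY %s:sgpr(s32)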
1735
1736 addRulesForIOpcs({amdgcn_bitop3, amdgcn_fmad_ftz}, Standard)
1737 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
1738 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
1739 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1740 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1741
1742 addRulesForIOpcs({amdgcn_udot4, amdgcn_sdot4, amdgcn_udot8, amdgcn_sdot8,
1743 amdgcn_dot4_f32_bf8_bf8, amdgcn_dot4_f32_bf8_fp8,
1744 amdgcn_dot4_f32_fp8_fp8, amdgcn_dot4_f32_fp8_bf8},
1745 Standard)
1746 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1747 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1748
1749 addRulesForIOpcs({amdgcn_rsq, amdgcn_rsq_clamp}, Standard)
1750 .Uni(S16, {{Sgpr16}, {IntrId, Sgpr16}}, hasPST)
1751 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}}, !hasPST)
1752 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
1753 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32}}, hasPST)
1754 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}}, !hasPST)
1755 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
1756 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64}})
1757 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64}});
1758
1759 addRulesForIOpcs({amdgcn_mul_u24, amdgcn_mul_i24}, Standard)
1760 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
1761 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}})
1762 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr32, Vgpr32}})
1763 .Div(S64, {{Vgpr64}, {IntrId, Vgpr32, Vgpr32}});
1764
1765 addRulesForIOpcs({amdgcn_ds_bpermute, amdgcn_ds_bpermute_fi_b32,
1766 amdgcn_ds_permute, amdgcn_fmul_legacy, amdgcn_mulhi_i24,
1767 amdgcn_mulhi_u24},
1768 Standard)
1769 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
1770 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}});
1771
1772 addRulesForIOpcs({amdgcn_cvt_sr_bf8_f32, amdgcn_cvt_sr_fp8_f32,
1773 amdgcn_cvt_sr_fp8_f32_e5m3, amdgcn_cvt_pk_bf8_f32,
1774 amdgcn_cvt_pk_fp8_f32, amdgcn_cvt_pk_fp8_f32_e5m3},
1775 Standard)
1776 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1777 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1778
1779 addRulesForIOpcs({amdgcn_cvt_off_f32_i4, amdgcn_cvt_f32_bf8,
1780 amdgcn_cvt_f32_fp8, amdgcn_cvt_f32_fp8_e5m3},
1781 Standard)
1782 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}})
1783 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}});
1784
1785 addRulesForIOpcs({amdgcn_cvt_pk_f32_bf8, amdgcn_cvt_pk_f32_fp8})
1786 .Any({{UniV2S32}, {{UniInVgprV2S32}, {IntrId, Vgpr32}}})
1787 .Any({{DivV2S32}, {{VgprV2S32}, {IntrId, Vgpr32}}});
1788
1789 addRulesForIOpcs({amdgcn_cubesc, amdgcn_cubetc, amdgcn_cubema, amdgcn_cubeid,
1790 amdgcn_fma_legacy},
1791 Standard)
1792 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1793 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1794
1795 addRulesForIOpcs({amdgcn_frexp_mant, amdgcn_fract}, Standard)
1796 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}})
1797 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
1798 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}})
1799 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
1800 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64}})
1801 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64}});
1802
1803 addRulesForIOpcs({amdgcn_prng_b32})
1804 .Any({{UniS32}, {{UniInVgprS32}, {IntrId, Vgpr32}}})
1805 .Any({{DivS32}, {{Vgpr32}, {IntrId, Vgpr32}}});
1806
1807 addRulesForIOpcs({amdgcn_sffbh}, Standard)
1808 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32}})
1809 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}});
1810
1811 addRulesForIOpcs({amdgcn_ubfe, amdgcn_sbfe}, Standard)
1812 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1813 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32, Sgpr32, Sgpr32}, S_BFE})
1814 .Uni(S64, {{Sgpr64}, {IntrId, Sgpr64, Sgpr32, Sgpr32}, S_BFE})
1815 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64, Vgpr32, Vgpr32}, V_BFE});
1816
1817 addRulesForIOpcs({amdgcn_cvt_pk_i16, amdgcn_cvt_pk_u16, amdgcn_cvt_pknorm_i16,
1818 amdgcn_cvt_pknorm_u16, amdgcn_cvt_pkrtz},
1819 Standard)
1820 .Uni(V2S16, {{UniInVgprV2S16}, {IntrId, Vgpr32, Vgpr32}})
1821 .Div(V2S16, {{VgprV2S16}, {IntrId, Vgpr32, Vgpr32}});
1822
1823 addRulesForIOpcs({amdgcn_cvt_scalef32_sr_pk32_bf6_f16,
1824 amdgcn_cvt_scalef32_sr_pk32_fp6_f16,
1825 amdgcn_cvt_scalef32_sr_pk32_bf6_bf16,
1826 amdgcn_cvt_scalef32_sr_pk32_fp6_bf16},
1827 Standard)
1829
1830 addRulesForIOpcs({amdgcn_cvt_scalef32_sr_pk32_bf6_f32,
1831 amdgcn_cvt_scalef32_sr_pk32_fp6_f32},
1832 Standard)
1834
1835 addRulesForIOpcs({amdgcn_global_load_tr_b64})
1836 .Any({{DivB64, _, UniP1}, {{VgprB64}, {IntrId, SgprP1}}})
1837 .Any({{DivB64, _, DivP1}, {{VgprB64}, {IntrId, VgprP1}}})
1838 .Any({{DivB32, _, UniP1}, {{VgprB32}, {IntrId, SgprP1}}})
1839 .Any({{DivB32, _, DivP1}, {{VgprB32}, {IntrId, VgprP1}}});
1840
1841 addRulesForIOpcs({amdgcn_global_load_tr_b128})
1842 .Any({{DivB64, _, UniP1}, {{VgprB64}, {IntrId, SgprP1}}})
1843 .Any({{DivB64, _, DivP1}, {{VgprB64}, {IntrId, VgprP1}}})
1844 .Any({{DivB128, _, UniP1}, {{VgprB128}, {IntrId, SgprP1}}})
1845 .Any({{DivB128, _, DivP1}, {{VgprB128}, {IntrId, VgprP1}}});
1846
1847 addRulesForIOpcs({amdgcn_global_load_tr4_b64})
1848 .Any({{DivV2S32, _, UniP1}, {{VgprV2S32}, {IntrId, SgprP1}}})
1849 .Any({{DivV2S32, _, DivP1}, {{VgprV2S32}, {IntrId, VgprP1}}});
1850
1851 addRulesForIOpcs({amdgcn_global_load_tr6_b96})
1852 .Any({{DivV3S32, _, UniP1}, {{VgprV3S32}, {IntrId, SgprP1}}})
1853 .Any({{DivV3S32, _, DivP1}, {{VgprV3S32}, {IntrId, VgprP1}}});
1854
1855 addRulesForIOpcs({amdgcn_ds_load_tr4_b64, amdgcn_ds_load_tr8_b64})
1856 .Any({{DivV2S32}, {{VgprV2S32}, {IntrId, VgprP3}}});
1857
1858 addRulesForIOpcs({amdgcn_ds_load_tr6_b96})
1859 .Any({{DivV3S32}, {{VgprV3S32}, {IntrId, VgprP3}}});
1860
1861 addRulesForIOpcs({amdgcn_ds_load_tr16_b128})
1862 .Any({{DivB128}, {{VgprB128}, {IntrId, VgprP3}}});
1863
1864 addRulesForIOpcs({amdgcn_global_atomic_ordered_add_b64})
1865 .Any({{DivS64}, {{Vgpr64}, {IntrId, VgprP1, Vgpr64}}});
1866
1867 addRulesForIOpcs(
1868 {amdgcn_global_atomic_fmin_num, amdgcn_global_atomic_fmax_num}, Standard)
1869 .Div(S32, {{Vgpr32}, {IntrId, VgprP1, Vgpr32}});
1870
1871 addRulesForIOpcs({amdgcn_flat_atomic_fmin_num, amdgcn_flat_atomic_fmax_num},
1872 Standard)
1873 .Div(S32, {{Vgpr32}, {IntrId, VgprP0, Vgpr32}});
1874
1875 addRulesForIOpcs({amdgcn_raw_buffer_load_lds})
1876 .Any({{_}, {{}, {IntrId, SgprV4S32, SgprP3, Imm, Vgpr32, Sgpr32}}});
1877
1878 addRulesForIOpcs({amdgcn_struct_buffer_load_lds})
1879 .Any({{_},
1880 {{}, {IntrId, SgprV4S32, SgprP3, Imm, Vgpr32, Vgpr32, Sgpr32}}});
1881
1882 addRulesForIOpcs({amdgcn_raw_ptr_buffer_load_lds})
1883 .Any({{_}, {{}, {IntrId, SgprP8, SgprP3, Imm, Vgpr32, Sgpr32}}});
1884
1885 addRulesForIOpcs({amdgcn_struct_ptr_buffer_load_lds})
1886 .Any({{_}, {{}, {IntrId, SgprP8, SgprP3, Imm, Vgpr32, Vgpr32, Sgpr32}}});
1887
1888 addRulesForIOpcs({amdgcn_global_load_lds})
1889 .Any({{}, {{}, {IntrId, VgprP1, SgprB32_M0}}});
1890
1891 addRulesForIOpcs({amdgcn_global_load_async_to_lds_b8,
1892 amdgcn_global_load_async_to_lds_b32,
1893 amdgcn_global_load_async_to_lds_b64,
1894 amdgcn_global_load_async_to_lds_b128,
1895 amdgcn_global_store_async_from_lds_b8,
1896 amdgcn_global_store_async_from_lds_b32,
1897 amdgcn_global_store_async_from_lds_b64,
1898 amdgcn_global_store_async_from_lds_b128})
1899 .Any({{}, {{}, {IntrId, VgprP1, VgprP3}}});
1900
1901 addRulesForIOpcs({amdgcn_cluster_load_b32})
1903 .Any({{DivB32, _, UniP1}, {{VgprB32}, {IntrId, SgprP1, Imm, SgprB32_M0}}})
1904 .Any(
1905 {{DivB32, _, DivP1}, {{VgprB32}, {IntrId, VgprP1, Imm, SgprB32_M0}}});
1906
1907 addRulesForIOpcs({amdgcn_cluster_load_b64})
1909 .Any({{DivB64, _, UniP1}, {{VgprB64}, {IntrId, SgprP1, Imm, SgprB32_M0}}})
1910 .Any(
1911 {{DivB64, _, DivP1}, {{VgprB64}, {IntrId, VgprP1, Imm, SgprB32_M0}}});
1912
1913 addRulesForIOpcs({amdgcn_cluster_load_b128})
1915 .Any({{DivB128, _, UniP1},
1916 {{VgprB128}, {IntrId, SgprP1, Imm, SgprB32_M0}}})
1917 .Any({{DivB128, _, DivP1},
1918 {{VgprB128}, {IntrId, VgprP1, Imm, SgprB32_M0}}});
1919
1920 addRulesForIOpcs({amdgcn_cluster_load_async_to_lds_b8,
1921 amdgcn_cluster_load_async_to_lds_b32,
1922 amdgcn_cluster_load_async_to_lds_b64,
1923 amdgcn_cluster_load_async_to_lds_b128})
1924 .Any({{}, {{}, {IntrId, VgprP1, VgprP3, Imm, Imm, SgprB32_M0}}});
1925
1926 addRulesForIOpcs({amdgcn_perm_pk16_b4_u4}, StandardB)
1927 .Uni(B64, {{UniInVgprB64}, {IntrId, Vgpr32, Vgpr32, VgprV2S32}})
1928 .Div(B64, {{VgprB64}, {IntrId, Vgpr32, Vgpr32, VgprV2S32}});
1929
1930 addRulesForIOpcs({amdgcn_perm_pk16_b6_u4}, StandardB)
1931 .Uni(B96, {{UniInVgprB96}, {IntrId, Vgpr32, VgprB64, VgprV2S32}})
1932 .Div(B96, {{VgprB96}, {IntrId, Vgpr32, VgprB64, VgprV2S32}});
1933
1934 addRulesForIOpcs({amdgcn_perm_pk16_b8_u4}, StandardB)
1935 .Uni(B128, {{UniInVgprB128}, {IntrId, VgprB64, VgprB64, VgprV2S32}})
1936 .Div(B128, {{VgprB128}, {IntrId, VgprB64, VgprB64, VgprV2S32}});
1937
1938 addRulesForIOpcs({amdgcn_wwm, amdgcn_strict_wwm, amdgcn_wqm, amdgcn_softwqm,
1939 amdgcn_strict_wqm},
1940 StandardB)
1941 .Div(B32, {{VgprB32}, {IntrId, VgprB32}})
1942 .Uni(B32, {{SgprB32}, {IntrId, SgprB32}})
1943 .Div(B64, {{VgprB64}, {IntrId, VgprB64}})
1944 .Uni(B64, {{SgprB64}, {IntrId, SgprB64}})
1945 .Div(B96, {{VgprB96}, {IntrId, VgprB96}})
1946 .Uni(B96, {{SgprB96}, {IntrId, SgprB96}})
1947 .Div(B128, {{VgprB128}, {IntrId, VgprB128}})
1948 .Uni(B128, {{SgprB128}, {IntrId, SgprB128}})
1949 .Any({{UniB256}, {{SgprB256}, {IntrId, SgprB256}}})
1950 .Any({{DivB256}, {{VgprB256}, {IntrId, VgprB256}}})
1951 .Any({{UniB512}, {{SgprB512}, {IntrId, SgprB512}}})
1952 .Any({{DivB512}, {{VgprB512}, {IntrId, VgprB512}}});
1953
1954 addRulesForIOpcs({amdgcn_kill, amdgcn_wqm_demote})
1955 .Any({{}, {{}, {IntrId, Vcc}}});
1956
1957 addRulesForIOpcs({amdgcn_ballot}, Standard)
1958 .Uni(S64, {{Sgpr64}, {IntrId, Vcc}})
1959 .Uni(S32, {{Sgpr32}, {IntrId, Vcc}});
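// Hedged sketch: ballot packs a per-lane condition held in VCC into a
// wave-wide scalar lane mask,
//   %mask:sgpr(s64) = llvm.amdgcn.ballot(%cond:vcc)   ; wave64
// while inverse_ballot below does the opposite, reading one bit of a uniform
// mask per lane, which is why its mask operand is made uniform
// (SgprB32/B64_ReadFirstLane).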
1960
1961 addRulesForIOpcs({amdgcn_inverse_ballot})
1962 .Any({{DivS1, _, S32}, {{Vcc}, {IntrId, SgprB32_ReadFirstLane}}})
1963 .Any({{DivS1, _, S64}, {{Vcc}, {IntrId, SgprB64_ReadFirstLane}}});
1964
1965 addRulesForIOpcs({amdgcn_live_mask, amdgcn_ps_live})
1966 .Any({{DivS1}, {{Vcc}, {}}});
1967
1968 addRulesForIOpcs({amdgcn_mov_dpp, amdgcn_mov_dpp8}, StandardB)
1969 .Div(B32, {{VgprB32}, {IntrId, VgprB32}})
1970 .Div(B64, {{VgprB64}, {IntrId, VgprB64}});
1971
1972 addRulesForIOpcs({amdgcn_update_dpp}, StandardB)
1973 .Div(B32, {{VgprB32}, {IntrId, VgprB32, VgprB32}})
1974 .Div(B64, {{VgprB64}, {IntrId, VgprB64, VgprB64}});
1975
1976 addRulesForIOpcs({amdgcn_sin, amdgcn_cos}, Standard)
1977 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
1978 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}})
1979 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
1980 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}});
1981
1982 addRulesForIOpcs({amdgcn_trig_preop}, Standard)
1983 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64, Vgpr32}})
1984 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64, Vgpr32}});
1985
1986 addRulesForIOpcs({amdgcn_exp2}, Standard)
1987 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
1988 .Uni(S16, {{Sgpr16}, {IntrId, Sgpr16}}, hasPST)
1989 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}}, !hasPST)
1990 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
1991 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32}}, hasPST)
1992 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}}, !hasPST);
1993
1994 addRulesForIOpcs({amdgcn_rcp, amdgcn_sqrt}, Standard)
1995 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
1996 .Uni(S16, {{Sgpr16}, {IntrId, Sgpr16}}, hasPST)
1997 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}}, !hasPST)
1998 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
1999 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32}}, hasPST)
2000 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}}, !hasPST)
2001 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64}})
2002 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64}});
2003
2004 addRulesForIOpcs({amdgcn_log}, Standard)
2005 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
2006 .Uni(S16, {{Sgpr16}, {IntrId, Sgpr16}}, hasPST)
2007 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}}, !hasPST)
2008 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
2009 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32}}, hasPST)
2010 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}}, !hasPST);
2011
2012 addRulesForIOpcs({amdgcn_ds_atomic_async_barrier_arrive_b64})
2013 .Any({{}, {{}, {IntrId, VgprP3}}});
2014
2015 addRulesForIOpcs({amdgcn_ds_atomic_barrier_arrive_rtn_b64}, Standard)
2016 .Div(S64, {{Vgpr64}, {IntrId, VgprP3, Vgpr64}});
2017
2018 addRulesForIOpcs({amdgcn_ds_add_gs_reg_rtn, amdgcn_ds_sub_gs_reg_rtn},
2019 Standard)
2020 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
2021 .Div(S64, {{Vgpr64}, {IntrId, Vgpr32}});
2022
2023 addRulesForIOpcs({amdgcn_ds_append, amdgcn_ds_consume}, Standard)
2024 .Uni(S32, {{UniInVgprS32}, {IntrId, SgprB32_M0}})
2025 .Div(S32, {{Vgpr32}, {IntrId, SgprB32_M0}});
2026
2027 addRulesForIOpcs(
2028 {amdgcn_ds_bvh_stack_rtn, amdgcn_ds_bvh_stack_push4_pop1_rtn}, Standard)
2029 .Div(S32, {{Vgpr32, Vgpr32}, {IntrId, Vgpr32, Vgpr32, VgprV4S32}});
2030
2031 addRulesForIOpcs({amdgcn_ds_bvh_stack_push8_pop1_rtn}, Standard)
2032 .Div(S32, {{Vgpr32, Vgpr32}, {IntrId, Vgpr32, Vgpr32, VgprV8S32}});
2033
2034 addRulesForIOpcs({amdgcn_ds_bvh_stack_push8_pop2_rtn}, Standard)
2035 .Div(S64, {{Vgpr64, Vgpr32}, {IntrId, Vgpr32, Vgpr32, VgprV8S32}});
2036
2037 addRulesForIOpcs({amdgcn_ds_gws_sema_p, amdgcn_ds_gws_sema_v,
2038 amdgcn_ds_gws_sema_release_all})
2039 .Any({{}, {{}, {IntrId, SgprB32_M0}}});
2040
2041 addRulesForIOpcs(
2042 {amdgcn_ds_gws_barrier, amdgcn_ds_gws_init, amdgcn_ds_gws_sema_br})
2043 .Any({{}, {{}, {IntrId, Vgpr32, SgprB32_M0}}});
2044
2045 addRulesForIOpcs({amdgcn_ds_ordered_add, amdgcn_ds_ordered_swap}, Standard)
2046 .Div(S32, {{Vgpr32}, {IntrId, SgprB32_M0, Vgpr32}});
2047
2048 addRulesForIOpcs({amdgcn_ds_swizzle}, Standard)
2049 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}})
2050 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}});
2051
2052 addRulesForIOpcs({amdgcn_permlane16_var, amdgcn_permlanex16_var}, Standard)
2053 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
2054
2055 addRulesForIOpcs({amdgcn_permlane16_swap, amdgcn_permlane32_swap}, Standard)
2056 .Div(S32, {{Vgpr32, Vgpr32}, {IntrId, Vgpr32, Vgpr32}});
2057
2058 addRulesForIOpcs({amdgcn_permlane64}, StandardB)
2059 .Div(B32, {{VgprB32}, {IntrId, VgprB32}});
2060
2061 addRulesForIOpcs({amdgcn_ds_read_tr4_b64, amdgcn_ds_read_tr8_b64})
2062 .Any({{DivV2S32}, {{VgprV2S32}, {IntrId, VgprP3}}});
2063
2064 addRulesForIOpcs({amdgcn_ds_read_tr6_b96})
2065 .Any({{DivV3S32}, {{VgprV3S32}, {IntrId, VgprP3}}});
2066
2067 addRulesForIOpcs({amdgcn_ds_read_tr16_b64})
2068 .Any({{DivV4S16}, {{VgprV4S16}, {IntrId, VgprP3}}});
2069
2070 addRulesForIOpcs({amdgcn_interp_p1}, Standard)
2071 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Imm, Imm, SgprB32_M0}});
2072
2073 addRulesForIOpcs({amdgcn_interp_p1_f16}, Standard)
2074 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Imm, Imm, Imm, SgprB32_M0}});
2075
2076 addRulesForIOpcs({amdgcn_interp_p2}, Standard)
2077 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Imm, Imm, SgprB32_M0}});
2078
2079 addRulesForIOpcs({amdgcn_interp_p2_f16}, Standard)
2080 .Div(S16,
2081 {{Vgpr16}, {IntrId, Vgpr32, Vgpr32, Imm, Imm, Imm, SgprB32_M0}});
2082
2083 addRulesForIOpcs({amdgcn_interp_mov}, Standard)
2084 .Div(S32, {{Vgpr32}, {IntrId, Imm, Imm, Imm, SgprB32_M0}});
2085
2086 addRulesForIOpcs({amdgcn_interp_inreg_p10, amdgcn_interp_inreg_p2,
2087 amdgcn_interp_inreg_p10_f16, amdgcn_interp_p10_rtz_f16},
2088 Standard)
2089 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
2090 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
2091
2092 addRulesForIOpcs({amdgcn_interp_inreg_p2_f16, amdgcn_interp_p2_rtz_f16},
2093 Standard)
2094 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
2095 .Div(S16, {{Vgpr16}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
2096
2097 addRulesForIOpcs({amdgcn_div_fmas}, Standard)
2098 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32, Vcc}})
2099 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32, Vcc}})
2100 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64, Vgpr64, Vgpr64, Vcc}})
2101 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64, Vgpr64, Vgpr64, Vcc}});
2102
2103 addRulesForIOpcs({amdgcn_div_fixup}, Standard)
2104 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
2105 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
2106 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
2107 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
2108 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64, Vgpr64, Vgpr64}})
2109 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64, Vgpr64, Vgpr64}});
2110
2111 addRulesForIOpcs({amdgcn_div_scale}, Standard)
2112 .Div(S32, {{Vgpr32, Vcc}, {IntrId, Vgpr32, Vgpr32}})
2113 .Uni(S32, {{UniInVgprS32, UniInVcc}, {IntrId, Vgpr32, Vgpr32}})
2114 .Div(S64, {{Vgpr64, Vcc}, {IntrId, Vgpr64, Vgpr64}})
2115 .Uni(S64, {{UniInVgprS64, UniInVcc}, {IntrId, Vgpr64, Vgpr64}});
2116
2117 addRulesForIOpcs({amdgcn_fdot2, amdgcn_sdot2, amdgcn_udot2}, Standard)
2118 .Uni(S32, {{UniInVgprS32}, {IntrId, VgprV2S16, VgprV2S16, Vgpr32}})
2119 .Div(S32, {{Vgpr32}, {IntrId, VgprV2S16, VgprV2S16, Vgpr32}});
2120
2121 addRulesForIOpcs({amdgcn_fdot2_f16_f16}, Standard)
2122 .Uni(S16, {{UniInVgprS16}, {IntrId, VgprV2S16, VgprV2S16, Vgpr16}})
2123 .Div(S16, {{Vgpr16}, {IntrId, VgprV2S16, VgprV2S16, Vgpr16}});
2124
2125 addRulesForIOpcs({amdgcn_sudot4, amdgcn_sudot8}, Standard)
2126 .Uni(S32, {{UniInVgprS32}, {IntrId, Imm, Vgpr32, Imm, Vgpr32, Vgpr32}})
2127 .Div(S32, {{Vgpr32}, {IntrId, Imm, Vgpr32, Imm, Vgpr32, Vgpr32}});
2128
2129 addRulesForIOpcs({amdgcn_s_alloc_vgpr})
2131
2132 addRulesForIOpcs({amdgcn_sat_pk4_i4_i8, amdgcn_sat_pk4_u4_u8}, Standard)
2133 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr32}})
2134 .Div(S16, {{Vgpr16}, {IntrId, Vgpr32}});
2135
2136 // TODO: Add handling for GFX90A+ which should use VGPRs instead of AGPRs.
2137 bool HasGFX90AInsts = ST->hasGFX90AInsts();
2138 addRulesForIOpcs({amdgcn_mfma_f32_32x32x1f32, amdgcn_mfma_f32_16x16x1f32,
2139 amdgcn_mfma_f32_4x4x1f32, amdgcn_mfma_f32_32x32x2f32,
2140 amdgcn_mfma_f32_16x16x4f32, amdgcn_mfma_f32_32x32x4f16,
2141 amdgcn_mfma_f32_16x16x4f16, amdgcn_mfma_f32_4x4x4f16,
2142 amdgcn_mfma_f32_32x32x8f16, amdgcn_mfma_f32_16x16x16f16,
2143 amdgcn_mfma_i32_32x32x4i8, amdgcn_mfma_i32_16x16x4i8,
2144 amdgcn_mfma_i32_4x4x4i8, amdgcn_mfma_i32_32x32x8i8,
2145 amdgcn_mfma_i32_16x16x16i8, amdgcn_mfma_f32_32x32x2bf16,
2146 amdgcn_mfma_f32_16x16x2bf16, amdgcn_mfma_f32_4x4x2bf16,
2147 amdgcn_mfma_f32_32x32x4bf16, amdgcn_mfma_f32_16x16x8bf16})
2148 .Any({{DivAnyTy},
2150 !HasGFX90AInsts);
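// Hedged note on the rule above: before GFX90A, MFMA instructions read and
// write their matrix accumulator operands in AGPRs rather than VGPRs, e.g.
//   v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]   ; A/B in VGPRs, C/D in AGPRs
// which is why this mapping is guarded by !HasGFX90AInsts and matches any
// divergent result type via DivAnyTy.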
2151
2152 // WMMA/SWMMAC intrinsics: all register operands map to VGPR.
2153 addRulesForIOpcs(
2154 {// WMMA GFX11+
2155 amdgcn_wmma_f32_16x16x16_f16, amdgcn_wmma_f32_16x16x16_bf16,
2156 amdgcn_wmma_f16_16x16x16_f16, amdgcn_wmma_bf16_16x16x16_bf16,
2157 amdgcn_wmma_f16_16x16x16_f16_tied, amdgcn_wmma_bf16_16x16x16_bf16_tied,
2158 amdgcn_wmma_i32_16x16x16_iu8, amdgcn_wmma_i32_16x16x16_iu4,
2159 // WMMA GFX12
2160 amdgcn_wmma_f32_16x16x16_fp8_fp8, amdgcn_wmma_f32_16x16x16_fp8_bf8,
2161 amdgcn_wmma_f32_16x16x16_bf8_fp8, amdgcn_wmma_f32_16x16x16_bf8_bf8,
2162 amdgcn_wmma_i32_16x16x32_iu4,
2163 // WMMA GFX1250
2164 amdgcn_wmma_f32_16x16x4_f32, amdgcn_wmma_f32_16x16x32_bf16,
2165 amdgcn_wmma_f32_16x16x32_f16, amdgcn_wmma_f16_16x16x32_f16,
2166 amdgcn_wmma_bf16_16x16x32_bf16, amdgcn_wmma_bf16f32_16x16x32_bf16,
2167 amdgcn_wmma_f32_16x16x64_fp8_fp8, amdgcn_wmma_f32_16x16x64_fp8_bf8,
2168 amdgcn_wmma_f32_16x16x64_bf8_fp8, amdgcn_wmma_f32_16x16x64_bf8_bf8,
2169 amdgcn_wmma_f16_16x16x64_fp8_fp8, amdgcn_wmma_f16_16x16x64_fp8_bf8,
2170 amdgcn_wmma_f16_16x16x64_bf8_fp8, amdgcn_wmma_f16_16x16x64_bf8_bf8,
2171 amdgcn_wmma_f16_16x16x128_fp8_fp8, amdgcn_wmma_f16_16x16x128_fp8_bf8,
2172 amdgcn_wmma_f16_16x16x128_bf8_fp8, amdgcn_wmma_f16_16x16x128_bf8_bf8,
2173 amdgcn_wmma_f32_16x16x128_fp8_fp8, amdgcn_wmma_f32_16x16x128_fp8_bf8,
2174 amdgcn_wmma_f32_16x16x128_bf8_fp8, amdgcn_wmma_f32_16x16x128_bf8_bf8,
2175 amdgcn_wmma_i32_16x16x64_iu8, amdgcn_wmma_f32_16x16x128_f8f6f4,
2176 amdgcn_wmma_scale_f32_16x16x128_f8f6f4,
2177 amdgcn_wmma_scale16_f32_16x16x128_f8f6f4, amdgcn_wmma_f32_32x16x128_f4,
2178 amdgcn_wmma_scale_f32_32x16x128_f4, amdgcn_wmma_scale16_f32_32x16x128_f4,
2179 // SWMMAC GFX12
2180 amdgcn_swmmac_f32_16x16x32_f16, amdgcn_swmmac_f32_16x16x32_bf16,
2181 amdgcn_swmmac_f16_16x16x32_f16, amdgcn_swmmac_bf16_16x16x32_bf16,
2182 amdgcn_swmmac_i32_16x16x32_iu8, amdgcn_swmmac_i32_16x16x32_iu4,
2183 amdgcn_swmmac_i32_16x16x64_iu4, amdgcn_swmmac_f32_16x16x32_fp8_fp8,
2184 amdgcn_swmmac_f32_16x16x32_fp8_bf8, amdgcn_swmmac_f32_16x16x32_bf8_fp8,
2185 amdgcn_swmmac_f32_16x16x32_bf8_bf8,
2186 // SWMMAC GFX1250
2187 amdgcn_swmmac_f32_16x16x64_f16, amdgcn_swmmac_f32_16x16x64_bf16,
2188 amdgcn_swmmac_f16_16x16x64_f16, amdgcn_swmmac_bf16_16x16x64_bf16,
2189 amdgcn_swmmac_bf16f32_16x16x64_bf16, amdgcn_swmmac_f32_16x16x128_fp8_fp8,
2190 amdgcn_swmmac_f32_16x16x128_fp8_bf8, amdgcn_swmmac_f32_16x16x128_bf8_fp8,
2191 amdgcn_swmmac_f32_16x16x128_bf8_bf8, amdgcn_swmmac_f16_16x16x128_fp8_fp8,
2192 amdgcn_swmmac_f16_16x16x128_fp8_bf8, amdgcn_swmmac_f16_16x16x128_bf8_fp8,
2193 amdgcn_swmmac_f16_16x16x128_bf8_bf8, amdgcn_swmmac_i32_16x16x128_iu8})
2194 .Any({{}, {{}, {}, ApplyAllVgpr}});
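// ApplyAllVgpr (matching the comment above) assigns every register operand of
// the matched intrinsic to VGPRs without inspecting individual types; the
// matrix operands are wide vectors that only exist in VGPRs, so no per-type
// rules are needed. Sketch of the effect:
//   %d:vgpr = wmma %a:vgpr, %b:vgpr, %c:vgpr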
2195
2196} // end initialize rules