LLVM 22.0.0git
AMDGPURegBankLegalizeHelper.cpp
Go to the documentation of this file.
1//===-- AMDGPURegBankLegalizeHelper.cpp -----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// Implements actual lowering algorithms for each ID that can be used in
10/// Rule.OperandMapping. Similar to legalizer helper but with register banks.
11//
12//===----------------------------------------------------------------------===//
13
16#include "AMDGPUInstrInfo.h"
19#include "GCNSubtarget.h"
25#include "llvm/IR/IntrinsicsAMDGPU.h"
26
27#define DEBUG_TYPE "amdgpu-regbanklegalize"
28
29using namespace llvm;
30using namespace AMDGPU;
31
// Constructor member-initializer list (NOTE(extraction): the signature line
// taking MachineIRBuilder/MachineUniformityInfo is missing from this dump).
// Caches the function, subtarget, builder, MRI and uniformity info, and
// resolves the three AMDGPU register banks (SGPR/VGPR/VCC) once up front.
// IsWave32 selects wave32 vs wave64 exec-mask handling later on.
34 const RegisterBankInfo &RBI, const RegBankLegalizeRules &RBLRules)
35 : MF(B.getMF()), ST(MF.getSubtarget<GCNSubtarget>()), B(B),
36 MRI(*B.getMRI()), MUI(MUI), RBI(RBI), MORE(MF, nullptr),
37 RBLRules(RBLRules), IsWave32(ST.isWave32()),
38 SgprRB(&RBI.getRegBank(AMDGPU::SGPRRegBankID)),
39 VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),
40 VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {}
41
// findRuleAndApplyMapping (NOTE(extraction): the function signature line is
// missing from this dump). Looks up the rule set registered for MI's opcode,
// selects the RegBankLLT mapping that matches MI, applies the mapping to the
// def operands and then the use operands, and finally runs the chosen
// lowering method. Returns false after reporting a GlobalISel failure when
// no rule or mapping applies, or when any apply/lower step fails.
43 const SetOfRulesForOpcode *RuleSet = RBLRules.getRulesForOpc(MI);
44 if (!RuleSet) {
45 reportGISelFailure(MF, MORE, "amdgpu-regbanklegalize",
46 "No AMDGPU RegBankLegalize rules defined for opcode",
47 MI);
48 return false;
49 }
50
51 const RegBankLLTMapping *Mapping = RuleSet->findMappingForMI(MI, MRI, MUI);
52 if (!Mapping) {
53 reportGISelFailure(MF, MORE, "amdgpu-regbanklegalize",
54 "AMDGPU RegBankLegalize: none of the rules defined with "
55 "'Any' for MI's opcode matched MI",
56 MI);
57 return false;
58 }
59
// SGPR use operands that must be made uniform via a waterfall loop are
// collected by applyMappingSrc and consumed by lower().
60 SmallSet<Register, 4> WaterfallSgprs;
61 unsigned OpIdx = 0;
// Defs first: instructions fixing up results are inserted after MI.
62 if (Mapping->DstOpMapping.size() > 0) {
63 B.setInsertPt(*MI.getParent(), std::next(MI.getIterator()));
64 if (!applyMappingDst(MI, OpIdx, Mapping->DstOpMapping))
65 return false;
66 }
// Uses next: instructions fixing up inputs are inserted before MI.
67 if (Mapping->SrcOpMapping.size() > 0) {
68 B.setInstr(MI);
69 if (!applyMappingSrc(MI, OpIdx, Mapping->SrcOpMapping, WaterfallSgprs))
70 return false;
71 }
72
73 if (!lower(MI, *Mapping, WaterfallSgprs))
74 return false;
75
76 return true;
77}
78
// Wraps the instruction range in a waterfall loop so that VGPR operands that
// must be uniform (SGPR) are processed one unique lane-value at a time.
// NOTE(extraction): this dump is missing the iterator_range parameter line
// and the declarations of TRI, LoopBB, BodyBB, the block iterator MBBI and
// the OpParts vector — confirm against the original file before editing.
79bool RegBankLegalizeHelper::executeInWaterfallLoop(
81 SmallSet<Register, 4> &SGPROperandRegs) {
82 // Track use registers which have already been expanded with a readfirstlane
83 // sequence. This may have multiple uses if moving a sequence.
84 DenseMap<Register, Register> WaterfalledRegMap;
85
86 MachineBasicBlock &MBB = B.getMBB();
87 MachineFunction &MF = B.getMF();
88
// Pick the wave-size-specific exec-mask opcodes and physical exec register.
90 const TargetRegisterClass *WaveRC = TRI->getWaveMaskRegClass();
91 unsigned MovExecOpc, MovExecTermOpc, XorTermOpc, AndSaveExecOpc, ExecReg;
92 if (IsWave32) {
93 MovExecOpc = AMDGPU::S_MOV_B32;
94 MovExecTermOpc = AMDGPU::S_MOV_B32_term;
95 XorTermOpc = AMDGPU::S_XOR_B32_term;
96 AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B32;
97 ExecReg = AMDGPU::EXEC_LO;
98 } else {
99 MovExecOpc = AMDGPU::S_MOV_B64;
100 MovExecTermOpc = AMDGPU::S_MOV_B64_term;
101 XorTermOpc = AMDGPU::S_XOR_B64_term;
102 AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B64;
103 ExecReg = AMDGPU::EXEC;
104 }
105
106#ifndef NDEBUG
107 const int OrigRangeSize = std::distance(Range.begin(), Range.end());
108#endif
109
110 MachineRegisterInfo &MRI = *B.getMRI();
111 Register SaveExecReg = MRI.createVirtualRegister(WaveRC);
112 Register InitSaveExecReg = MRI.createVirtualRegister(WaveRC);
113
114 // Don't bother using generic instructions/registers for the exec mask.
115 B.buildInstr(TargetOpcode::IMPLICIT_DEF).addDef(InitSaveExecReg);
116
117 Register SavedExec = MRI.createVirtualRegister(WaveRC);
118
119 // To insert the loop we need to split the block. Move everything before
120 // this point to a new block, and insert a new empty block before this
121 // instruction.
124 MachineBasicBlock *RestoreExecBB = MF.CreateMachineBasicBlock();
125 MachineBasicBlock *RemainderBB = MF.CreateMachineBasicBlock();
127 ++MBBI;
128 MF.insert(MBBI, LoopBB);
129 MF.insert(MBBI, BodyBB);
130 MF.insert(MBBI, RestoreExecBB);
131 MF.insert(MBBI, RemainderBB);
132
133 LoopBB->addSuccessor(BodyBB);
134 BodyBB->addSuccessor(RestoreExecBB);
135 BodyBB->addSuccessor(LoopBB);
136
137 // Move the rest of the block into a new block.
139 RemainderBB->splice(RemainderBB->begin(), &MBB, Range.end(), MBB.end());
140
141 MBB.addSuccessor(LoopBB);
142 RestoreExecBB->addSuccessor(RemainderBB);
143
144 B.setInsertPt(*LoopBB, LoopBB->end());
145
146 // +-MBB:------------+
147 // | ... |
148 // | %0 = G_INST_1 |
149 // | %Dst = MI %Vgpr |
150 // | %1 = G_INST_2 |
151 // | ... |
152 // +-----------------+
153 // ->
154 // +-MBB-------------------------------+
155 // | ... |
156 // | %0 = G_INST_1 |
157 // | %SaveExecReg = S_MOV_B32 $exec_lo |
158 // +----------------|------------------+
159 // | /------------------------------|
160 // V V |
161 // +-LoopBB---------------------------------------------------------------+ |
162 // | %CurrentLaneReg:sgpr(s32) = READFIRSTLANE %Vgpr | |
163 // | instead of executing for each lane, see if other lanes had | |
164 // | same value for %Vgpr and execute for them also. | |
165 // | %CondReg:vcc(s1) = G_ICMP eq %CurrentLaneReg, %Vgpr | |
166 // | %CondRegLM:sreg_32 = ballot %CondReg // copy vcc to sreg32 lane mask | |
167 // | %SavedExec = S_AND_SAVEEXEC_B32 %CondRegLM | |
168 // | exec is active for lanes with the same "CurrentLane value" in Vgpr | |
169 // +----------------|-----------------------------------------------------+ |
170 // V |
171 // +-BodyBB------------------------------------------------------------+ |
172 // | %Dst = MI %CurrentLaneReg:sgpr(s32) | |
173 // | executed only for active lanes and written to Dst | |
174 // | $exec = S_XOR_B32 $exec, %SavedExec | |
175 // | set active lanes to 0 in SavedExec, lanes that did not write to | |
176 // | Dst yet, and set this as new exec (for READFIRSTLANE and ICMP) | |
177 // | SI_WATERFALL_LOOP LoopBB |-----|
178 // +----------------|--------------------------------------------------+
179 // V
180 // +-RestoreExecBB--------------------------+
181 // | $exec_lo = S_MOV_B32_term %SaveExecReg |
182 // +----------------|-----------------------+
183 // V
184 // +-RemainderBB:----------------------+
185 // | %1 = G_INST_2 |
186 // | ... |
187 // +---------------------------------- +
188
189 // Move the instruction into the loop body. Note we moved everything after
190 // Range.end() already into a new block, so Range.end() is no longer valid.
191 BodyBB->splice(BodyBB->end(), &MBB, Range.begin(), MBB.end());
192
193 // Figure out the iterator range after splicing the instructions.
194 MachineBasicBlock::iterator NewBegin = Range.begin()->getIterator();
195 auto NewEnd = BodyBB->end();
196 assert(std::distance(NewBegin, NewEnd) == OrigRangeSize);
197
198 B.setMBB(*LoopBB);
199 Register CondReg;
200
// For each waterfalled use: read the first active lane's value into an SGPR,
// compare per-lane with the VGPR, and AND all comparisons into CondReg.
201 for (MachineInstr &MI : make_range(NewBegin, NewEnd)) {
202 for (MachineOperand &Op : MI.all_uses()) {
203 Register OldReg = Op.getReg();
204 if (!SGPROperandRegs.count(OldReg))
205 continue;
206
207 // See if we already processed this register in another instruction in
208 // the sequence.
209 auto OldVal = WaterfalledRegMap.find(OldReg);
210 if (OldVal != WaterfalledRegMap.end()) {
211 Op.setReg(OldVal->second);
212 continue;
213 }
214
215 Register OpReg = Op.getReg();
216 LLT OpTy = MRI.getType(OpReg);
217
218 // TODO: support for agpr
219 assert(MRI.getRegBank(OpReg) == VgprRB);
220 Register CurrentLaneReg = MRI.createVirtualRegister({SgprRB, OpTy});
221 buildReadFirstLane(B, CurrentLaneReg, OpReg, RBI);
222
223 // Build the comparison(s), CurrentLaneReg == OpReg.
224 unsigned OpSize = OpTy.getSizeInBits();
// Wide values are compared in 64-bit (or 32-bit) pieces.
225 unsigned PartSize = (OpSize % 64 == 0) ? 64 : 32;
226 LLT PartTy = LLT::scalar(PartSize);
227 unsigned NumParts = OpSize / PartSize;
229 SmallVector<Register, 8> CurrentLaneParts;
230
231 if (NumParts == 1) {
232 OpParts.push_back(OpReg);
233 CurrentLaneParts.push_back(CurrentLaneReg);
234 } else {
235 auto UnmergeOp = B.buildUnmerge({VgprRB, PartTy}, OpReg);
236 auto UnmergeCurrLane = B.buildUnmerge({SgprRB, PartTy}, CurrentLaneReg);
237 for (unsigned i = 0; i < NumParts; ++i) {
238 OpParts.push_back(UnmergeOp.getReg(i));
239 CurrentLaneParts.push_back(UnmergeCurrLane.getReg(i));
240 }
241 }
242
243 for (unsigned i = 0; i < NumParts; ++i) {
244 Register CmpReg = MRI.createVirtualRegister(VccRB_S1);
245 B.buildICmp(CmpInst::ICMP_EQ, CmpReg, CurrentLaneParts[i], OpParts[i]);
246
247 if (!CondReg)
248 CondReg = CmpReg;
249 else
250 CondReg = B.buildAnd(VccRB_S1, CondReg, CmpReg).getReg(0);
251 }
252
253 Op.setReg(CurrentLaneReg);
254
255 // Make sure we don't re-process this register again.
256 WaterfalledRegMap.insert(std::pair(OldReg, Op.getReg()));
257 }
258 }
259
260 // Copy vcc to sgpr32/64, ballot becomes a no-op during instruction selection.
261 Register CondRegLM =
262 MRI.createVirtualRegister({WaveRC, LLT::scalar(IsWave32 ? 32 : 64)});
263 B.buildIntrinsic(Intrinsic::amdgcn_ballot, CondRegLM).addReg(CondReg);
264
265 // Update EXEC, save the original EXEC value to SavedExec.
266 B.buildInstr(AndSaveExecOpc)
267 .addDef(SavedExec)
268 .addReg(CondRegLM, RegState::Kill);
269 MRI.setSimpleHint(SavedExec, CondRegLM);
270
271 B.setInsertPt(*BodyBB, BodyBB->end());
272
273 // Update EXEC, switch all done bits to 0 and all todo bits to 1.
274 B.buildInstr(XorTermOpc).addDef(ExecReg).addReg(ExecReg).addReg(SavedExec);
275
276 // XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
277 // s_cbranch_scc0?
278
279 // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover.
280 B.buildInstr(AMDGPU::SI_WATERFALL_LOOP).addMBB(LoopBB);
281
282 // Save the EXEC mask before the loop.
283 B.setInsertPt(MBB, MBB.end());
284 B.buildInstr(MovExecOpc).addDef(SaveExecReg).addReg(ExecReg);
285
286 // Restore the EXEC mask after the loop.
287 B.setInsertPt(*RestoreExecBB, RestoreExecBB->begin());
288 B.buildInstr(MovExecTermOpc).addDef(ExecReg).addReg(SaveExecReg);
289
290 // Set the insert point after the original instruction, so any new
291 // instructions will be in the remainder.
292 B.setInsertPt(*RemainderBB, RemainderBB->begin());
293
294 return true;
295}
296
297bool RegBankLegalizeHelper::splitLoad(MachineInstr &MI,
298 ArrayRef<LLT> LLTBreakdown, LLT MergeTy) {
299 MachineFunction &MF = B.getMF();
300 assert(MI.getNumMemOperands() == 1);
301 MachineMemOperand &BaseMMO = **MI.memoperands_begin();
302 Register Dst = MI.getOperand(0).getReg();
303 const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst);
304 Register Base = MI.getOperand(1).getReg();
305 LLT PtrTy = MRI.getType(Base);
306 const RegisterBank *PtrRB = MRI.getRegBankOrNull(Base);
307 LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
308 SmallVector<Register, 4> LoadPartRegs;
309
310 unsigned ByteOffset = 0;
311 for (LLT PartTy : LLTBreakdown) {
312 Register BasePlusOffset;
313 if (ByteOffset == 0) {
314 BasePlusOffset = Base;
315 } else {
316 auto Offset = B.buildConstant({PtrRB, OffsetTy}, ByteOffset);
317 BasePlusOffset =
318 B.buildObjectPtrOffset({PtrRB, PtrTy}, Base, Offset).getReg(0);
319 }
320 auto *OffsetMMO = MF.getMachineMemOperand(&BaseMMO, ByteOffset, PartTy);
321 auto LoadPart = B.buildLoad({DstRB, PartTy}, BasePlusOffset, *OffsetMMO);
322 LoadPartRegs.push_back(LoadPart.getReg(0));
323 ByteOffset += PartTy.getSizeInBytes();
324 }
325
326 if (!MergeTy.isValid()) {
327 // Loads are of same size, concat or merge them together.
328 B.buildMergeLikeInstr(Dst, LoadPartRegs);
329 } else {
330 // Loads are not all of same size, need to unmerge them to smaller pieces
331 // of MergeTy type, then merge pieces to Dst.
332 SmallVector<Register, 4> MergeTyParts;
333 for (Register Reg : LoadPartRegs) {
334 if (MRI.getType(Reg) == MergeTy) {
335 MergeTyParts.push_back(Reg);
336 } else {
337 auto Unmerge = B.buildUnmerge({DstRB, MergeTy}, Reg);
338 for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i)
339 MergeTyParts.push_back(Unmerge.getReg(i));
340 }
341 }
342 B.buildMergeLikeInstr(Dst, MergeTyParts);
343 }
344 MI.eraseFromParent();
345 return true;
346}
347
348bool RegBankLegalizeHelper::widenLoad(MachineInstr &MI, LLT WideTy,
349 LLT MergeTy) {
350 MachineFunction &MF = B.getMF();
351 assert(MI.getNumMemOperands() == 1);
352 MachineMemOperand &BaseMMO = **MI.memoperands_begin();
353 Register Dst = MI.getOperand(0).getReg();
354 const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst);
355 Register Base = MI.getOperand(1).getReg();
356
357 MachineMemOperand *WideMMO = MF.getMachineMemOperand(&BaseMMO, 0, WideTy);
358 auto WideLoad = B.buildLoad({DstRB, WideTy}, Base, *WideMMO);
359
360 if (WideTy.isScalar()) {
361 B.buildTrunc(Dst, WideLoad);
362 } else {
363 SmallVector<Register, 4> MergeTyParts;
364 auto Unmerge = B.buildUnmerge({DstRB, MergeTy}, WideLoad);
365
366 LLT DstTy = MRI.getType(Dst);
367 unsigned NumElts = DstTy.getSizeInBits() / MergeTy.getSizeInBits();
368 for (unsigned i = 0; i < NumElts; ++i) {
369 MergeTyParts.push_back(Unmerge.getReg(i));
370 }
371 B.buildMergeLikeInstr(Dst, MergeTyParts);
372 }
373 MI.eraseFromParent();
374 return true;
375}
376
377bool RegBankLegalizeHelper::widenMMOToS32(GAnyLoad &MI) const {
378 Register Dst = MI.getDstReg();
379 Register Ptr = MI.getPointerReg();
380 MachineMemOperand &MMO = MI.getMMO();
381 unsigned MemSize = 8 * MMO.getSize().getValue();
382
383 MachineMemOperand *WideMMO = B.getMF().getMachineMemOperand(&MMO, 0, S32);
384
385 if (MI.getOpcode() == G_LOAD) {
386 B.buildLoad(Dst, Ptr, *WideMMO);
387 } else {
388 auto Load = B.buildLoad(SgprRB_S32, Ptr, *WideMMO);
389
390 if (MI.getOpcode() == G_ZEXTLOAD) {
391 APInt Mask = APInt::getLowBitsSet(S32.getSizeInBits(), MemSize);
392 auto MaskCst = B.buildConstant(SgprRB_S32, Mask);
393 B.buildAnd(Dst, Load, MaskCst);
394 } else {
395 assert(MI.getOpcode() == G_SEXTLOAD);
396 B.buildSExtInReg(Dst, Load, MemSize);
397 }
398 }
399
400 MI.eraseFromParent();
401 return true;
402}
403
// Lower G_{S|Z|ANY}EXT of a VCC (lane-mask) source into a select between
// constants: -1 for sext true, 1 for zext/anyext true, 0 for false. S64
// results are built from two S32 halves. NOTE(extraction): the two
// `reportGISelFailure(` call-opening lines are missing from this dump.
404bool RegBankLegalizeHelper::lowerVccExtToSel(MachineInstr &MI) {
405 Register Dst = MI.getOperand(0).getReg();
406 LLT Ty = MRI.getType(Dst);
407 Register Src = MI.getOperand(1).getReg();
408 unsigned Opc = MI.getOpcode();
// Sign-extended true is all-ones (-1); zext/anyext true is 1.
409 int TrueExtCst = Opc == G_SEXT ? -1 : 1;
410 if (Ty == S32 || Ty == S16) {
411 auto True = B.buildConstant({VgprRB, Ty}, TrueExtCst);
412 auto False = B.buildConstant({VgprRB, Ty}, 0);
413 B.buildSelect(Dst, Src, True, False);
414 } else if (Ty == S64) {
415 auto True = B.buildConstant({VgprRB_S32}, TrueExtCst);
416 auto False = B.buildConstant({VgprRB_S32}, 0);
417 auto Lo = B.buildSelect({VgprRB_S32}, Src, True, False);
418 MachineInstrBuilder Hi;
// High half: replicate for sext, zero for zext, undef for anyext.
419 switch (Opc) {
420 case G_SEXT:
421 Hi = Lo;
422 break;
423 case G_ZEXT:
424 Hi = False;
425 break;
426 case G_ANYEXT:
427 Hi = B.buildUndef({VgprRB_S32});
428 break;
429 default:
431 MF, MORE, "amdgpu-regbanklegalize",
432 "AMDGPU RegBankLegalize: lowerVccExtToSel, Opcode not supported", MI);
433 return false;
434 }
435
436 B.buildMergeValues(Dst, {Lo.getReg(0), Hi.getReg(0)});
437 } else {
439 MF, MORE, "amdgpu-regbanklegalize",
440 "AMDGPU RegBankLegalize: lowerVccExtToSel, Type not supported", MI);
441 return false;
442 }
443
444 MI.eraseFromParent();
445 return true;
446}
447
448std::pair<Register, Register> RegBankLegalizeHelper::unpackZExt(Register Reg) {
449 auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
450 auto Mask = B.buildConstant(SgprRB_S32, 0x0000ffff);
451 auto Lo = B.buildAnd(SgprRB_S32, PackedS32, Mask);
452 auto Hi = B.buildLShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16));
453 return {Lo.getReg(0), Hi.getReg(0)};
454}
455
456std::pair<Register, Register> RegBankLegalizeHelper::unpackSExt(Register Reg) {
457 auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
458 auto Lo = B.buildSExtInReg(SgprRB_S32, PackedS32, 16);
459 auto Hi = B.buildAShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16));
460 return {Lo.getReg(0), Hi.getReg(0)};
461}
462
463std::pair<Register, Register> RegBankLegalizeHelper::unpackAExt(Register Reg) {
464 auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
465 auto Lo = PackedS32;
466 auto Hi = B.buildLShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16));
467 return {Lo.getReg(0), Hi.getReg(0)};
468}
469
470std::pair<Register, Register>
471RegBankLegalizeHelper::unpackAExtTruncS16(Register Reg) {
472 auto [Lo32, Hi32] = unpackAExt(Reg);
473 return {B.buildTrunc(SgprRB_S16, Lo32).getReg(0),
474 B.buildTrunc(SgprRB_S16, Hi32).getReg(0)};
475}
476
// Lower a V2S16 shift by unpacking both operands to S32 halves (extension
// kind chosen per opcode: any-ext for shl, zext for lshr, sext for ashr),
// shifting each half, and repacking with a truncating build-vector.
// NOTE(extraction): the `reportGISelFailure(` call-opening line is missing
// from this dump.
477bool RegBankLegalizeHelper::lowerUnpackBitShift(MachineInstr &MI) {
478 Register Lo, Hi;
479 switch (MI.getOpcode()) {
480 case AMDGPU::G_SHL: {
// Shl only cares about low 16 bits of each half; any-extend suffices.
481 auto [Val0, Val1] = unpackAExt(MI.getOperand(1).getReg());
482 auto [Amt0, Amt1] = unpackAExt(MI.getOperand(2).getReg());
483 Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0, Amt0}).getReg(0);
484 Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val1, Amt1}).getReg(0);
485 break;
486 }
487 case AMDGPU::G_LSHR: {
// Logical shift right needs zero-filled high bits.
488 auto [Val0, Val1] = unpackZExt(MI.getOperand(1).getReg());
489 auto [Amt0, Amt1] = unpackZExt(MI.getOperand(2).getReg());
490 Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0, Amt0}).getReg(0);
491 Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val1, Amt1}).getReg(0);
492 break;
493 }
494 case AMDGPU::G_ASHR: {
// Arithmetic shift right needs sign-filled high bits.
495 auto [Val0, Val1] = unpackSExt(MI.getOperand(1).getReg());
496 auto [Amt0, Amt1] = unpackSExt(MI.getOperand(2).getReg());
497 Lo = B.buildAShr(SgprRB_S32, Val0, Amt0).getReg(0);
498 Hi = B.buildAShr(SgprRB_S32, Val1, Amt1).getReg(0);
499 break;
500 }
501 default:
503 MF, MORE, "amdgpu-regbanklegalize",
504 "AMDGPU RegBankLegalize: lowerUnpackBitShift, case not implemented",
505 MI);
506 return false;
507 }
508 B.buildBuildVectorTrunc(MI.getOperand(0).getReg(), {Lo, Hi});
509 MI.eraseFromParent();
510 return true;
511}
512
// Lower a V2S16 min/max by unpacking both operands to S32 halves (sext for
// signed smin/smax, zext for unsigned umin/umax), applying the op per half,
// and repacking with a truncating build-vector. NOTE(extraction): the
// `reportGISelFailure(` call-opening line is missing from this dump.
513bool RegBankLegalizeHelper::lowerUnpackMinMax(MachineInstr &MI) {
514 Register Lo, Hi;
515 switch (MI.getOpcode()) {
516 case AMDGPU::G_SMIN:
517 case AMDGPU::G_SMAX: {
518 // For signed operations, use sign extension
519 auto [Val0_Lo, Val0_Hi] = unpackSExt(MI.getOperand(1).getReg());
520 auto [Val1_Lo, Val1_Hi] = unpackSExt(MI.getOperand(2).getReg());
521 Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0_Lo, Val1_Lo})
522 .getReg(0);
523 Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0_Hi, Val1_Hi})
524 .getReg(0);
525 break;
526 }
527 case AMDGPU::G_UMIN:
528 case AMDGPU::G_UMAX: {
529 // For unsigned operations, use zero extension
530 auto [Val0_Lo, Val0_Hi] = unpackZExt(MI.getOperand(1).getReg());
531 auto [Val1_Lo, Val1_Hi] = unpackZExt(MI.getOperand(2).getReg());
532 Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0_Lo, Val1_Lo})
533 .getReg(0);
534 Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0_Hi, Val1_Hi})
535 .getReg(0);
536 break;
537 }
538 default:
540 MF, MORE, "amdgpu-regbanklegalize",
541 "AMDGPU RegBankLegalize: lowerUnpackMinMax, case not implemented", MI);
542 return false;
543 }
544 B.buildBuildVectorTrunc(MI.getOperand(0).getReg(), {Lo, Hi});
545 MI.eraseFromParent();
546 return true;
547}
548
549bool RegBankLegalizeHelper::lowerUnpackAExt(MachineInstr &MI) {
550 auto [Op1Lo, Op1Hi] = unpackAExt(MI.getOperand(1).getReg());
551 auto [Op2Lo, Op2Hi] = unpackAExt(MI.getOperand(2).getReg());
552 auto ResLo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Op1Lo, Op2Lo});
553 auto ResHi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Op1Hi, Op2Hi});
554 B.buildBuildVectorTrunc(MI.getOperand(0).getReg(),
555 {ResLo.getReg(0), ResHi.getReg(0)});
556 MI.eraseFromParent();
557 return true;
558}
559
// isSignedBFE (NOTE(extraction): the function header and the dyn_cast of MI
// to GIntrinsic are missing from this dump). Returns true when MI is a
// signed bitfield extract: the amdgcn.sbfe intrinsic or G_SBFX.
562 return (GI->is(Intrinsic::amdgcn_sbfe));
563
564 return MI.getOpcode() == AMDGPU::G_SBFX;
565}
566
// Lower a 64-bit divergent (VGPR) bitfield extract. There is no 64-bit
// V_BFE instruction, so the extract is emulated with shifts, or with a
// 32-bit BFX on one half when the width is a known constant.
567bool RegBankLegalizeHelper::lowerV_BFE(MachineInstr &MI) {
568 Register Dst = MI.getOperand(0).getReg();
569 assert(MRI.getType(Dst) == LLT::scalar(64));
570 bool Signed = isSignedBFE(MI);
// Intrinsic form has the intrinsic ID at operand 1; G_SBFX/G_UBFX do not.
571 unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1;
572 // Extract bitfield from Src, LSBit is the least-significant bit for the
573 // extraction (field offset) and Width is size of bitfield.
574 Register Src = MI.getOperand(FirstOpnd).getReg();
575 Register LSBit = MI.getOperand(FirstOpnd + 1).getReg();
576 Register Width = MI.getOperand(FirstOpnd + 2).getReg();
577 // Comments are for signed bitfield extract, similar for unsigned. x is sign
578 // bit. s is sign, l is LSB and y are remaining bits of bitfield to extract.
579
580 // Src >> LSBit Hi|Lo: x?????syyyyyyl??? -> xxxx?????syyyyyyl
581 unsigned SHROpc = Signed ? AMDGPU::G_ASHR : AMDGPU::G_LSHR;
582 auto SHRSrc = B.buildInstr(SHROpc, {{VgprRB, S64}}, {Src, LSBit});
583
584 auto ConstWidth = getIConstantVRegValWithLookThrough(Width, MRI);
585
586 // Expand to Src >> LSBit << (64 - Width) >> (64 - Width)
587 // << (64 - Width): Hi|Lo: xxxx?????syyyyyyl -> syyyyyyl000000000
588 // >> (64 - Width): Hi|Lo: syyyyyyl000000000 -> ssssssssssyyyyyyl
589 if (!ConstWidth) {
590 auto Amt = B.buildSub(VgprRB_S32, B.buildConstant(SgprRB_S32, 64), Width);
591 auto SignBit = B.buildShl({VgprRB, S64}, SHRSrc, Amt);
592 B.buildInstr(SHROpc, {Dst}, {SignBit, Amt});
593 MI.eraseFromParent();
594 return true;
595 }
596
// Constant width: a single 32-bit BFX on the relevant half is enough.
597 uint64_t WidthImm = ConstWidth->Value.getZExtValue();
598 auto UnmergeSHRSrc = B.buildUnmerge(VgprRB_S32, SHRSrc);
599 Register SHRSrcLo = UnmergeSHRSrc.getReg(0);
600 Register SHRSrcHi = UnmergeSHRSrc.getReg(1);
601 auto Zero = B.buildConstant({VgprRB, S32}, 0);
602 unsigned BFXOpc = Signed ? AMDGPU::G_SBFX : AMDGPU::G_UBFX;
603
604 if (WidthImm <= 32) {
605 // SHRSrc Hi|Lo: ????????|???syyyl -> ????????|ssssyyyl
606 auto Lo = B.buildInstr(BFXOpc, {VgprRB_S32}, {SHRSrcLo, Zero, Width});
607 MachineInstrBuilder Hi;
608 if (Signed) {
609 // SHRSrc Hi|Lo: ????????|ssssyyyl -> ssssssss|ssssyyyl
610 Hi = B.buildAShr(VgprRB_S32, Lo, B.buildConstant(VgprRB_S32, 31));
611 } else {
612 // SHRSrc Hi|Lo: ????????|000syyyl -> 00000000|000syyyl
613 Hi = Zero;
614 }
615 B.buildMergeLikeInstr(Dst, {Lo, Hi});
616 } else {
617 auto Amt = B.buildConstant(VgprRB_S32, WidthImm - 32);
618 // SHRSrc Hi|Lo: ??????sy|yyyyyyyl -> sssssssy|yyyyyyyl
619 auto Hi = B.buildInstr(BFXOpc, {VgprRB_S32}, {SHRSrcHi, Zero, Amt});
620 B.buildMergeLikeInstr(Dst, {SHRSrcLo, Hi});
621 }
622
623 MI.eraseFromParent();
624 return true;
625}
626
// Lower a uniform (SGPR) bitfield extract to S_BFE_{I|U}{32|64}. The machine
// instruction packs field offset (low 16 bits) and field size (high 16 bits)
// into a single S32 second operand. NOTE(extraction): the
// `reportGISelFailure(` call-opening line is missing from this dump.
627bool RegBankLegalizeHelper::lowerS_BFE(MachineInstr &MI) {
628 Register DstReg = MI.getOperand(0).getReg();
629 LLT Ty = MRI.getType(DstReg);
630 bool Signed = isSignedBFE(MI);
// Intrinsic form has the intrinsic ID at operand 1; G_SBFX/G_UBFX do not.
631 unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1;
632 Register Src = MI.getOperand(FirstOpnd).getReg();
633 Register LSBit = MI.getOperand(FirstOpnd + 1).getReg();
634 Register Width = MI.getOperand(FirstOpnd + 2).getReg();
635 // For uniform bit field extract there are 4 available instructions, but
636 // LSBit(field offset) and Width(size of bitfield) need to be packed in S32,
637 // field offset in low and size in high 16 bits.
638
639 // Src1 Hi16|Lo16 = Size|FieldOffset
640 auto Mask = B.buildConstant(SgprRB_S32, maskTrailingOnes<unsigned>(6));
641 auto FieldOffset = B.buildAnd(SgprRB_S32, LSBit, Mask);
642 auto Size = B.buildShl(SgprRB_S32, Width, B.buildConstant(SgprRB_S32, 16));
643 auto Src1 = B.buildOr(SgprRB_S32, FieldOffset, Size);
644 unsigned Opc32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
645 unsigned Opc64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
646 unsigned Opc = Ty == S32 ? Opc32 : Opc64;
647
648 // Select machine instruction, because of reg class constraining, insert
649 // copies from reg class to reg bank.
650 auto S_BFE = B.buildInstr(Opc, {{SgprRB, Ty}},
651 {B.buildCopy(Ty, Src), B.buildCopy(S32, Src1)});
652 if (!constrainSelectedInstRegOperands(*S_BFE, *ST.getInstrInfo(),
653 *ST.getRegisterInfo(), RBI)) {
655 MF, MORE, "amdgpu-regbanklegalize",
656 "AMDGPU RegBankLegalize: lowerS_BFE, failed to constrain BFE", MI);
657 return false;
658 }
659
660 B.buildCopy(DstReg, S_BFE->getOperand(0).getReg());
661 MI.eraseFromParent();
662 return true;
663}
664
665bool RegBankLegalizeHelper::lowerSplitTo32(MachineInstr &MI) {
666 Register Dst = MI.getOperand(0).getReg();
667 LLT DstTy = MRI.getType(Dst);
668 assert(DstTy == V4S16 || DstTy == V2S32 || DstTy == S64);
669 LLT Ty = DstTy == V4S16 ? V2S16 : S32;
670 auto Op1 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(1).getReg());
671 auto Op2 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(2).getReg());
672 unsigned Opc = MI.getOpcode();
673 auto Flags = MI.getFlags();
674 auto Lo =
675 B.buildInstr(Opc, {{VgprRB, Ty}}, {Op1.getReg(0), Op2.getReg(0)}, Flags);
676 auto Hi =
677 B.buildInstr(Opc, {{VgprRB, Ty}}, {Op1.getReg(1), Op2.getReg(1)}, Flags);
678 B.buildMergeLikeInstr(Dst, {Lo, Hi});
679 MI.eraseFromParent();
680 return true;
681}
682
683bool RegBankLegalizeHelper::lowerSplitTo16(MachineInstr &MI) {
684 Register Dst = MI.getOperand(0).getReg();
685 assert(MRI.getType(Dst) == V2S16);
686 unsigned Opc = MI.getOpcode();
687 unsigned NumOps = MI.getNumOperands();
688 auto Flags = MI.getFlags();
689
690 auto [Op1Lo, Op1Hi] = unpackAExtTruncS16(MI.getOperand(1).getReg());
691
692 if (NumOps == 2) {
693 auto Lo = B.buildInstr(Opc, {SgprRB_S16}, {Op1Lo}, Flags);
694 auto Hi = B.buildInstr(Opc, {SgprRB_S16}, {Op1Hi}, Flags);
695 B.buildMergeLikeInstr(Dst, {Lo, Hi});
696 MI.eraseFromParent();
697 return true;
698 }
699
700 auto [Op2Lo, Op2Hi] = unpackAExtTruncS16(MI.getOperand(2).getReg());
701
702 if (NumOps == 3) {
703 auto Lo = B.buildInstr(Opc, {SgprRB_S16}, {Op1Lo, Op2Lo}, Flags);
704 auto Hi = B.buildInstr(Opc, {SgprRB_S16}, {Op1Hi, Op2Hi}, Flags);
705 B.buildMergeLikeInstr(Dst, {Lo, Hi});
706 MI.eraseFromParent();
707 return true;
708 }
709
710 assert(NumOps == 4);
711 auto [Op3Lo, Op3Hi] = unpackAExtTruncS16(MI.getOperand(3).getReg());
712 auto Lo = B.buildInstr(Opc, {SgprRB_S16}, {Op1Lo, Op2Lo, Op3Lo}, Flags);
713 auto Hi = B.buildInstr(Opc, {SgprRB_S16}, {Op1Hi, Op2Hi, Op3Hi}, Flags);
714 B.buildMergeLikeInstr(Dst, {Lo, Hi});
715 MI.eraseFromParent();
716 return true;
717}
718
719bool RegBankLegalizeHelper::lowerSplitTo32Select(MachineInstr &MI) {
720 Register Dst = MI.getOperand(0).getReg();
721 LLT DstTy = MRI.getType(Dst);
722 assert(DstTy == V4S16 || DstTy == V2S32 || DstTy == S64 ||
723 (DstTy.isPointer() && DstTy.getSizeInBits() == 64));
724 LLT Ty = DstTy == V4S16 ? V2S16 : S32;
725 auto Op2 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(2).getReg());
726 auto Op3 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(3).getReg());
727 Register Cond = MI.getOperand(1).getReg();
728 auto Flags = MI.getFlags();
729 auto Lo =
730 B.buildSelect({VgprRB, Ty}, Cond, Op2.getReg(0), Op3.getReg(0), Flags);
731 auto Hi =
732 B.buildSelect({VgprRB, Ty}, Cond, Op2.getReg(1), Op3.getReg(1), Flags);
733
734 B.buildMergeLikeInstr(Dst, {Lo, Hi});
735 MI.eraseFromParent();
736 return true;
737}
738
739bool RegBankLegalizeHelper::lowerSplitTo32SExtInReg(MachineInstr &MI) {
740 auto Op1 = B.buildUnmerge(VgprRB_S32, MI.getOperand(1).getReg());
741 int Amt = MI.getOperand(2).getImm();
742 Register Lo, Hi;
743 // Hi|Lo: s sign bit, ?/x bits changed/not changed by sign-extend
744 if (Amt <= 32) {
745 auto Freeze = B.buildFreeze(VgprRB_S32, Op1.getReg(0));
746 if (Amt == 32) {
747 // Hi|Lo: ????????|sxxxxxxx -> ssssssss|sxxxxxxx
748 Lo = Freeze.getReg(0);
749 } else {
750 // Hi|Lo: ????????|???sxxxx -> ssssssss|ssssxxxx
751 Lo = B.buildSExtInReg(VgprRB_S32, Freeze, Amt).getReg(0);
752 }
753
754 auto SignExtCst = B.buildConstant(SgprRB_S32, 31);
755 Hi = B.buildAShr(VgprRB_S32, Lo, SignExtCst).getReg(0);
756 } else {
757 // Hi|Lo: ?????sxx|xxxxxxxx -> ssssssxx|xxxxxxxx
758 Lo = Op1.getReg(0);
759 Hi = B.buildSExtInReg(VgprRB_S32, Op1.getReg(1), Amt - 32).getReg(0);
760 }
761
762 B.buildMergeLikeInstr(MI.getOperand(0).getReg(), {Lo, Hi});
763 MI.eraseFromParent();
764 return true;
765}
766
767bool RegBankLegalizeHelper::lower(MachineInstr &MI,
768 const RegBankLLTMapping &Mapping,
769 SmallSet<Register, 4> &WaterfallSgprs) {
770
771 switch (Mapping.LoweringMethod) {
772 case DoNotLower:
773 break;
774 case VccExtToSel:
775 return lowerVccExtToSel(MI);
776 case UniExtToSel: {
777 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
778 auto True = B.buildConstant({SgprRB, Ty},
779 MI.getOpcode() == AMDGPU::G_SEXT ? -1 : 1);
780 auto False = B.buildConstant({SgprRB, Ty}, 0);
781 // Input to G_{Z|S}EXT is 'Legalizer legal' S1. Most common case is compare.
782 // We are making select here. S1 cond was already 'any-extended to S32' +
783 // 'AND with 1 to clean high bits' by Sgpr32AExtBoolInReg.
784 B.buildSelect(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), True,
785 False);
786 MI.eraseFromParent();
787 return true;
788 }
789 case UnpackBitShift:
790 return lowerUnpackBitShift(MI);
791 case UnpackMinMax:
792 return lowerUnpackMinMax(MI);
793 case ScalarizeToS16:
794 return lowerSplitTo16(MI);
795 case Ext32To64: {
796 const RegisterBank *RB = MRI.getRegBank(MI.getOperand(0).getReg());
797 MachineInstrBuilder Hi;
798 switch (MI.getOpcode()) {
799 case AMDGPU::G_ZEXT: {
800 Hi = B.buildConstant({RB, S32}, 0);
801 break;
802 }
803 case AMDGPU::G_SEXT: {
804 // Replicate sign bit from 32-bit extended part.
805 auto ShiftAmt = B.buildConstant({RB, S32}, 31);
806 Hi = B.buildAShr({RB, S32}, MI.getOperand(1).getReg(), ShiftAmt);
807 break;
808 }
809 case AMDGPU::G_ANYEXT: {
810 Hi = B.buildUndef({RB, S32});
811 break;
812 }
813 default:
814 reportGISelFailure(MF, MORE, "amdgpu-regbanklegalize",
815 "AMDGPU RegBankLegalize: Ext32To64, unsuported opcode",
816 MI);
817 return false;
818 }
819
820 B.buildMergeLikeInstr(MI.getOperand(0).getReg(),
821 {MI.getOperand(1).getReg(), Hi});
822 MI.eraseFromParent();
823 return true;
824 }
825 case UniCstExt: {
826 uint64_t ConstVal = MI.getOperand(1).getCImm()->getZExtValue();
827 B.buildConstant(MI.getOperand(0).getReg(), ConstVal);
828
829 MI.eraseFromParent();
830 return true;
831 }
832 case VgprToVccCopy: {
833 Register Src = MI.getOperand(1).getReg();
834 LLT Ty = MRI.getType(Src);
835 // Take lowest bit from each lane and put it in lane mask.
836 // Lowering via compare, but we need to clean high bits first as compare
837 // compares all bits in register.
838 Register BoolSrc = MRI.createVirtualRegister({VgprRB, Ty});
839 if (Ty == S64) {
840 auto Src64 = B.buildUnmerge(VgprRB_S32, Src);
841 auto One = B.buildConstant(VgprRB_S32, 1);
842 auto AndLo = B.buildAnd(VgprRB_S32, Src64.getReg(0), One);
843 auto Zero = B.buildConstant(VgprRB_S32, 0);
844 auto AndHi = B.buildAnd(VgprRB_S32, Src64.getReg(1), Zero);
845 B.buildMergeLikeInstr(BoolSrc, {AndLo, AndHi});
846 } else {
847 assert(Ty == S32 || Ty == S16);
848 auto One = B.buildConstant({VgprRB, Ty}, 1);
849 B.buildAnd(BoolSrc, Src, One);
850 }
851 auto Zero = B.buildConstant({VgprRB, Ty}, 0);
852 B.buildICmp(CmpInst::ICMP_NE, MI.getOperand(0).getReg(), BoolSrc, Zero);
853 MI.eraseFromParent();
854 return true;
855 }
856 case V_BFE:
857 return lowerV_BFE(MI);
858 case S_BFE:
859 return lowerS_BFE(MI);
860 case SplitTo32:
861 return lowerSplitTo32(MI);
862 case SplitTo32Select:
863 return lowerSplitTo32Select(MI);
865 return lowerSplitTo32SExtInReg(MI);
866 case SplitLoad: {
867 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
868 unsigned Size = DstTy.getSizeInBits();
869 // Even split to 128-bit loads
870 if (Size > 128) {
871 LLT B128;
872 if (DstTy.isVector()) {
873 LLT EltTy = DstTy.getElementType();
874 B128 = LLT::fixed_vector(128 / EltTy.getSizeInBits(), EltTy);
875 } else {
876 B128 = LLT::scalar(128);
877 }
878 if (Size / 128 == 2)
879 splitLoad(MI, {B128, B128});
880 else if (Size / 128 == 4)
881 splitLoad(MI, {B128, B128, B128, B128});
882 else {
883 reportGISelFailure(MF, MORE, "amdgpu-regbanklegalize",
884 "AMDGPU RegBankLegalize: SplitLoad, unsuported type",
885 MI);
886 return false;
887 }
888 }
889 // 64 and 32 bit load
890 else if (DstTy == S96)
891 splitLoad(MI, {S64, S32}, S32);
892 else if (DstTy == V3S32)
893 splitLoad(MI, {V2S32, S32}, S32);
894 else if (DstTy == V6S16)
895 splitLoad(MI, {V4S16, V2S16}, V2S16);
896 else {
897 reportGISelFailure(MF, MORE, "amdgpu-regbanklegalize",
898 "AMDGPU RegBankLegalize: SplitLoad, unsuported type",
899 MI);
900 return false;
901 }
902 return true;
903 }
904 case WidenLoad: {
905 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
906 if (DstTy == S96)
907 widenLoad(MI, S128);
908 else if (DstTy == V3S32)
909 widenLoad(MI, V4S32, S32);
910 else if (DstTy == V6S16)
911 widenLoad(MI, V8S16, V2S16);
912 else {
913 reportGISelFailure(MF, MORE, "amdgpu-regbanklegalize",
914 "AMDGPU RegBankLegalize: WidenLoad, unsuported type",
915 MI);
916 return false;
917 }
918 return true;
919 }
920 case UnpackAExt:
921 return lowerUnpackAExt(MI);
922 case WidenMMOToS32:
923 return widenMMOToS32(cast<GAnyLoad>(MI));
924 }
925
926 if (!WaterfallSgprs.empty()) {
927 MachineBasicBlock::iterator I = MI.getIterator();
928 if (!executeInWaterfallLoop(B, make_range(I, std::next(I)), WaterfallSgprs))
929 return false;
930 }
931 return true;
932}
933
934LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
935 switch (ID) {
936 case Vcc:
937 case UniInVcc:
938 return LLT::scalar(1);
939 case Sgpr16:
940 case Vgpr16:
941 case UniInVgprS16:
942 return LLT::scalar(16);
943 case Sgpr32:
944 case Sgpr32_WF:
945 case Sgpr32Trunc:
946 case Sgpr32AExt:
948 case Sgpr32SExt:
949 case Sgpr32ZExt:
950 case UniInVgprS32:
951 case Vgpr32:
952 case Vgpr32SExt:
953 case Vgpr32ZExt:
954 return LLT::scalar(32);
955 case Sgpr64:
956 case Vgpr64:
957 case UniInVgprS64:
958 return LLT::scalar(64);
959 case Sgpr128:
960 case Vgpr128:
961 return LLT::scalar(128);
962 case SgprP0:
963 case VgprP0:
964 return LLT::pointer(0, 64);
965 case SgprP1:
966 case VgprP1:
967 return LLT::pointer(1, 64);
968 case SgprP3:
969 case VgprP3:
970 return LLT::pointer(3, 32);
971 case SgprP4:
972 case VgprP4:
973 return LLT::pointer(4, 64);
974 case SgprP5:
975 case VgprP5:
976 return LLT::pointer(5, 32);
977 case SgprP8:
978 return LLT::pointer(8, 128);
979 case SgprV2S16:
980 case VgprV2S16:
981 case UniInVgprV2S16:
982 return LLT::fixed_vector(2, 16);
983 case SgprV2S32:
984 case VgprV2S32:
985 case UniInVgprV2S32:
986 return LLT::fixed_vector(2, 32);
987 case SgprV4S32:
988 case SgprV4S32_WF:
989 case VgprV4S32:
990 case UniInVgprV4S32:
991 return LLT::fixed_vector(4, 32);
992 default:
993 return LLT();
994 }
995}
996
997LLT RegBankLegalizeHelper::getBTyFromID(RegBankLLTMappingApplyID ID, LLT Ty) {
998 switch (ID) {
999 case SgprB32:
1000 case VgprB32:
1001 case UniInVgprB32:
1002 if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) ||
1003 isAnyPtr(Ty, 32))
1004 return Ty;
1005 return LLT();
1006 case SgprPtr32:
1007 case VgprPtr32:
1008 return isAnyPtr(Ty, 32) ? Ty : LLT();
1009 case SgprPtr64:
1010 case VgprPtr64:
1011 return isAnyPtr(Ty, 64) ? Ty : LLT();
1012 case SgprPtr128:
1013 case VgprPtr128:
1014 return isAnyPtr(Ty, 128) ? Ty : LLT();
1015 case SgprB64:
1016 case VgprB64:
1017 case UniInVgprB64:
1018 if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
1019 Ty == LLT::fixed_vector(4, 16) || isAnyPtr(Ty, 64))
1020 return Ty;
1021 return LLT();
1022 case SgprB96:
1023 case VgprB96:
1024 case UniInVgprB96:
1025 if (Ty == LLT::scalar(96) || Ty == LLT::fixed_vector(3, 32) ||
1026 Ty == LLT::fixed_vector(6, 16))
1027 return Ty;
1028 return LLT();
1029 case SgprB128:
1030 case VgprB128:
1031 case UniInVgprB128:
1032 if (Ty == LLT::scalar(128) || Ty == LLT::fixed_vector(4, 32) ||
1033 Ty == LLT::fixed_vector(2, 64) || isAnyPtr(Ty, 128))
1034 return Ty;
1035 return LLT();
1036 case SgprB256:
1037 case VgprB256:
1038 case UniInVgprB256:
1039 if (Ty == LLT::scalar(256) || Ty == LLT::fixed_vector(8, 32) ||
1040 Ty == LLT::fixed_vector(4, 64) || Ty == LLT::fixed_vector(16, 16))
1041 return Ty;
1042 return LLT();
1043 case SgprB512:
1044 case VgprB512:
1045 case UniInVgprB512:
1046 if (Ty == LLT::scalar(512) || Ty == LLT::fixed_vector(16, 32) ||
1047 Ty == LLT::fixed_vector(8, 64))
1048 return Ty;
1049 return LLT();
1050 default:
1051 return LLT();
1052 }
1053}
1054
1055const RegisterBank *
1056RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
1057 switch (ID) {
1058 case Vcc:
1059 return VccRB;
1060 case Sgpr16:
1061 case Sgpr32:
1062 case Sgpr32_WF:
1063 case Sgpr64:
1064 case Sgpr128:
1065 case SgprP0:
1066 case SgprP1:
1067 case SgprP3:
1068 case SgprP4:
1069 case SgprP5:
1070 case SgprP8:
1071 case SgprPtr32:
1072 case SgprPtr64:
1073 case SgprPtr128:
1074 case SgprV2S16:
1075 case SgprV2S32:
1076 case SgprV4S32:
1077 case SgprV4S32_WF:
1078 case SgprB32:
1079 case SgprB64:
1080 case SgprB96:
1081 case SgprB128:
1082 case SgprB256:
1083 case SgprB512:
1084 case UniInVcc:
1085 case UniInVgprS16:
1086 case UniInVgprS32:
1087 case UniInVgprS64:
1088 case UniInVgprV2S16:
1089 case UniInVgprV2S32:
1090 case UniInVgprV4S32:
1091 case UniInVgprB32:
1092 case UniInVgprB64:
1093 case UniInVgprB96:
1094 case UniInVgprB128:
1095 case UniInVgprB256:
1096 case UniInVgprB512:
1097 case Sgpr32Trunc:
1098 case Sgpr32AExt:
1100 case Sgpr32SExt:
1101 case Sgpr32ZExt:
1102 return SgprRB;
1103 case Vgpr16:
1104 case Vgpr32:
1105 case Vgpr64:
1106 case Vgpr128:
1107 case VgprP0:
1108 case VgprP1:
1109 case VgprP3:
1110 case VgprP4:
1111 case VgprP5:
1112 case VgprPtr32:
1113 case VgprPtr64:
1114 case VgprPtr128:
1115 case VgprV2S16:
1116 case VgprV2S32:
1117 case VgprV4S32:
1118 case VgprB32:
1119 case VgprB64:
1120 case VgprB96:
1121 case VgprB128:
1122 case VgprB256:
1123 case VgprB512:
1124 case Vgpr32SExt:
1125 case Vgpr32ZExt:
1126 return VgprRB;
1127 default:
1128 return nullptr;
1129 }
1130}
1131
1132bool RegBankLegalizeHelper::applyMappingDst(
1133 MachineInstr &MI, unsigned &OpIdx,
1134 const SmallVectorImpl<RegBankLLTMappingApplyID> &MethodIDs) {
1135 // Defs start from operand 0
1136 for (; OpIdx < MethodIDs.size(); ++OpIdx) {
1137 if (MethodIDs[OpIdx] == None)
1138 continue;
1139 MachineOperand &Op = MI.getOperand(OpIdx);
1140 Register Reg = Op.getReg();
1141 LLT Ty = MRI.getType(Reg);
1142 [[maybe_unused]] const RegisterBank *RB = MRI.getRegBank(Reg);
1143
1144 switch (MethodIDs[OpIdx]) {
1145 // vcc, sgpr and vgpr scalars, pointers and vectors
1146 case Vcc:
1147 case Sgpr16:
1148 case Sgpr32:
1149 case Sgpr64:
1150 case Sgpr128:
1151 case SgprP0:
1152 case SgprP1:
1153 case SgprP3:
1154 case SgprP4:
1155 case SgprP5:
1156 case SgprP8:
1157 case SgprV2S16:
1158 case SgprV2S32:
1159 case SgprV4S32:
1160 case Vgpr16:
1161 case Vgpr32:
1162 case Vgpr64:
1163 case Vgpr128:
1164 case VgprP0:
1165 case VgprP1:
1166 case VgprP3:
1167 case VgprP4:
1168 case VgprP5:
1169 case VgprV2S16:
1170 case VgprV2S32:
1171 case VgprV4S32: {
1172 assert(Ty == getTyFromID(MethodIDs[OpIdx]));
1173 assert(RB == getRegBankFromID(MethodIDs[OpIdx]));
1174 break;
1175 }
1176 // sgpr and vgpr B-types
1177 case SgprB32:
1178 case SgprB64:
1179 case SgprB96:
1180 case SgprB128:
1181 case SgprB256:
1182 case SgprB512:
1183 case SgprPtr32:
1184 case SgprPtr64:
1185 case SgprPtr128:
1186 case VgprB32:
1187 case VgprB64:
1188 case VgprB96:
1189 case VgprB128:
1190 case VgprB256:
1191 case VgprB512:
1192 case VgprPtr32:
1193 case VgprPtr64:
1194 case VgprPtr128: {
1195 assert(Ty == getBTyFromID(MethodIDs[OpIdx], Ty));
1196 assert(RB == getRegBankFromID(MethodIDs[OpIdx]));
1197 break;
1198 }
1199 // uniform in vcc/vgpr: scalars, vectors and B-types
1200 case UniInVcc: {
1201 assert(Ty == S1);
1202 assert(RB == SgprRB);
1203 Register NewDst = MRI.createVirtualRegister(VccRB_S1);
1204 Op.setReg(NewDst);
1205 auto CopyS32_Vcc =
1206 B.buildInstr(AMDGPU::G_AMDGPU_COPY_SCC_VCC, {SgprRB_S32}, {NewDst});
1207 B.buildTrunc(Reg, CopyS32_Vcc);
1208 break;
1209 }
1210 case UniInVgprS16: {
1211 assert(Ty == getTyFromID(MethodIDs[OpIdx]));
1212 assert(RB == SgprRB);
1213 Register NewVgprDstS16 = MRI.createVirtualRegister({VgprRB, S16});
1214 Register NewVgprDstS32 = MRI.createVirtualRegister({VgprRB, S32});
1215 Register NewSgprDstS32 = MRI.createVirtualRegister({SgprRB, S32});
1216 Op.setReg(NewVgprDstS16);
1217 B.buildAnyExt(NewVgprDstS32, NewVgprDstS16);
1218 buildReadAnyLane(B, NewSgprDstS32, NewVgprDstS32, RBI);
1219 B.buildTrunc(Reg, NewSgprDstS32);
1220 break;
1221 }
1222 case UniInVgprS32:
1223 case UniInVgprS64:
1224 case UniInVgprV2S16:
1225 case UniInVgprV2S32:
1226 case UniInVgprV4S32: {
1227 assert(Ty == getTyFromID(MethodIDs[OpIdx]));
1228 assert(RB == SgprRB);
1229 Register NewVgprDst = MRI.createVirtualRegister({VgprRB, Ty});
1230 Op.setReg(NewVgprDst);
1231 buildReadAnyLane(B, Reg, NewVgprDst, RBI);
1232 break;
1233 }
1234 case UniInVgprB32:
1235 case UniInVgprB64:
1236 case UniInVgprB96:
1237 case UniInVgprB128:
1238 case UniInVgprB256:
1239 case UniInVgprB512: {
1240 assert(Ty == getBTyFromID(MethodIDs[OpIdx], Ty));
1241 assert(RB == SgprRB);
1242 Register NewVgprDst = MRI.createVirtualRegister({VgprRB, Ty});
1243 Op.setReg(NewVgprDst);
1244 AMDGPU::buildReadAnyLane(B, Reg, NewVgprDst, RBI);
1245 break;
1246 }
1247 // sgpr trunc
1248 case Sgpr32Trunc: {
1249 assert(Ty.getSizeInBits() < 32);
1250 assert(RB == SgprRB);
1251 Register NewDst = MRI.createVirtualRegister(SgprRB_S32);
1252 Op.setReg(NewDst);
1253 if (!MRI.use_empty(Reg))
1254 B.buildTrunc(Reg, NewDst);
1255 break;
1256 }
1257 case InvalidMapping: {
1259 MF, MORE, "amdgpu-regbanklegalize",
1260 "AMDGPU RegBankLegalize: missing fast rule ('Div' or 'Uni') for", MI);
1261 return false;
1262 }
1263 default:
1265 MF, MORE, "amdgpu-regbanklegalize",
1266 "AMDGPU RegBankLegalize: applyMappingDst, ID not supported", MI);
1267 return false;
1268 }
1269 }
1270
1271 return true;
1272}
1273
1274bool RegBankLegalizeHelper::applyMappingSrc(
1275 MachineInstr &MI, unsigned &OpIdx,
1276 const SmallVectorImpl<RegBankLLTMappingApplyID> &MethodIDs,
1277 SmallSet<Register, 4> &SgprWaterfallOperandRegs) {
1278 for (unsigned i = 0; i < MethodIDs.size(); ++OpIdx, ++i) {
1279 if (MethodIDs[i] == None || MethodIDs[i] == IntrId || MethodIDs[i] == Imm)
1280 continue;
1281
1282 MachineOperand &Op = MI.getOperand(OpIdx);
1283 Register Reg = Op.getReg();
1284 LLT Ty = MRI.getType(Reg);
1285 const RegisterBank *RB = MRI.getRegBank(Reg);
1286
1287 switch (MethodIDs[i]) {
1288 case Vcc: {
1289 assert(Ty == S1);
1290 assert(RB == VccRB || RB == SgprRB);
1291 if (RB == SgprRB) {
1292 auto Aext = B.buildAnyExt(SgprRB_S32, Reg);
1293 auto CopyVcc_Scc =
1294 B.buildInstr(AMDGPU::G_AMDGPU_COPY_VCC_SCC, {VccRB_S1}, {Aext});
1295 Op.setReg(CopyVcc_Scc.getReg(0));
1296 }
1297 break;
1298 }
1299 // sgpr scalars, pointers and vectors
1300 case Sgpr16:
1301 case Sgpr32:
1302 case Sgpr64:
1303 case Sgpr128:
1304 case SgprP0:
1305 case SgprP1:
1306 case SgprP3:
1307 case SgprP4:
1308 case SgprP5:
1309 case SgprP8:
1310 case SgprV2S16:
1311 case SgprV2S32:
1312 case SgprV4S32: {
1313 assert(Ty == getTyFromID(MethodIDs[i]));
1314 assert(RB == getRegBankFromID(MethodIDs[i]));
1315 break;
1316 }
1317 // sgpr B-types
1318 case SgprB32:
1319 case SgprB64:
1320 case SgprB96:
1321 case SgprB128:
1322 case SgprB256:
1323 case SgprB512:
1324 case SgprPtr32:
1325 case SgprPtr64:
1326 case SgprPtr128: {
1327 assert(Ty == getBTyFromID(MethodIDs[i], Ty));
1328 assert(RB == getRegBankFromID(MethodIDs[i]));
1329 break;
1330 }
1331 // vgpr scalars, pointers and vectors
1332 case Vgpr16:
1333 case Vgpr32:
1334 case Vgpr64:
1335 case Vgpr128:
1336 case VgprP0:
1337 case VgprP1:
1338 case VgprP3:
1339 case VgprP4:
1340 case VgprP5:
1341 case VgprV2S16:
1342 case VgprV2S32:
1343 case VgprV4S32: {
1344 assert(Ty == getTyFromID(MethodIDs[i]));
1345 if (RB != VgprRB) {
1346 auto CopyToVgpr = B.buildCopy({VgprRB, Ty}, Reg);
1347 Op.setReg(CopyToVgpr.getReg(0));
1348 }
1349 break;
1350 }
1351 // vgpr B-types
1352 case VgprB32:
1353 case VgprB64:
1354 case VgprB96:
1355 case VgprB128:
1356 case VgprB256:
1357 case VgprB512:
1358 case VgprPtr32:
1359 case VgprPtr64:
1360 case VgprPtr128: {
1361 assert(Ty == getBTyFromID(MethodIDs[i], Ty));
1362 if (RB != VgprRB) {
1363 auto CopyToVgpr = B.buildCopy({VgprRB, Ty}, Reg);
1364 Op.setReg(CopyToVgpr.getReg(0));
1365 }
1366 break;
1367 }
1368 // sgpr waterfall, scalars and vectors
1369 case Sgpr32_WF:
1370 case SgprV4S32_WF: {
1371 assert(Ty == getTyFromID(MethodIDs[i]));
1372 if (RB != SgprRB)
1373 SgprWaterfallOperandRegs.insert(Reg);
1374 break;
1375 }
1376 // sgpr and vgpr scalars with extend
1377 case Sgpr32AExt: {
1378 // Note: this ext allows S1, and it is meant to be combined away.
1379 assert(Ty.getSizeInBits() < 32);
1380 assert(RB == SgprRB);
1381 auto Aext = B.buildAnyExt(SgprRB_S32, Reg);
1382 Op.setReg(Aext.getReg(0));
1383 break;
1384 }
1385 case Sgpr32AExtBoolInReg: {
1386 // Note: this ext allows S1, and it is meant to be combined away.
1387 assert(Ty.getSizeInBits() == 1);
1388 assert(RB == SgprRB);
1389 auto Aext = B.buildAnyExt(SgprRB_S32, Reg);
1390 // Zext SgprS1 is not legal, make AND with 1 instead. This instruction is
1391 // most of times meant to be combined away in AMDGPURegBankCombiner.
1392 auto Cst1 = B.buildConstant(SgprRB_S32, 1);
1393 auto BoolInReg = B.buildAnd(SgprRB_S32, Aext, Cst1);
1394 Op.setReg(BoolInReg.getReg(0));
1395 break;
1396 }
1397 case Sgpr32SExt: {
1398 assert(1 < Ty.getSizeInBits() && Ty.getSizeInBits() < 32);
1399 assert(RB == SgprRB);
1400 auto Sext = B.buildSExt(SgprRB_S32, Reg);
1401 Op.setReg(Sext.getReg(0));
1402 break;
1403 }
1404 case Sgpr32ZExt: {
1405 assert(1 < Ty.getSizeInBits() && Ty.getSizeInBits() < 32);
1406 assert(RB == SgprRB);
1407 auto Zext = B.buildZExt({SgprRB, S32}, Reg);
1408 Op.setReg(Zext.getReg(0));
1409 break;
1410 }
1411 case Vgpr32SExt: {
1412 // Note this ext allows S1, and it is meant to be combined away.
1413 assert(Ty.getSizeInBits() < 32);
1414 assert(RB == VgprRB);
1415 auto Sext = B.buildSExt({VgprRB, S32}, Reg);
1416 Op.setReg(Sext.getReg(0));
1417 break;
1418 }
1419 case Vgpr32ZExt: {
1420 // Note this ext allows S1, and it is meant to be combined away.
1421 assert(Ty.getSizeInBits() < 32);
1422 assert(RB == VgprRB);
1423 auto Zext = B.buildZExt({VgprRB, S32}, Reg);
1424 Op.setReg(Zext.getReg(0));
1425 break;
1426 }
1427 default:
1429 MF, MORE, "amdgpu-regbanklegalize",
1430 "AMDGPU RegBankLegalize: applyMappingSrc, ID not supported", MI);
1431 return false;
1432 }
1433 }
1434 return true;
1435}
1436
1438 Register Dst = MI.getOperand(0).getReg();
1439 LLT Ty = MRI.getType(Dst);
1440
1441 if (Ty == LLT::scalar(1) && MUI.isUniform(Dst)) {
1442 B.setInsertPt(*MI.getParent(), MI.getParent()->getFirstNonPHI());
1443
1444 Register NewDst = MRI.createVirtualRegister(SgprRB_S32);
1445 MI.getOperand(0).setReg(NewDst);
1446 B.buildTrunc(Dst, NewDst);
1447
1448 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1449 Register UseReg = MI.getOperand(i).getReg();
1450
1451 auto DefMI = MRI.getVRegDef(UseReg)->getIterator();
1452 MachineBasicBlock *DefMBB = DefMI->getParent();
1453
1454 B.setInsertPt(*DefMBB, DefMBB->SkipPHIsAndLabels(std::next(DefMI)));
1455
1456 auto NewUse = B.buildAnyExt(SgprRB_S32, UseReg);
1457 MI.getOperand(i).setReg(NewUse.getReg(0));
1458 }
1459
1460 return true;
1461 }
1462
1463 // ALL divergent i1 phis should have been lowered and inst-selected into PHI
1464 // with sgpr reg class and S1 LLT in AMDGPUGlobalISelDivergenceLowering pass.
1465 // Note: this includes divergent phis that don't require lowering.
1466 if (Ty == LLT::scalar(1) && MUI.isDivergent(Dst)) {
1467 reportGISelFailure(MF, MORE, "amdgpu-regbanklegalize",
1468 "AMDGPU RegBankLegalize: Can't lower divergent S1 G_PHI",
1469 MI);
1470 return false;
1471 }
1472
1473 // We accept all types that can fit in some register class.
1474 // Uniform G_PHIs have all sgpr registers.
1475 // Divergent G_PHIs have vgpr dst but inputs can be sgpr or vgpr.
1476 if (Ty == LLT::scalar(32) || Ty == LLT::pointer(1, 64) ||
1477 Ty == LLT::pointer(4, 64)) {
1478 return true;
1479 }
1480
1481 reportGISelFailure(MF, MORE, "amdgpu-regbanklegalize",
1482 "AMDGPU RegBankLegalize: type not supported for G_PHI",
1483 MI);
1484 return false;
1485}
1486
1487[[maybe_unused]] static bool verifyRegBankOnOperands(MachineInstr &MI,
1488 const RegisterBank *RB,
1490 unsigned StartOpIdx,
1491 unsigned EndOpIdx) {
1492 for (unsigned i = StartOpIdx; i <= EndOpIdx; ++i) {
1493 if (MRI.getRegBankOrNull(MI.getOperand(i).getReg()) != RB)
1494 return false;
1495 }
1496 return true;
1497}
1498
1500 const RegisterBank *RB = MRI.getRegBank(MI.getOperand(0).getReg());
1501 // Put RB on all registers
1502 unsigned NumDefs = MI.getNumDefs();
1503 unsigned NumOperands = MI.getNumOperands();
1504
1505 assert(verifyRegBankOnOperands(MI, RB, MRI, 0, NumDefs - 1));
1506 if (RB == SgprRB)
1507 assert(verifyRegBankOnOperands(MI, RB, MRI, NumDefs, NumOperands - 1));
1508
1509 if (RB == VgprRB) {
1510 B.setInstr(MI);
1511 for (unsigned i = NumDefs; i < NumOperands; ++i) {
1512 Register Reg = MI.getOperand(i).getReg();
1513 if (MRI.getRegBank(Reg) != RB) {
1514 auto Copy = B.buildCopy({VgprRB, MRI.getType(Reg)}, Reg);
1515 MI.getOperand(i).setReg(Copy.getReg(0));
1516 }
1517 }
1518 }
1519}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
Provides AMDGPU specific target descriptions.
static bool isSignedBFE(MachineInstr &MI)
static bool verifyRegBankOnOperands(MachineInstr &MI, const RegisterBank *RB, MachineRegisterInfo &MRI, unsigned StartOpIdx, unsigned EndOpIdx)
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic operations.
static Register UseReg(const MachineOperand &MO)
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
#define I(x, y, z)
Definition MD5.cpp:57
This file declares the MachineIRBuilder class.
Register Reg
Register const TargetRegisterInfo * TRI
Machine IR instance of the generic uniformity analysis.
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
const SmallVectorImpl< MachineOperand > & Cond
RegBankLegalizeHelper(MachineIRBuilder &B, const MachineUniformityInfo &MUI, const RegisterBankInfo &RBI, const RegBankLegalizeRules &RBLRules)
const RegBankLLTMapping * findMappingForMI(const MachineInstr &MI, const MachineRegisterInfo &MRI, const MachineUniformityInfo &MUI) const
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
@ ICMP_NE
not equal
Definition InstrTypes.h:698
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
const SIRegisterInfo * getRegisterInfo() const override
Represents a call to an intrinsic.
constexpr bool isScalar() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
TypeSize getValue() const
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI iterator SkipPHIsAndLabels(iterator I)
Return the first instruction in MBB after I that is not a PHI or a label.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
BasicBlockListType::iterator iterator
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Helper class to build MachineInstr.
Representation of each machine instruction.
LocationSize getSize() const
Return the size in bytes of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Holds all the information related to register banks.
This class implements the register bank concept.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
bool empty() const
Definition SmallSet.h:168
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
void push_back(const T &Elt)
A range adaptor for a pair of iterators.
bool isAnyPtr(LLT Ty, unsigned Width)
void buildReadAnyLane(MachineIRBuilder &B, Register SgprDst, Register VgprSrc, const RegisterBankInfo &RBI)
void buildReadFirstLane(MachineIRBuilder &B, Register SgprDst, Register VgprSrc, const RegisterBankInfo &RBI)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Kill
The last use of a register.
This is an optimization pass for GlobalISel generic memory operations.
GenericUniformityInfo< MachineSSAContext > MachineUniformityInfo
@ Offset
Definition DWP.cpp:532
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition Utils.cpp:155
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI void reportGISelFailure(MachineFunction &MF, MachineOptimizationRemarkEmitter &MORE, MachineOptimizationRemarkMissed &R)
Report an ISel error as a missed optimization remark to the LLVMContext's diagnostic stream.
Definition Utils.cpp:259
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:434
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
SmallVector< RegBankLLTMappingApplyID, 2 > DstOpMapping
SmallVector< RegBankLLTMappingApplyID, 4 > SrcOpMapping