//===- AArch64RegisterBankInfo.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the RegisterBankInfo class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Threading.h"
#include <cassert>

#define GET_TARGET_REGBANK_IMPL
#include "AArch64GenRegisterBank.inc"

// This file will be TableGen'ed at some point.
#include "AArch64GenRegisterBankInfo.def"

using namespace llvm;
static const unsigned CustomMappingID = 1;
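// Mappings tagged with CustomMappingID are completed by applyMappingImpl()
// (see the G_DUP and G_INSERT_VECTOR_ELT handling in getInstrMapping() below)
// rather than by applyDefaultMapping() alone.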

AArch64RegisterBankInfo::AArch64RegisterBankInfo(
    const TargetRegisterInfo &TRI) {
  static llvm::once_flag InitializeRegisterBankFlag;

  static auto InitializeRegisterBankOnce = [&]() {
    // We have only one set of register banks, whatever the subtarget
    // is. Therefore, the initialization of the RegBanks table should be
    // done only once. Indeed the table of all register banks
    // (AArch64::RegBanks) is unique in the compiler. At some point, it
    // will get tablegen'ed and the whole constructor becomes empty.

    const RegisterBank &RBGPR = getRegBank(AArch64::GPRRegBankID);
    (void)RBGPR;
    assert(&AArch64::GPRRegBank == &RBGPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBFPR = getRegBank(AArch64::FPRRegBankID);
    (void)RBFPR;
    assert(&AArch64::FPRRegBank == &RBFPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBCCR = getRegBank(AArch64::CCRegBankID);
    (void)RBCCR;
    assert(&AArch64::CCRegBank == &RBCCR &&
           "The order in RegBanks is messed up");

    // The GPR register bank is fully defined by all the registers in
    // GPR64all + its subclasses.
    assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) &&
           "Subclass not added?");
    assert(getMaximumSize(RBGPR.getID()) == 128 &&
           "GPRs should hold up to 128-bit");

    // The FPR register bank is fully defined by all the registers in
    // QQQQ + its subclasses.
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::QQRegClassID)) &&
           "Subclass not added?");
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) &&
           "Subclass not added?");
    assert(getMaximumSize(RBFPR.getID()) == 512 &&
           "FPRs should hold up to 512-bit via QQQQ sequence");

    assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) &&
           "Class not added?");
    assert(getMaximumSize(RBCCR.getID()) == 32 &&
           "CCR should hold up to 32-bit");

    // Check that the TableGen'ed-like file is in sync with our expectations.
    // First, the Idx.
    assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
                                  {PMI_GPR32, PMI_GPR64, PMI_GPR128}) &&
           "PartialMappingIdx's are incorrectly ordered");
    assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR,
                                  {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128,
                                   PMI_FPR256, PMI_FPR512}) &&
           "PartialMappingIdx's are incorrectly ordered");
// Now, the content.
// Check partial mapping.
#define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB) \
  do { \
    assert( \
        checkPartialMap(PartialMappingIdx::Idx, ValStartIdx, ValLength, RB) && \
        #Idx " is incorrectly initialized"); \
  } while (false)
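
// This and the following CHECK_* helpers expand to plain asserts, so all of
// this one-time sanity checking compiles away entirely in NDEBUG builds.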

    CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR128, 0, 128, RBGPR);
    CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR256, 0, 256, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR512, 0, 512, RBFPR);

// Check value mapping.
#define CHECK_VALUEMAP_IMPL(RBName, Size, Offset) \
  do { \
    assert(checkValueMapImpl(PartialMappingIdx::PMI_##RBName##Size, \
                             PartialMappingIdx::PMI_First##RBName, Size, \
                             Offset) && \
           #RBName #Size " " #Offset " is incorrectly initialized"); \
  } while (false)

#define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0)

    CHECK_VALUEMAP(GPR, 32);
    CHECK_VALUEMAP(GPR, 64);
    CHECK_VALUEMAP(GPR, 128);
    CHECK_VALUEMAP(FPR, 16);
    CHECK_VALUEMAP(FPR, 32);
    CHECK_VALUEMAP(FPR, 64);
    CHECK_VALUEMAP(FPR, 128);
    CHECK_VALUEMAP(FPR, 256);
    CHECK_VALUEMAP(FPR, 512);

// Check the value mapping for 3-operands instructions where all the operands
// map to the same value mapping.
#define CHECK_VALUEMAP_3OPS(RBName, Size) \
  do { \
    CHECK_VALUEMAP_IMPL(RBName, Size, 0); \
    CHECK_VALUEMAP_IMPL(RBName, Size, 1); \
    CHECK_VALUEMAP_IMPL(RBName, Size, 2); \
  } while (false)

    CHECK_VALUEMAP_3OPS(GPR, 32);
    CHECK_VALUEMAP_3OPS(GPR, 64);
    CHECK_VALUEMAP_3OPS(GPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 32);
    CHECK_VALUEMAP_3OPS(FPR, 64);
    CHECK_VALUEMAP_3OPS(FPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 256);
    CHECK_VALUEMAP_3OPS(FPR, 512);

#define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size) \
  do { \
    unsigned PartialMapDstIdx = PMI_##RBNameDst##Size - PMI_Min; \
    unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min; \
    (void)PartialMapDstIdx; \
    (void)PartialMapSrcIdx; \
    const ValueMapping *Map = getCopyMapping(AArch64::RBNameDst##RegBankID, \
                                             AArch64::RBNameSrc##RegBankID, \
                                             TypeSize::getFixed(Size)); \
    (void)Map; \
    assert(Map[0].BreakDown == \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] && \
           Map[0].NumBreakDowns == 1 && \
           #RBNameDst #Size " Dst is incorrectly initialized"); \
    assert(Map[1].BreakDown == \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] && \
           Map[1].NumBreakDowns == 1 && \
           #RBNameSrc #Size " Src is incorrectly initialized"); \
    \
  } while (false)

    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64);

#define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize) \
  do { \
    unsigned PartialMapDstIdx = PMI_FPR##DstSize - PMI_Min; \
    unsigned PartialMapSrcIdx = PMI_FPR##SrcSize - PMI_Min; \
    (void)PartialMapDstIdx; \
    (void)PartialMapSrcIdx; \
    const ValueMapping *Map = getFPExtMapping(DstSize, SrcSize); \
    (void)Map; \
    assert(Map[0].BreakDown == \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] && \
           Map[0].NumBreakDowns == 1 && \
           "FPR" #DstSize " Dst is incorrectly initialized"); \
    assert(Map[1].BreakDown == \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] && \
           Map[1].NumBreakDowns == 1 && \
           "FPR" #SrcSize " Src is incorrectly initialized"); \
    \
  } while (false)

    CHECK_VALUEMAP_FPEXT(32, 16);
    CHECK_VALUEMAP_FPEXT(64, 16);
    CHECK_VALUEMAP_FPEXT(64, 32);
    CHECK_VALUEMAP_FPEXT(128, 64);

    assert(verify(TRI) && "Invalid register bank information");
  };

  llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
}

unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A,
                                           const RegisterBank &B,
                                           const TypeSize Size) const {
  // What do we do with different sizes?
  // Copies are the same size.
  // Will introduce other hooks for different sizes:
  // * extract cost.
  // * build_sequence cost.

  // Copy from (resp. to) GPR to (resp. from) FPR involves FMOV.
  // FIXME: This should be deduced from the scheduling model.
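  // Until then, treat these as unitless heuristic values: RegBankSelect
  // compares them against the costs of alternative mappings, so only their
  // relative order matters, not the absolute numbers.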
  if (&A == &AArch64::GPRRegBank && &B == &AArch64::FPRRegBank)
    // FMOVXDr or FMOVWSr.
    return 5;
  if (&A == &AArch64::FPRRegBank && &B == &AArch64::GPRRegBank)
    // FMOVDXr or FMOVSWr.
    return 4;

  return RegisterBankInfo::copyCost(A, B, Size);
}

const RegisterBank &
AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
                                                LLT Ty) const {
  switch (RC.getID()) {
  case AArch64::GPR64sponlyRegClassID:
    return getRegBank(AArch64::GPRRegBankID);
  default:
    return RegisterBankInfo::getRegBankFromRegClass(RC, Ty);
  }
}

RegisterBankInfo::InstructionMappings
AArch64RegisterBankInfo::getInstrAlternativeMappings(
    const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (MI.getOpcode()) {
  case TargetOpcode::G_OR: {
    // 32 and 64-bit or can be mapped on either FPR or
    // GPR for the same cost.
    TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 3)
      break;
    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size),
        /*NumOperands*/ 3);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size),
        /*NumOperands*/ 3);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_BITCAST: {
    TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &GPRToFPRMapping = getInstructionMapping(
        /*ID*/ 3,
        /*Cost*/
        copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank,
                 TypeSize::getFixed(Size)),
        getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRToGPRMapping = getInstructionMapping(
        /*ID*/ 3,
        /*Cost*/
        copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank,
                 TypeSize::getFixed(Size)),
        getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    AltMappings.push_back(&GPRToFPRMapping);
    AltMappings.push_back(&FPRToGPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_LOAD: {
    TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getOperandsMapping(
            {getValueMapping(PMI_FirstGPR, Size),
             // Addresses are GPR 64-bit.
             getValueMapping(PMI_FirstGPR, TypeSize::getFixed(64))}),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getOperandsMapping(
            {getValueMapping(PMI_FirstFPR, Size),
             // Addresses are GPR 64-bit.
             getValueMapping(PMI_FirstGPR, TypeSize::getFixed(64))}),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
  default:
    break;
  }
  return RegisterBankInfo::getInstrAlternativeMappings(MI);
}

void AArch64RegisterBankInfo::applyMappingImpl(
    MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const {
  MachineInstr &MI = OpdMapper.getMI();
  MachineRegisterInfo &MRI = OpdMapper.getMRI();

  switch (MI.getOpcode()) {
  case TargetOpcode::G_OR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_LOAD:
    // Those IDs must match getInstrAlternativeMappings.
    assert((OpdMapper.getInstrMapping().getID() >= 1 &&
            OpdMapper.getInstrMapping().getID() <= 4) &&
           "Don't know how to handle that ID");
    return applyDefaultMapping(OpdMapper);
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    // Extend smaller gpr operands to 32 bit.
    Builder.setInsertPt(*MI.getParent(), MI.getIterator());
    auto Ext = Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(2).getReg());
    MRI.setRegBank(Ext.getReg(0), getRegBank(AArch64::GPRRegBankID));
    MI.getOperand(2).setReg(Ext.getReg(0));
    return applyDefaultMapping(OpdMapper);
  }
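  // For example (schematically), a G_DUP of an s8 constant such as
  //   %cst:gpr(s8) = G_CONSTANT i8 1
  //   %dup:fpr(<8 x s8>) = G_DUP %cst(s8)
  // has its scalar operand rebuilt below as a 32-bit value:
  //   %ext:gpr(s32) = G_CONSTANT i32 1
  //   %dup:fpr(<8 x s8>) = G_DUP %ext(s32)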
  case AArch64::G_DUP: {
    // Extend smaller gpr sources to 32 bits.
    assert(MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() < 32 &&
           "Expected sources smaller than 32-bits");
    Builder.setInsertPt(*MI.getParent(), MI.getIterator());

    Register ConstReg;
    auto ConstMI = MRI.getVRegDef(MI.getOperand(1).getReg());
    if (ConstMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      auto CstVal = ConstMI->getOperand(1).getCImm()->getValue();
      ConstReg =
          Builder.buildConstant(LLT::scalar(32), CstVal.sext(32)).getReg(0);
    } else {
      ConstReg = Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(1).getReg())
                     .getReg(0);
    }
    MRI.setRegBank(ConstReg, getRegBank(AArch64::GPRRegBankID));
    MI.getOperand(1).setReg(ConstReg);
    return applyDefaultMapping(OpdMapper);
  }
  default:
    llvm_unreachable("Don't know how to handle that operation");
  }
}

const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
    const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned NumOperands = MI.getNumOperands();
  assert(NumOperands <= 3 &&
         "This code is for instructions with 3 or fewer operands");

  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  TypeSize Size = Ty.getSizeInBits();
  bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);

  PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR;

#ifndef NDEBUG
  // Make sure all the operands are using similar size and type.
  // Should probably be checked by the machine verifier.
  // This code won't catch cases where the number of lanes is
  // different between the operands.
  // If we want to go to that level of detail, it is probably
  // best to check that the types are the same, period.
  // Currently, we just check that the register banks are the same
  // for each type.
  for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
    LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
    assert(
        AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(
            RBIdx, OpTy.getSizeInBits()) ==
            AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) &&
        "Operand has incompatible size");
    bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
    (void)OpIsFPR;
    assert(IsFPR == OpIsFPR && "Operand has incompatible type");
  }
#endif // End NDEBUG.

  return getInstructionMapping(DefaultMappingID, 1,
                               getValueMapping(RBIdx, Size), NumOperands);
}

/// \returns true if a given intrinsic only uses and defines FPRs.
static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
                          const MachineInstr &MI) {
  // TODO: Add more intrinsics.
  switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
  default:
    return false;
  case Intrinsic::aarch64_neon_uaddlv:
  case Intrinsic::aarch64_neon_uaddv:
  case Intrinsic::aarch64_neon_saddv:
  case Intrinsic::aarch64_neon_umaxv:
  case Intrinsic::aarch64_neon_smaxv:
  case Intrinsic::aarch64_neon_uminv:
  case Intrinsic::aarch64_neon_sminv:
  case Intrinsic::aarch64_neon_faddv:
  case Intrinsic::aarch64_neon_fmaxv:
  case Intrinsic::aarch64_neon_fminv:
  case Intrinsic::aarch64_neon_fmaxnmv:
  case Intrinsic::aarch64_neon_fminnmv:
  case Intrinsic::aarch64_neon_fmulx:
  case Intrinsic::aarch64_neon_frecpe:
  case Intrinsic::aarch64_neon_frecps:
  case Intrinsic::aarch64_neon_frecpx:
  case Intrinsic::aarch64_neon_frsqrte:
  case Intrinsic::aarch64_neon_frsqrts:
  case Intrinsic::aarch64_neon_facge:
  case Intrinsic::aarch64_neon_facgt:
  case Intrinsic::aarch64_neon_fabd:
  case Intrinsic::aarch64_neon_sqrdmlah:
  case Intrinsic::aarch64_neon_sqrdmlsh:
  case Intrinsic::aarch64_neon_sqrdmulh:
  case Intrinsic::aarch64_neon_sqadd:
  case Intrinsic::aarch64_neon_sqsub:
  case Intrinsic::aarch64_neon_srshl:
  case Intrinsic::aarch64_neon_urshl:
  case Intrinsic::aarch64_neon_sqshl:
  case Intrinsic::aarch64_neon_uqshl:
  case Intrinsic::aarch64_neon_sqrshl:
  case Intrinsic::aarch64_neon_uqrshl:
  case Intrinsic::aarch64_neon_ushl:
  case Intrinsic::aarch64_neon_sshl:
  case Intrinsic::aarch64_neon_sqshrn:
  case Intrinsic::aarch64_neon_sqshrun:
  case Intrinsic::aarch64_neon_sqrshrn:
  case Intrinsic::aarch64_neon_sqrshrun:
  case Intrinsic::aarch64_neon_uqshrn:
  case Intrinsic::aarch64_neon_uqrshrn:
  case Intrinsic::aarch64_crypto_sha1h:
  case Intrinsic::aarch64_crypto_sha1c:
  case Intrinsic::aarch64_crypto_sha1p:
  case Intrinsic::aarch64_crypto_sha1m:
  case Intrinsic::aarch64_sisd_fcvtxn:
  case Intrinsic::aarch64_sisd_fabd:
    return true;
  case Intrinsic::aarch64_neon_saddlv: {
    const LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
    return SrcTy.getElementType().getSizeInBits() >= 16 &&
           SrcTy.getElementCount().getFixedValue() >= 4;
  }
  }
}

bool AArch64RegisterBankInfo::isPHIWithFPConstraints(
    const MachineInstr &MI, const MachineRegisterInfo &MRI,
    const AArch64RegisterInfo &TRI, const unsigned Depth) const {
  if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
    return false;

  return any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
                [&](const MachineInstr &UseMI) {
                  if (onlyUsesFP(UseMI, MRI, TRI, Depth + 1))
                    return true;
                  return isPHIWithFPConstraints(UseMI, MRI, TRI, Depth + 1);
                });
}
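
// isPHIWithFPConstraints, hasFPConstraints, onlyUsesFP and onlyDefinesFP
// recurse into one another; the Depth parameter, checked against
// MaxFPRSearchDepth, bounds that mutual recursion so the search cannot walk
// arbitrarily far through the use/def graph.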

bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
                                               const MachineRegisterInfo &MRI,
                                               const TargetRegisterInfo &TRI,
                                               unsigned Depth) const {
  unsigned Op = MI.getOpcode();
  if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MRI, MI))
    return true;

  // Do we have an explicit floating point instruction?
  if (isPreISelGenericFloatingPointOpcode(Op))
    return true;

  // No. Check if we have a copy-like instruction. If we do, then we could
  // still be fed by floating point instructions.
  if (Op != TargetOpcode::COPY && !MI.isPHI() &&
      !isPreISelGenericOptimizationHint(Op))
    return false;

  // Check if we already know the register bank.
  auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
  if (RB == &AArch64::FPRRegBank)
    return true;
  if (RB == &AArch64::GPRRegBank)
    return false;

  // We don't know anything.
  //
  // If we have a phi, we may be able to infer that it will be assigned a FPR
  // based off of its inputs.
  if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
    return false;

  return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
    return Op.isReg() &&
           onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
  });
}

bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
                                         const MachineRegisterInfo &MRI,
                                         const TargetRegisterInfo &TRI,
                                         unsigned Depth) const {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FPTOSI_SAT:
  case TargetOpcode::G_FPTOUI_SAT:
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND:
  case AArch64::G_PMULL:
  case AArch64::G_SLI:
  case AArch64::G_SRI:
    return true;
  case TargetOpcode::G_INTRINSIC:
    switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
    case Intrinsic::aarch64_neon_fcvtas:
    case Intrinsic::aarch64_neon_fcvtau:
    case Intrinsic::aarch64_neon_fcvtzs:
    case Intrinsic::aarch64_neon_fcvtzu:
    case Intrinsic::aarch64_neon_fcvtms:
    case Intrinsic::aarch64_neon_fcvtmu:
    case Intrinsic::aarch64_neon_fcvtns:
    case Intrinsic::aarch64_neon_fcvtnu:
    case Intrinsic::aarch64_neon_fcvtps:
    case Intrinsic::aarch64_neon_fcvtpu:
      return true;
    default:
      break;
    }
    break;
  default:
    break;
  }
  return hasFPConstraints(MI, MRI, TRI, Depth);
}

bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
                                            const MachineRegisterInfo &MRI,
                                            const TargetRegisterInfo &TRI,
                                            unsigned Depth) const {
  switch (MI.getOpcode()) {
  case AArch64::G_DUP:
  case AArch64::G_SADDLP:
  case AArch64::G_UADDLP:
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
  case AArch64::G_SLI:
  case AArch64::G_SRI:
    return true;
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
    case Intrinsic::aarch64_neon_ld1x2:
    case Intrinsic::aarch64_neon_ld1x3:
    case Intrinsic::aarch64_neon_ld1x4:
    case Intrinsic::aarch64_neon_ld2:
    case Intrinsic::aarch64_neon_ld2lane:
    case Intrinsic::aarch64_neon_ld2r:
    case Intrinsic::aarch64_neon_ld3:
    case Intrinsic::aarch64_neon_ld3lane:
    case Intrinsic::aarch64_neon_ld3r:
    case Intrinsic::aarch64_neon_ld4:
    case Intrinsic::aarch64_neon_ld4lane:
    case Intrinsic::aarch64_neon_ld4r:
      return true;
    default:
      break;
    }
    break;
  default:
    break;
  }
  return hasFPConstraints(MI, MRI, TRI, Depth);
}

bool AArch64RegisterBankInfo::prefersFPUse(const MachineInstr &MI,
                                           const MachineRegisterInfo &MRI,
                                           const TargetRegisterInfo &TRI,
                                           unsigned Depth) const {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
    return MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() ==
           MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
  }
  return onlyDefinesFP(MI, MRI, TRI, Depth);
}
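
// prefersFPUse exists because a same-sized int-to-FP conversion can be done
// entirely within the FPR bank (see the G_SITOFP/G_UITOFP handling in
// getInstrMapping below), so an instruction feeding such a conversion is
// worth keeping on FPR even though the conversion also accepts a GPR input.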

bool AArch64RegisterBankInfo::isLoadFromFPType(const MachineInstr &MI) const {
  // GMemOperation because we also want to match indexed loads.
  auto *MemOp = cast<GMemOperation>(&MI);
  const Value *LdVal = MemOp->getMMO().getValue();
  if (!LdVal)
    return false;

  Type *EltTy = nullptr;
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(LdVal)) {
    EltTy = GV->getValueType();
    // Look at the first element of the struct to determine the type we are
    // loading
    while (StructType *StructEltTy = dyn_cast<StructType>(EltTy)) {
      if (StructEltTy->getNumElements() == 0)
        break;
      EltTy = StructEltTy->getTypeAtIndex(0U);
    }
    // Look at the first element of the array to determine its type
    if (isa<ArrayType>(EltTy))
      EltTy = EltTy->getArrayElementType();
  } else if (!isa<Constant>(LdVal)) {
    // FIXME: grubbing around uses is pretty ugly, but with no more
    // `getPointerElementType` there's not much else we can do.
    for (const auto *LdUser : LdVal->users()) {
      if (isa<LoadInst>(LdUser)) {
        EltTy = LdUser->getType();
        break;
      }
      if (isa<StoreInst>(LdUser) && LdUser->getOperand(1) == LdVal) {
        EltTy = LdUser->getOperand(0)->getType();
        break;
      }
    }
  }
  return EltTy && EltTy->isFPOrFPVectorTy();
}
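
// For instance, given IR like
//   %v = load double, ptr @g
// the MMO value is @g, whose value type is double, so the corresponding
// G_LOAD is steered to FPR without having to inspect any users.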

const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();

  // Try the default logic for non-generic instructions that are either copies
  // or already have some operands assigned to banks.
  if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) ||
      Opc == TargetOpcode::G_PHI) {
    const RegisterBankInfo::InstructionMapping &Mapping =
        getInstrMappingImpl(MI);
    if (Mapping.isValid())
      return Mapping;
  }

  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();

  switch (Opc) {
    // G_{F|S|U}REM are not listed because they are not legal.
    // Arithmetic ops.
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
    // Bitwise ops.
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    // Floating point ops.
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMINIMUM:
    return getSameKindOfOperandsMapping(MI);
  case TargetOpcode::G_FPEXT: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    return getInstructionMapping(
        DefaultMappingID, /*Cost*/ 1,
        getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()),
        /*NumOperands*/ 2);
  }
    // Shifts.
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    LLT ShiftAmtTy = MRI.getType(MI.getOperand(2).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (ShiftAmtTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32)
      return getInstructionMapping(DefaultMappingID, 1,
                                   &ValMappings[Shift64Imm], 3);
    return getSameKindOfOperandsMapping(MI);
  }
  case TargetOpcode::COPY: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    // Check if one of the registers is not a generic register.
    if ((DstReg.isPhysical() || !MRI.getType(DstReg).isValid()) ||
        (SrcReg.isPhysical() || !MRI.getType(SrcReg).isValid())) {
      const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI);
      const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI);
      if (!DstRB)
        DstRB = SrcRB;
      else if (!SrcRB)
        SrcRB = DstRB;
      // If both RB are null that means both registers are generic.
      // We shouldn't be here.
      assert(DstRB && SrcRB && "Both RegBank were nullptr");
      TypeSize Size = getSizeInBits(DstReg, MRI, TRI);
      return getInstructionMapping(
          DefaultMappingID, copyCost(*DstRB, *SrcRB, Size),
          getCopyMapping(DstRB->getID(), SrcRB->getID(), Size),
          // We only care about the mapping of the destination.
          /*NumOperands*/ 1);
    }
    // Both registers are generic, use G_BITCAST.
    [[fallthrough]];
  }
  case TargetOpcode::G_BITCAST: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    TypeSize Size = DstTy.getSizeInBits();
    bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64;
    bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64;
    const RegisterBank &DstRB =
        DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    const RegisterBank &SrcRB =
        SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    return getInstructionMapping(
        DefaultMappingID, copyCost(DstRB, SrcRB, Size),
        getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
        // We only care about the mapping of the destination for COPY.
        /*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1);
  }
  default:
    break;
  }

  unsigned NumOperands = MI.getNumOperands();
  unsigned MappingID = DefaultMappingID;

  // Track the size and bank of each register.  We don't do partial mappings.
  SmallVector<unsigned, 4> OpSize(NumOperands);
  SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    auto &MO = MI.getOperand(Idx);
    if (!MO.isReg() || !MO.getReg())
      continue;

    LLT Ty = MRI.getType(MO.getReg());
    if (!Ty.isValid())
      continue;
    OpSize[Idx] = Ty.getSizeInBits().getKnownMinValue();

    // As a top-level guess, vectors including both scalable and non-scalable
    // ones go in FPRs, scalars and pointers in GPRs.
    // For floating-point instructions, scalars go in FPRs.
    if (Ty.isVector())
      OpRegBankIdx[Idx] = PMI_FirstFPR;
    else if (isPreISelGenericFloatingPointOpcode(Opc) ||
             (MO.isDef() && onlyDefinesFP(MI, MRI, TRI)) ||
             (MO.isUse() && onlyUsesFP(MI, MRI, TRI)) ||
             Ty.getSizeInBits() > 64)
      OpRegBankIdx[Idx] = PMI_FirstFPR;
    else
      OpRegBankIdx[Idx] = PMI_FirstGPR;
  }

  unsigned Cost = 1;
  // Some of the floating-point instructions have mixed GPR and FPR operands:
  // fine-tune the computed mapping.
  switch (Opc) {
  case AArch64::G_DUP: {
    Register ScalarReg = MI.getOperand(1).getReg();
    LLT ScalarTy = MRI.getType(ScalarReg);
    auto ScalarDef = MRI.getVRegDef(ScalarReg);
    // We want to select dup(load) into LD1R.
    if (ScalarDef->getOpcode() == TargetOpcode::G_LOAD)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    // s8 is an exception for G_DUP, which we always want on gpr.
    else if (ScalarTy.getSizeInBits() != 8 &&
             (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
              onlyDefinesFP(*ScalarDef, MRI, TRI)))
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else {
      if (ScalarTy.getSizeInBits() < 32 &&
          getRegBank(ScalarReg, MRI, TRI) == &AArch64::GPRRegBank) {
        // Calls applyMappingImpl()
        MappingID = CustomMappingID;
      }
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    }
    break;
  }
  case TargetOpcode::G_TRUNC: {
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    // Integer to FP conversions don't necessarily happen between GPR -> FPR
    // regbanks. They can also be done within an FPR register.
    Register SrcReg = MI.getOperand(1).getReg();
    if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank &&
        MRI.getType(SrcReg).getSizeInBits() ==
            MRI.getType(MI.getOperand(0).getReg()).getSizeInBits())
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  }
  case TargetOpcode::G_FPTOSI_SAT:
  case TargetOpcode::G_FPTOUI_SAT:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_INTRINSIC_LRINT:
  case TargetOpcode::G_INTRINSIC_LLRINT:
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND: {
    LLT DstType = MRI.getType(MI.getOperand(0).getReg());
    if (DstType.isVector())
      break;
    if (DstType == LLT::scalar(16)) {
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
      break;
    }
    TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    TypeSize SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, TRI);
    if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) &&
        all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](const MachineInstr &UseMI) {
                 return onlyUsesFP(UseMI, MRI, TRI) ||
                        prefersFPUse(UseMI, MRI, TRI);
               }))
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_FCMP: {
    // If the result is a vector, it must use a FPR.
    AArch64GenRegisterBankInfo::PartialMappingIdx Idx0 =
        MRI.getType(MI.getOperand(0).getReg()).isVector() ? PMI_FirstFPR
                                                          : PMI_FirstGPR;
    OpRegBankIdx = {Idx0,
                    /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_BITCAST:
    // This is going to be a cross register bank copy and this is expensive.
    if (OpRegBankIdx[0] != OpRegBankIdx[1])
      Cost = copyCost(
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank,
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
          TypeSize::getFixed(OpSize[0]));
    break;
  case TargetOpcode::G_LOAD: {
    // Loading in vector unit is slightly more expensive.
    // This is actually only true for the LD1R and co instructions,
    // but anyway for the fast mode this number does not matter and
    // for the greedy mode the cost of the cross bank copy will
    // offset this number.
    // FIXME: Should be derived from the scheduling model.
    if (OpRegBankIdx[0] != PMI_FirstGPR) {
      Cost = 2;
      break;
    }

    if (cast<GLoad>(MI).isAtomic()) {
      // Atomics always use GPR destinations. Don't refine any further.
      OpRegBankIdx[0] = PMI_FirstGPR;
      break;
    }

    // Try to guess the type of the load from the MMO.
    if (isLoadFromFPType(MI)) {
      OpRegBankIdx[0] = PMI_FirstFPR;
      break;
    }

    // Check if that load feeds fp instructions.
    // In that case, we want the default mapping to be on FPR
    // instead of blindly mapping every scalar to GPR.
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](const MachineInstr &UseMI) {
                 // If we have at least one direct or indirect use
                 // in a FP instruction,
                 // assume this was a floating point load in the IR. If it was
                 // not, we would have had a bitcast before reaching that
                 // instruction.
                 //
                 // Int->FP conversion operations are also captured in
                 // prefersFPUse().

                 if (isPHIWithFPConstraints(UseMI, MRI, TRI))
                   return true;

                 return onlyUsesFP(UseMI, MRI, TRI) ||
                        prefersFPUse(UseMI, MRI, TRI);
               }))
      OpRegBankIdx[0] = PMI_FirstFPR;
    break;
  }
  case TargetOpcode::G_STORE:
    // Check if that store is fed by fp instructions.
    if (OpRegBankIdx[0] == PMI_FirstGPR) {
      Register VReg = MI.getOperand(0).getReg();
      if (!VReg)
        break;
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (onlyDefinesFP(*DefMI, MRI, TRI))
        OpRegBankIdx[0] = PMI_FirstFPR;
      break;
    }
    break;
  case TargetOpcode::G_INDEXED_STORE:
    if (OpRegBankIdx[1] == PMI_FirstGPR) {
      Register VReg = MI.getOperand(1).getReg();
      if (!VReg)
        break;
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (onlyDefinesFP(*DefMI, MRI, TRI))
        OpRegBankIdx[1] = PMI_FirstFPR;
      break;
    }
    break;
  case TargetOpcode::G_INDEXED_SEXTLOAD:
  case TargetOpcode::G_INDEXED_ZEXTLOAD:
    // These should always be GPR.
    OpRegBankIdx[0] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_INDEXED_LOAD: {
    if (isLoadFromFPType(MI))
      OpRegBankIdx[0] = PMI_FirstFPR;
    break;
  }
  case TargetOpcode::G_SELECT: {
    // If the destination is FPR, preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    // If we're taking in vectors, we have no choice but to put everything on
    // FPRs, except for the condition. The condition must always be on a GPR.
    LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
    if (SrcTy.isVector()) {
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
      break;
    }

    // Try to minimize the number of copies. If we have more floating point
    // constrained values than not, then we'll put everything on FPR. Otherwise,
    // everything has to be on GPR.
    unsigned NumFP = 0;

    // Check if the uses of the result always produce floating point values.
    //
    // For example:
    //
    // %z = G_SELECT %cond %x %y
    // fpr = G_FOO %z ...
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
      ++NumFP;

    // Check if the defs of the source values always produce floating point
    // values.
    //
    // For example:
    //
    // %x = G_SOMETHING_ALWAYS_FLOAT %a ...
    // %z = G_SELECT %cond %x %y
    //
    // Also check whether or not the sources have already been decided to be
    // FPR. Keep track of this.
    //
    // This doesn't check the condition, since it's just whatever is in NZCV.
    // This isn't passed explicitly in a register to fcsel/csel.
    for (unsigned Idx = 2; Idx < 4; ++Idx) {
      Register VReg = MI.getOperand(Idx).getReg();
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
          onlyDefinesFP(*DefMI, MRI, TRI))
        ++NumFP;
    }

    // If we have more FP constraints than not, then move everything over to
    // FPR.
    if (NumFP >= 2)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};

    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    // If the first operand belongs to a FPR register bank, then make sure that
    // we preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands() - 1).getReg());
    // UNMERGE into scalars from a vector should always use FPR.
    // Likewise if any of the uses are FP instructions.
    if (SrcTy.isVector() || SrcTy == LLT::scalar(128) ||
        any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) {
      // Set the register bank of every operand to FPR.
      for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
           Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    // Destination and source need to be FPRs.
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // Index needs to be a GPR.
    OpRegBankIdx[2] = PMI_FirstGPR;
    break;
  case AArch64::G_SQSHLU_I:
    // Destination and source need to be FPRs.
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // Shift Index needs to be a GPR.
    OpRegBankIdx[2] = PMI_FirstGPR;
    break;

  case TargetOpcode::G_INSERT_VECTOR_ELT:
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // The element may be either a GPR or FPR. Preserve that behaviour.
    if (getRegBank(MI.getOperand(2).getReg(), MRI, TRI) == &AArch64::FPRRegBank)
      OpRegBankIdx[2] = PMI_FirstFPR;
    else {
      // If the type is i8/i16, and the regbank will be GPR, then we change the
      // type to i32 in applyMappingImpl.
      LLT Ty = MRI.getType(MI.getOperand(2).getReg());
      if (Ty.getSizeInBits() == 8 || Ty.getSizeInBits() == 16) {
        // Calls applyMappingImpl()
        MappingID = CustomMappingID;
      }
      OpRegBankIdx[2] = PMI_FirstGPR;
    }

    // Index needs to be a GPR.
    OpRegBankIdx[3] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_EXTRACT: {
    // For s128 sources we have to use fpr unless we know otherwise.
    auto Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (SrcTy.getSizeInBits() != 128)
      break;
    auto Idx = MRI.getRegClassOrNull(Src) == &AArch64::XSeqPairsClassRegClass
                   ? PMI_FirstGPR
                   : PMI_FirstFPR;
    OpRegBankIdx[0] = Idx;
    OpRegBankIdx[1] = Idx;
    break;
  }
  case TargetOpcode::G_BUILD_VECTOR: {
    // If the first source operand belongs to a FPR register bank, then make
    // sure that we preserve that.
    if (OpRegBankIdx[1] != PMI_FirstGPR)
      break;
    Register VReg = MI.getOperand(1).getReg();
    if (!VReg)
      break;

    // Get the instruction that defined the source operand reg, and check if
    // it's a floating point operation. Or, if it's a type like s16 which
    // doesn't have an exact size gpr register class. The exception is if the
    // build_vector has all constant operands, which may be better to leave as
    // gpr without copies, so it can be matched in imported patterns.
    MachineInstr *DefMI = MRI.getVRegDef(VReg);
    unsigned DefOpc = DefMI->getOpcode();
    const LLT SrcTy = MRI.getType(VReg);
    if (all_of(MI.operands(), [&](const MachineOperand &Op) {
          return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() ==
                                   TargetOpcode::G_CONSTANT;
        }))
      break;
    if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
        SrcTy.getSizeInBits() < 32 ||
        getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank) {
      // Have a floating point op.
      // Make sure every operand gets mapped to a FPR register class.
      unsigned NumOperands = MI.getNumOperands();
      for (unsigned Idx = 0; Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_FMUL:
  case TargetOpcode::G_VECREDUCE_FMAX:
  case TargetOpcode::G_VECREDUCE_FMIN:
  case TargetOpcode::G_VECREDUCE_FMAXIMUM:
  case TargetOpcode::G_VECREDUCE_FMINIMUM:
  case TargetOpcode::G_VECREDUCE_ADD:
  case TargetOpcode::G_VECREDUCE_MUL:
  case TargetOpcode::G_VECREDUCE_AND:
  case TargetOpcode::G_VECREDUCE_OR:
  case TargetOpcode::G_VECREDUCE_XOR:
  case TargetOpcode::G_VECREDUCE_SMAX:
  case TargetOpcode::G_VECREDUCE_SMIN:
  case TargetOpcode::G_VECREDUCE_UMAX:
  case TargetOpcode::G_VECREDUCE_UMIN:
    // Reductions produce a scalar value from a vector, the scalar should be on
    // FPR bank.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_VECREDUCE_SEQ_FADD:
  case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
    // These reductions also take a scalar accumulator input.
    // Assign them FPR for now.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
    switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
    case Intrinsic::aarch64_neon_fcvtas:
    case Intrinsic::aarch64_neon_fcvtau:
    case Intrinsic::aarch64_neon_fcvtzs:
    case Intrinsic::aarch64_neon_fcvtzu:
    case Intrinsic::aarch64_neon_fcvtms:
    case Intrinsic::aarch64_neon_fcvtmu:
    case Intrinsic::aarch64_neon_fcvtns:
    case Intrinsic::aarch64_neon_fcvtnu:
    case Intrinsic::aarch64_neon_fcvtps:
    case Intrinsic::aarch64_neon_fcvtpu: {
      OpRegBankIdx[2] = PMI_FirstFPR;
      if (MRI.getType(MI.getOperand(0).getReg()).isVector()) {
        OpRegBankIdx[0] = PMI_FirstFPR;
        break;
      }
      TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
      TypeSize SrcSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, TRI);
      if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) &&
          all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
                 [&](const MachineInstr &UseMI) {
                   return onlyUsesFP(UseMI, MRI, TRI) ||
                          prefersFPUse(UseMI, MRI, TRI);
                 }))
        OpRegBankIdx[0] = PMI_FirstFPR;
      else
        OpRegBankIdx[0] = PMI_FirstGPR;
      break;
    }
    case Intrinsic::aarch64_neon_vcvtfxs2fp:
    case Intrinsic::aarch64_neon_vcvtfxu2fp:
    case Intrinsic::aarch64_neon_vcvtfp2fxs:
    case Intrinsic::aarch64_neon_vcvtfp2fxu:
      // Override these intrinsics, because they would have a partial
      // mapping. This is needed for 'half' types, which otherwise don't
      // get legalised correctly.
      OpRegBankIdx[0] = PMI_FirstFPR;
      OpRegBankIdx[2] = PMI_FirstFPR;
      // OpRegBankIdx[1] is the intrinsic ID.
      // OpRegBankIdx[3] is an integer immediate.
      break;
    default: {
      // Check if we know that the intrinsic has any constraints on its register
      // banks. If it does, then update the mapping accordingly.
      unsigned Idx = 0;
      if (onlyDefinesFP(MI, MRI, TRI))
        for (const auto &Op : MI.defs()) {
          if (Op.isReg())
            OpRegBankIdx[Idx] = PMI_FirstFPR;
          ++Idx;
        }
      else
        Idx += MI.getNumExplicitDefs();

      if (onlyUsesFP(MI, MRI, TRI))
        for (const auto &Op : MI.explicit_uses()) {
          if (Op.isReg())
            OpRegBankIdx[Idx] = PMI_FirstFPR;
          ++Idx;
        }
      break;
    }
    }
    break;
  }
  }

  // Finally construct the computed mapping.
  SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
      LLT Ty = MRI.getType(MI.getOperand(Idx).getReg());
      if (!Ty.isValid())
        continue;
      auto Mapping =
          getValueMapping(OpRegBankIdx[Idx], TypeSize::getFixed(OpSize[Idx]));
      if (!Mapping->isValid())
        return getInvalidInstructionMapping();

      OpdsMapping[Idx] = Mapping;
    }
  }

  return getInstructionMapping(MappingID, Cost, getOperandsMapping(OpdsMapping),
                               NumOperands);
}