//===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the PowerPC-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// PPCGenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCCallingConv.h"
#include "PPCISelLowering.h"
#include "PPCSelectionDAGInfo.h"
#include "PPCSubtarget.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Operator.h"
#include <optional>
35
//===----------------------------------------------------------------------===//
//
// TBD:
//   fastLowerArguments: Handle simple cases.
//   PPCMaterializeGV: Handle TLS.
//   SelectCall: Handle function pointers.
//   SelectCall: Handle multi-register return values.
//   SelectCall: Optimize away nops for local calls.
//   processCallArgs: Handle bit-converted arguments.
//   finishCall: Handle multi-register return values.
//   PPCComputeAddress: Handle parameter references as FrameIndex's.
//   PPCEmitCmp: Handle immediate as operand 1.
//   SelectCall: Handle small byval arguments.
//   SelectIntrinsicCall: Implement.
//   SelectSelect: Implement.
//   Consider factoring isTypeLegal into the base class.
//   Implement switches and jump tables.
//
//===----------------------------------------------------------------------===//
using namespace llvm;

// Tag used by the LLVM_DEBUG/STATISTIC machinery for this file's output.
#define DEBUG_TYPE "ppcfastisel"

59namespace {
60
61struct Address {
62 enum {
63 RegBase,
64 FrameIndexBase
65 } BaseType;
66
67 union {
68 unsigned Reg;
69 int FI;
70 } Base;
71
72 int64_t Offset;
73
74 // Innocuous defaults for our address.
75 Address()
76 : BaseType(RegBase), Offset(0) {
77 Base.Reg = 0;
78 }
79};
80
81class PPCFastISel final : public FastISel {
82
83 const TargetMachine &TM;
84 const PPCSubtarget *Subtarget;
85 PPCFunctionInfo *PPCFuncInfo;
86 const TargetInstrInfo &TII;
87 const TargetLowering &TLI;
88 LLVMContext *Context;
89
90 public:
91 explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
92 const TargetLibraryInfo *LibInfo,
93 const LibcallLoweringInfo *LibcallLowering)
94 : FastISel(FuncInfo, LibInfo, LibcallLowering),
95 TM(FuncInfo.MF->getTarget()),
96 Subtarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
97 PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
98 TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()),
99 Context(&FuncInfo.Fn->getContext()) {}
100
101 // Backend specific FastISel code.
102 private:
103 bool fastSelectInstruction(const Instruction *I) override;
104 Register fastMaterializeConstant(const Constant *C) override;
105 Register fastMaterializeAlloca(const AllocaInst *AI) override;
106 bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
107 const LoadInst *LI) override;
108 bool fastLowerArguments() override;
109 Register fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
110 Register fastEmitInst_ri(unsigned MachineInstOpcode,
111 const TargetRegisterClass *RC, Register Op0,
112 uint64_t Imm);
113 Register fastEmitInst_r(unsigned MachineInstOpcode,
114 const TargetRegisterClass *RC, Register Op0);
115 Register fastEmitInst_rr(unsigned MachineInstOpcode,
116 const TargetRegisterClass *RC, Register Op0,
117 Register Op1);
118
119 bool fastLowerCall(CallLoweringInfo &CLI) override;
120
121 // Instruction selection routines.
122 private:
123 bool SelectLoad(const Instruction *I);
124 bool SelectStore(const Instruction *I);
125 bool SelectBranch(const Instruction *I);
126 bool SelectIndirectBr(const Instruction *I);
127 bool SelectFPExt(const Instruction *I);
128 bool SelectFPTrunc(const Instruction *I);
129 bool SelectIToFP(const Instruction *I, bool IsSigned);
130 bool SelectFPToI(const Instruction *I, bool IsSigned);
131 bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
132 bool SelectRet(const Instruction *I);
133 bool SelectTrunc(const Instruction *I);
134 bool SelectIntExt(const Instruction *I);
135
136 // Utility routines.
137 private:
138 bool isTypeLegal(Type *Ty, MVT &VT);
139 bool isLoadTypeLegal(Type *Ty, MVT &VT);
140 bool isValueAvailable(const Value *V) const;
141 bool isVSFRCRegClass(const TargetRegisterClass *RC) const {
142 return RC->getID() == PPC::VSFRCRegClassID;
143 }
144 bool isVSSRCRegClass(const TargetRegisterClass *RC) const {
145 return RC->getID() == PPC::VSSRCRegClassID;
146 }
147 Register copyRegToRegClass(const TargetRegisterClass *ToRC, Register SrcReg,
148 RegState Flag = {}, unsigned SubReg = 0) {
149 Register TmpReg = createResultReg(ToRC);
150 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
151 TII.get(TargetOpcode::COPY), TmpReg).addReg(SrcReg, Flag, SubReg);
152 return TmpReg;
153 }
154 bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt,
155 Register DestReg, const PPC::Predicate Pred);
156 bool PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
157 const TargetRegisterClass *RC, bool IsZExt = true,
158 unsigned FP64LoadOpc = PPC::LFD);
159 bool PPCEmitStore(MVT VT, Register SrcReg, Address &Addr);
160 bool PPCComputeAddress(const Value *Obj, Address &Addr);
161 void PPCSimplifyAddress(Address &Addr, bool &UseOffset, Register &IndexReg);
162 bool PPCEmitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT, Register DestReg,
163 bool IsZExt);
164 Register PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
165 Register PPCMaterializeGV(const GlobalValue *GV, MVT VT);
166 Register PPCMaterializeInt(const ConstantInt *CI, MVT VT,
167 bool UseSExt = true);
168 Register PPCMaterialize32BitInt(int64_t Imm, const TargetRegisterClass *RC);
169 Register PPCMaterialize64BitInt(int64_t Imm, const TargetRegisterClass *RC);
170 Register PPCMoveToIntReg(const Instruction *I, MVT VT, Register SrcReg,
171 bool IsSigned);
172 Register PPCMoveToFPReg(MVT VT, Register SrcReg, bool IsSigned);
173
174 // Call handling routines.
175 private:
176 bool processCallArgs(SmallVectorImpl<Value *> &Args,
178 SmallVectorImpl<MVT> &ArgVTs,
181 unsigned &NumBytes, bool IsVarArg);
182 bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);
183
184 private:
185 #include "PPCGenFastISel.inc"
186
187};
188
189} // end anonymous namespace
190
191static std::optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
192 switch (Pred) {
193 // These are not representable with any single compare.
196 // Major concern about the following 6 cases is NaN result. The comparison
197 // result consists of 4 bits, indicating lt, eq, gt and un (unordered),
198 // only one of which will be set. The result is generated by fcmpu
199 // instruction. However, bc instruction only inspects one of the first 3
200 // bits, so when un is set, bc instruction may jump to an undesired
201 // place.
202 //
203 // More specifically, if we expect an unordered comparison and un is set, we
204 // expect to always go to true branch; in such case UEQ, UGT and ULT still
205 // give false, which are undesired; but UNE, UGE, ULE happen to give true,
206 // since they are tested by inspecting !eq, !lt, !gt, respectively.
207 //
208 // Similarly, for ordered comparison, when un is set, we always expect the
209 // result to be false. In such case OGT, OLT and OEQ is good, since they are
210 // actually testing GT, LT, and EQ respectively, which are false. OGE, OLE
211 // and ONE are tested through !lt, !gt and !eq, and these are true.
218 default:
219 return std::nullopt;
220
222 case CmpInst::ICMP_EQ:
223 return PPC::PRED_EQ;
224
228 return PPC::PRED_GT;
229
233 return PPC::PRED_GE;
234
238 return PPC::PRED_LT;
239
243 return PPC::PRED_LE;
244
246 case CmpInst::ICMP_NE:
247 return PPC::PRED_NE;
248
250 return PPC::PRED_NU;
251
253 return PPC::PRED_UN;
254 }
255}
256
257// Determine whether the type Ty is simple enough to be handled by
258// fast-isel, and return its equivalent machine type in VT.
259// FIXME: Copied directly from ARM -- factor into base class?
260bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
261 EVT Evt = TLI.getValueType(DL, Ty, true);
262
263 // Only handle simple types.
264 if (Evt == MVT::Other || !Evt.isSimple()) return false;
265 VT = Evt.getSimpleVT();
266
267 // Handle all legal types, i.e. a register that will directly hold this
268 // value.
269 return TLI.isTypeLegal(VT);
270}
271
272// Determine whether the type Ty is simple enough to be handled by
273// fast-isel as a load target, and return its equivalent machine type in VT.
274bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
275 if (isTypeLegal(Ty, VT)) return true;
276
277 // If this is a type than can be sign or zero-extended to a basic operation
278 // go ahead and accept it now.
279 if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
280 return true;
281 }
282
283 return false;
284}
285
286bool PPCFastISel::isValueAvailable(const Value *V) const {
287 if (!isa<Instruction>(V))
288 return true;
289
290 const auto *I = cast<Instruction>(V);
291 return FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB;
292}
293
294// Given a value Obj, create an Address object Addr that represents its
295// address. Return false if we can't handle it.
296bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
297 const User *U = nullptr;
298 unsigned Opcode = Instruction::UserOp1;
299 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
300 // Don't walk into other basic blocks unless the object is an alloca from
301 // another block, otherwise it may not have a virtual register assigned.
302 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
303 FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
304 Opcode = I->getOpcode();
305 U = I;
306 }
307 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
308 Opcode = C->getOpcode();
309 U = C;
310 }
311
312 switch (Opcode) {
313 default:
314 break;
315 case Instruction::BitCast:
316 // Look through bitcasts.
317 return PPCComputeAddress(U->getOperand(0), Addr);
318 case Instruction::IntToPtr:
319 // Look past no-op inttoptrs.
320 if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
321 TLI.getPointerTy(DL))
322 return PPCComputeAddress(U->getOperand(0), Addr);
323 break;
324 case Instruction::PtrToInt:
325 // Look past no-op ptrtoints.
326 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
327 return PPCComputeAddress(U->getOperand(0), Addr);
328 break;
329 case Instruction::GetElementPtr: {
330 Address SavedAddr = Addr;
331 int64_t TmpOffset = Addr.Offset;
332
333 // Iterate through the GEP folding the constants into offsets where
334 // we can.
336 for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
337 II != IE; ++II, ++GTI) {
338 const Value *Op = *II;
339 if (StructType *STy = GTI.getStructTypeOrNull()) {
340 const StructLayout *SL = DL.getStructLayout(STy);
341 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
342 TmpOffset += SL->getElementOffset(Idx);
343 } else {
344 uint64_t S = GTI.getSequentialElementStride(DL);
345 for (;;) {
346 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
347 // Constant-offset addressing.
348 TmpOffset += CI->getSExtValue() * S;
349 break;
350 }
351 if (canFoldAddIntoGEP(U, Op)) {
352 // A compatible add with a constant operand. Fold the constant.
353 ConstantInt *CI =
354 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
355 TmpOffset += CI->getSExtValue() * S;
356 // Iterate on the other operand.
357 Op = cast<AddOperator>(Op)->getOperand(0);
358 continue;
359 }
360 // Unsupported
361 goto unsupported_gep;
362 }
363 }
364 }
365
366 // Try to grab the base operand now.
367 Addr.Offset = TmpOffset;
368 if (PPCComputeAddress(U->getOperand(0), Addr)) return true;
369
370 // We failed, restore everything and try the other options.
371 Addr = SavedAddr;
372
373 unsupported_gep:
374 break;
375 }
376 case Instruction::Alloca: {
377 const AllocaInst *AI = cast<AllocaInst>(Obj);
378 auto SI = FuncInfo.StaticAllocaMap.find(AI);
379 if (SI != FuncInfo.StaticAllocaMap.end()) {
380 Addr.BaseType = Address::FrameIndexBase;
381 Addr.Base.FI = SI->second;
382 return true;
383 }
384 break;
385 }
386 }
387
388 // FIXME: References to parameters fall through to the behavior
389 // below. They should be able to reference a frame index since
390 // they are stored to the stack, so we can get "ld rx, offset(r1)"
391 // instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will
392 // just contain the parameter. Try to handle this with a FI.
393
394 // Try to get this in a register if nothing else has worked.
395 if (Addr.Base.Reg == 0)
396 Addr.Base.Reg = getRegForValue(Obj);
397
398 // Prevent assignment of base register to X0, which is inappropriate
399 // for loads and stores alike.
400 if (Addr.Base.Reg != 0)
401 MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass);
402
403 return Addr.Base.Reg != 0;
404}
405
// Fix up some addresses that can't be used directly. For example, if
// an offset won't fit in an instruction field, we may need to move it
// into an index register.
void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset,
                                     Register &IndexReg) {

  // Check whether the offset fits in the signed 16-bit displacement field
  // of a D-form memory instruction.
  if (!isInt<16>(Addr.Offset))
    UseOffset = false;

  // If this is a stack pointer and the offset needs to be simplified then
  // put the alloca address into a register, set the base type back to
  // register and continue. This should almost never happen.
  if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
    Register ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDI8),
            ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
    Addr.Base.Reg = ResultReg;
    Addr.BaseType = Address::RegBase;
  }

  // Out-of-range offset: materialize it into IndexReg so the caller can
  // switch to the X-form (indexed) variant of the memory instruction.
  if (!UseOffset) {
    IntegerType *OffsetTy = Type::getInt64Ty(*Context);
    const ConstantInt *Offset = ConstantInt::getSigned(OffsetTy, Addr.Offset);
    IndexReg = PPCMaterializeInt(Offset, MVT::i64);
    assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
  }
}
434
// Emit a load instruction if possible, returning true if we succeeded,
// otherwise false. See commentary below for how the register class of
// the load is determined.
bool PPCFastISel::PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                              const TargetRegisterClass *RC,
                              bool IsZExt, unsigned FP64LoadOpc) {
  unsigned Opc;
  bool UseOffset = true;
  bool HasSPE = Subtarget->hasSPE();

  // If ResultReg is given, it determines the register class of the load.
  // Otherwise, RC is the register class to use. If the result of the
  // load isn't anticipated in this block, both may be zero, in which
  // case we must make a conservative guess. In particular, don't assign
  // R0 or X0 to the result register, as the result may be used in a load,
  // store, add-immediate, or isel that won't permit this. (Though
  // perhaps the spill and reload of live-exit values would handle this?)
  const TargetRegisterClass *UseRC =
    (ResultReg ? MRI.getRegClass(ResultReg) :
     (RC ? RC :
      (VT == MVT::f64 ? (HasSPE ? &PPC::SPERCRegClass : &PPC::F8RCRegClass) :
       (VT == MVT::f32 ? (HasSPE ? &PPC::GPRCRegClass : &PPC::F4RCRegClass) :
        (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
         &PPC::GPRC_and_GPRC_NOR0RegClass)))));

  // 32-bit GPR destination implies the 32-bit opcode variants below.
  bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass);

  // Pick the D-form opcode for the value type; some cases further
  // restrict whether an immediate offset can be used at all.
  switch (VT.SimpleTy) {
    default: // e.g., vector types not handled
      return false;
    case MVT::i8:
      Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
      break;
    case MVT::i16:
      Opc = (IsZExt ? (Is32BitInt ? PPC::LHZ : PPC::LHZ8)
                    : (Is32BitInt ? PPC::LHA : PPC::LHA8));
      break;
    case MVT::i32:
      Opc = (IsZExt ? (Is32BitInt ? PPC::LWZ : PPC::LWZ8)
                    : (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
      // LWA is a DS-form instruction: its displacement must be a multiple
      // of 4, otherwise fall back to the indexed form.
      if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
        UseOffset = false;
      break;
    case MVT::i64:
      Opc = PPC::LD;
      assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
             "64-bit load with 32-bit target??");
      // LD is also DS-form: displacement must be 4-byte aligned.
      UseOffset = ((Addr.Offset & 3) == 0);
      break;
    case MVT::f32:
      Opc = Subtarget->hasSPE() ? PPC::SPELWZ : PPC::LFS;
      break;
    case MVT::f64:
      Opc = FP64LoadOpc;
      break;
  }

  // If necessary, materialize the offset into a register and use
  // the indexed form. Also handle stack pointers with special needs.
  Register IndexReg;
  PPCSimplifyAddress(Addr, UseOffset, IndexReg);

  // If this is a potential VSX load with an offset of 0, a VSX indexed load can
  // be used.
  bool IsVSSRC = isVSSRCRegClass(UseRC);
  bool IsVSFRC = isVSFRCRegClass(UseRC);
  bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
  bool Is64VSXLoad = IsVSFRC && Opc == PPC::LFD;
  if ((Is32VSXLoad || Is64VSXLoad) &&
      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
      (Addr.Offset == 0)) {
    UseOffset = false;
  }

  if (!ResultReg)
    ResultReg = createResultReg(UseRC);

  // Note: If we still have a frame index here, we know the offset is
  // in range, as otherwise PPCSimplifyAddress would have converted it
  // into a RegBase.
  if (Addr.BaseType == Address::FrameIndexBase) {
    // VSX only provides an indexed load.
    if (Is32VSXLoad || Is64VSXLoad) return false;

    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
                                          Addr.Offset),
        MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
        MFI.getObjectAlign(Addr.Base.FI));

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
      .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);

  // Base reg with offset in range.
  } else if (UseOffset) {
    // VSX only provides an indexed load.
    if (Is32VSXLoad || Is64VSXLoad) return false;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
      .addImm(Addr.Offset).addReg(Addr.Base.Reg);

  // Indexed form.
  } else {
    // Get the RR opcode corresponding to the RI one. FIXME: It would be
    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    // is hard to get at.
    switch (Opc) {
      default: llvm_unreachable("Unexpected opcode!");
      case PPC::LBZ: Opc = PPC::LBZX; break;
      case PPC::LBZ8: Opc = PPC::LBZX8; break;
      case PPC::LHZ: Opc = PPC::LHZX; break;
      case PPC::LHZ8: Opc = PPC::LHZX8; break;
      case PPC::LHA: Opc = PPC::LHAX; break;
      case PPC::LHA8: Opc = PPC::LHAX8; break;
      case PPC::LWZ: Opc = PPC::LWZX; break;
      case PPC::LWZ8: Opc = PPC::LWZX8; break;
      case PPC::LWA: Opc = PPC::LWAX; break;
      case PPC::LWA_32: Opc = PPC::LWAX_32; break;
      case PPC::LD: Opc = PPC::LDX; break;
      case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
      case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
      case PPC::EVLDD: Opc = PPC::EVLDDX; break;
      case PPC::SPELWZ: Opc = PPC::SPELWZX; break;
    }

    auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
                       ResultReg);

    // If we have an index register defined we use it in the load inst,
    // otherwise we use X0 as base as it makes the vector instructions to
    // use zero in the computation of the effective address regardless the
    // content of the register.
    if (IndexReg)
      MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
    else
      MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
  }

  return true;
}
575
576// Attempt to fast-select a load instruction.
577bool PPCFastISel::SelectLoad(const Instruction *I) {
578 // FIXME: No atomic loads are supported.
579 if (cast<LoadInst>(I)->isAtomic())
580 return false;
581
582 // Verify we have a legal type before going any further.
583 MVT VT;
584 if (!isLoadTypeLegal(I->getType(), VT))
585 return false;
586
587 // See if we can handle this address.
588 Address Addr;
589 if (!PPCComputeAddress(I->getOperand(0), Addr))
590 return false;
591
592 // Look at the currently assigned register for this instruction
593 // to determine the required register class. This is necessary
594 // to constrain RA from using R0/X0 when this is not legal.
595 Register AssignedReg = FuncInfo.ValueMap[I];
596 const TargetRegisterClass *RC =
597 AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
598
599 Register ResultReg = 0;
600 if (!PPCEmitLoad(VT, ResultReg, Addr, RC, true,
601 Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
602 return false;
603 updateValueMap(I, ResultReg);
604 return true;
605}
606
// Emit a store instruction to store SrcReg at Addr. Returns false if the
// value type or addressing mode cannot be handled.
bool PPCFastISel::PPCEmitStore(MVT VT, Register SrcReg, Address &Addr) {
  assert(SrcReg && "Nothing to store!");
  unsigned Opc;
  bool UseOffset = true;

  // The source register's class decides between 32- and 64-bit opcodes.
  const TargetRegisterClass *RC = MRI.getRegClass(SrcReg);
  bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);

  switch (VT.SimpleTy) {
    default: // e.g., vector types not handled
      return false;
    case MVT::i8:
      Opc = Is32BitInt ? PPC::STB : PPC::STB8;
      break;
    case MVT::i16:
      Opc = Is32BitInt ? PPC::STH : PPC::STH8;
      break;
    case MVT::i32:
      assert(Is32BitInt && "Not GPRC for i32??");
      Opc = PPC::STW;
      break;
    case MVT::i64:
      Opc = PPC::STD;
      // STD is DS-form: the displacement must be a multiple of 4.
      UseOffset = ((Addr.Offset & 3) == 0);
      break;
    case MVT::f32:
      Opc = Subtarget->hasSPE() ? PPC::SPESTW : PPC::STFS;
      break;
    case MVT::f64:
      Opc = Subtarget->hasSPE() ? PPC::EVSTDD : PPC::STFD;
      break;
  }

  // If necessary, materialize the offset into a register and use
  // the indexed form. Also handle stack pointers with special needs.
  Register IndexReg;
  PPCSimplifyAddress(Addr, UseOffset, IndexReg);

  // If this is a potential VSX store with an offset of 0, a VSX indexed store
  // can be used.
  bool IsVSSRC = isVSSRCRegClass(RC);
  bool IsVSFRC = isVSFRCRegClass(RC);
  bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
  bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
  if ((Is32VSXStore || Is64VSXStore) &&
      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
      (Addr.Offset == 0)) {
    UseOffset = false;
  }

  // Note: If we still have a frame index here, we know the offset is
  // in range, as otherwise PPCSimplifyAddress would have converted it
  // into a RegBase.
  if (Addr.BaseType == Address::FrameIndexBase) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore) return false;

    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
                                          Addr.Offset),
        MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
        MFI.getObjectAlign(Addr.Base.FI));

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
        .addReg(SrcReg)
        .addImm(Addr.Offset)
        .addFrameIndex(Addr.Base.FI)
        .addMemOperand(MMO);

  // Base reg with offset in range.
  } else if (UseOffset) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore)
      return false;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
      .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);

  // Indexed form.
  } else {
    // Get the RR opcode corresponding to the RI one. FIXME: It would be
    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    // is hard to get at.
    switch (Opc) {
      default: llvm_unreachable("Unexpected opcode!");
      case PPC::STB: Opc = PPC::STBX; break;
      case PPC::STH : Opc = PPC::STHX; break;
      case PPC::STW : Opc = PPC::STWX; break;
      case PPC::STB8: Opc = PPC::STBX8; break;
      case PPC::STH8: Opc = PPC::STHX8; break;
      case PPC::STW8: Opc = PPC::STWX8; break;
      case PPC::STD: Opc = PPC::STDX; break;
      case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
      case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
      case PPC::EVSTDD: Opc = PPC::EVSTDDX; break;
      case PPC::SPESTW: Opc = PPC::SPESTWX; break;
    }

    auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
        .addReg(SrcReg);

    // If we have an index register defined we use it in the store inst,
    // otherwise we use X0 as base as it makes the vector instructions to
    // use zero in the computation of the effective address regardless the
    // content of the register.
    if (IndexReg)
      MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
    else
      MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
  }

  return true;
}
721
722// Attempt to fast-select a store instruction.
723bool PPCFastISel::SelectStore(const Instruction *I) {
724 Value *Op0 = I->getOperand(0);
725 Register SrcReg;
726
727 // FIXME: No atomics loads are supported.
728 if (cast<StoreInst>(I)->isAtomic())
729 return false;
730
731 // Verify we have a legal type before going any further.
732 MVT VT;
733 if (!isLoadTypeLegal(Op0->getType(), VT))
734 return false;
735
736 // Get the value to be stored into a register.
737 SrcReg = getRegForValue(Op0);
738 if (!SrcReg)
739 return false;
740
741 // See if we can handle this address.
742 Address Addr;
743 if (!PPCComputeAddress(I->getOperand(1), Addr))
744 return false;
745
746 if (!PPCEmitStore(VT, SrcReg, Addr))
747 return false;
748
749 return true;
750}
751
752// Attempt to fast-select a branch instruction.
753bool PPCFastISel::SelectBranch(const Instruction *I) {
754 const CondBrInst *BI = cast<CondBrInst>(I);
755 MachineBasicBlock *BrBB = FuncInfo.MBB;
756 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
757 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
758
759 // For now, just try the simplest case where it's fed by a compare.
760 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
761 if (isValueAvailable(CI)) {
762 std::optional<PPC::Predicate> OptPPCPred =
763 getComparePred(CI->getPredicate());
764 if (!OptPPCPred)
765 return false;
766
767 PPC::Predicate PPCPred = *OptPPCPred;
768
769 // Take advantage of fall-through opportunities.
770 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
771 std::swap(TBB, FBB);
772 PPCPred = PPC::InvertPredicate(PPCPred);
773 }
774
775 Register CondReg = createResultReg(&PPC::CRRCRegClass);
776
777 if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
778 CondReg, PPCPred))
779 return false;
780
781 BuildMI(*BrBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::BCC))
782 .addImm(Subtarget->hasSPE() ? PPC::PRED_SPE : PPCPred)
783 .addReg(CondReg)
784 .addMBB(TBB);
785 finishCondBranch(BI->getParent(), TBB, FBB);
786 return true;
787 }
788 } else if (const ConstantInt *CI =
790 uint64_t Imm = CI->getZExtValue();
791 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
792 fastEmitBranch(Target, MIMD.getDL());
793 return true;
794 }
795
796 // FIXME: ARM looks for a case where the block containing the compare
797 // has been split from the block containing the branch. If this happens,
798 // there is a vreg available containing the result of the compare. I'm
799 // not sure we can do much, as we've lost the predicate information with
800 // the compare instruction -- we have a 4-bit CR but don't know which bit
801 // to test here.
802 return false;
803}
804
// Attempt to emit a compare of the two source values. Signed and unsigned
// comparisons are supported. Return false if we can't handle it.
bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
                             bool IsZExt, Register DestReg,
                             const PPC::Predicate Pred) {
  Type *Ty = SrcValue1->getType();
  EVT SrcEVT = TLI.getValueType(DL, Ty, true);
  if (!SrcEVT.isSimple())
    return false;
  MVT SrcVT = SrcEVT.getSimpleVT();

  // When i1 values live in condition-register bits, this GPR-based compare
  // sequence does not apply; bail out.
  if (SrcVT == MVT::i1 && Subtarget->useCRBits())
    return false;

  // See if operand 2 is an immediate encodeable in the compare.
  // FIXME: Operands are not in canonical order at -O0, so an immediate
  // operand in position 1 is a lost opportunity for now. We are
  // similar to ARM in this regard.
  int64_t Imm = 0;
  bool UseImm = false;
  const bool HasSPE = Subtarget->hasSPE();

  // Only 16-bit integer constants can be represented in compares for
  // PowerPC. Others will be materialized into a register.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
    if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
        SrcVT == MVT::i8 || SrcVT == MVT::i1) {
      const APInt &CIVal = ConstInt->getValue();
      Imm = (IsZExt) ? (int64_t)CIVal.getZExtValue() :
              (int64_t)CIVal.getSExtValue();
      // Unsigned compares take a UIMM16, signed compares a SIMM16.
      if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
        UseImm = true;
    }
  }

  Register SrcReg1 = getRegForValue(SrcValue1);
  if (!SrcReg1)
    return false;

  // Operand 2 only needs a register when it isn't folded as an immediate.
  Register SrcReg2;
  if (!UseImm) {
    SrcReg2 = getRegForValue(SrcValue2);
    if (!SrcReg2)
      return false;
  }

  unsigned CmpOpc;
  bool NeedsExt = false;

  auto RC1 = MRI.getRegClass(SrcReg1);
  auto RC2 = SrcReg2 != 0 ? MRI.getRegClass(SrcReg2) : nullptr;

  // Select the compare opcode from the value type, the target's FP support
  // (SPE vs. classic FP vs. VSX), and signedness.
  switch (SrcVT.SimpleTy) {
    default: return false;
    case MVT::f32:
      if (HasSPE) {
        // SPE only has equality/ordering compares for specific predicates.
        switch (Pred) {
          default: return false;
          case PPC::PRED_EQ:
            CmpOpc = PPC::EFSCMPEQ;
            break;
          case PPC::PRED_LT:
            CmpOpc = PPC::EFSCMPLT;
            break;
          case PPC::PRED_GT:
            CmpOpc = PPC::EFSCMPGT;
            break;
        }
      } else {
        CmpOpc = PPC::FCMPUS;
        // FCMPUS expects F4RC operands; copy out of VSX register classes.
        if (isVSSRCRegClass(RC1))
          SrcReg1 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg1);
        if (RC2 && isVSSRCRegClass(RC2))
          SrcReg2 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg2);
      }
      break;
    case MVT::f64:
      if (HasSPE) {
        switch (Pred) {
          default: return false;
          case PPC::PRED_EQ:
            CmpOpc = PPC::EFDCMPEQ;
            break;
          case PPC::PRED_LT:
            CmpOpc = PPC::EFDCMPLT;
            break;
          case PPC::PRED_GT:
            CmpOpc = PPC::EFDCMPGT;
            break;
        }
      } else if (isVSFRCRegClass(RC1) || (RC2 && isVSFRCRegClass(RC2))) {
        CmpOpc = PPC::XSCMPUDP;
      } else {
        CmpOpc = PPC::FCMPUD;
      }
      break;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
      // Sub-word values must be extended to 32 bits before comparing.
      NeedsExt = true;
      [[fallthrough]];
    case MVT::i32:
      if (!UseImm)
        CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
      else
        CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
      break;
    case MVT::i64:
      if (!UseImm)
        CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
      else
        CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
      break;
  }

  // Widen sub-word operands (matching the compare's signedness).
  if (NeedsExt) {
    Register ExtReg = createResultReg(&PPC::GPRCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
      return false;
    SrcReg1 = ExtReg;

    if (!UseImm) {
      Register ExtReg = createResultReg(&PPC::GPRCRegClass);
      if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
        return false;
      SrcReg2 = ExtReg;
    }
  }

  if (!UseImm)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc), DestReg)
      .addReg(SrcReg1).addReg(SrcReg2);
  else
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc), DestReg)
      .addReg(SrcReg1).addImm(Imm);

  return true;
}
943
944// Attempt to fast-select a floating-point extend instruction.
945bool PPCFastISel::SelectFPExt(const Instruction *I) {
946 Value *Src = I->getOperand(0);
947 EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
948 EVT DestVT = TLI.getValueType(DL, I->getType(), true);
949
950 if (SrcVT != MVT::f32 || DestVT != MVT::f64)
951 return false;
952
953 Register SrcReg = getRegForValue(Src);
954 if (!SrcReg)
955 return false;
956
957 // No code is generated for a FP extend.
958 updateValueMap(I, SrcReg);
959 return true;
960}
961
962// Attempt to fast-select a floating-point truncate instruction.
963bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
964 Value *Src = I->getOperand(0);
965 EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
966 EVT DestVT = TLI.getValueType(DL, I->getType(), true);
967
968 if (SrcVT != MVT::f64 || DestVT != MVT::f32)
969 return false;
970
971 Register SrcReg = getRegForValue(Src);
972 if (!SrcReg)
973 return false;
974
975 // Round the result to single precision.
976 Register DestReg;
977 auto RC = MRI.getRegClass(SrcReg);
978 if (Subtarget->hasSPE()) {
979 DestReg = createResultReg(&PPC::GPRCRegClass);
980 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::EFSCFD),
981 DestReg)
982 .addReg(SrcReg);
983 } else if (Subtarget->hasP8Vector() && isVSFRCRegClass(RC)) {
984 DestReg = createResultReg(&PPC::VSSRCRegClass);
985 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::XSRSP),
986 DestReg)
987 .addReg(SrcReg);
988 } else {
989 SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
990 DestReg = createResultReg(&PPC::F4RCRegClass);
991 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
992 TII.get(PPC::FRSP), DestReg)
993 .addReg(SrcReg);
994 }
995
996 updateValueMap(I, DestReg);
997 return true;
998}
999
// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
// Returns the f64 result register, or an invalid Register on failure.
// The move goes through an 8-byte stack slot: store from the GPR,
// then reload into an FPR with an integer-conversion-friendly load.
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
// stack slot and 4-byte store/load sequence. Or just sext the 4-byte
// case to 8 bytes which produces tighter code but wastes stack space.
Register PPCFastISel::PPCMoveToFPReg(MVT SrcVT, Register SrcReg,
                                     bool IsSigned) {

  // If necessary, extend 32-bit int to 64-bit.
  if (SrcVT == MVT::i32) {
    Register TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return Register();
    SrcReg = TmpReg;
  }

  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(8, Align(8), false);

  // Store the value from the GPR.
  if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
    return Register();

  // Load the integer value into an FPR. The kind of load used depends
  // on a number of conditions.
  unsigned LoadOpc = PPC::LFD;

  if (SrcVT == MVT::i32) {
    if (!IsSigned) {
      // Zero-extended i32: load just the word with LFIWZX. On big-endian
      // the meaningful word sits in the high half of the 8-byte slot, so
      // load from offset 4; on little-endian it is at offset 0.
      LoadOpc = PPC::LFIWZX;
      Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
    } else if (Subtarget->hasLFIWAX()) {
      // Sign-extending word load, when the subtarget provides it; same
      // endian-dependent offset adjustment as above.
      LoadOpc = PPC::LFIWAX;
      Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
    }
  }

  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  Register ResultReg;
  if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
    return Register();

  return ResultReg;
}
1048
// Attempt to fast-select an integer-to-floating-point conversion.
// Legal sources are i8/i16/i32/i64; legal destinations are f32/f64.
// Under SPE the conversion stays entirely in GPRs; otherwise the value
// is moved to an FPR via a stack slot and converted with FCFID*.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
  MVT DstVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(DstTy, DstVT))
    return false;

  if (DstVT != MVT::f32 && DstVT != MVT::f64)
    return false;

  Value *Src = I->getOperand(0);
  EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
  if (!SrcEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();

  if (SrcVT != MVT::i8 && SrcVT != MVT::i16 &&
      SrcVT != MVT::i32 && SrcVT != MVT::i64)
    return false;

  Register SrcReg = getRegForValue(Src);
  if (!SrcReg)
    return false;

  // Shortcut for SPE. Doesn't need to store/load, since it's all in the GPRs
  if (Subtarget->hasSPE()) {
    // Pick the SPE convert opcode by destination width and signedness.
    unsigned Opc;
    if (DstVT == MVT::f32)
      Opc = IsSigned ? PPC::EFSCFSI : PPC::EFSCFUI;
    else
      Opc = IsSigned ? PPC::EFDCFSI : PPC::EFDCFUI;

    Register DestReg = createResultReg(&PPC::SPERCRegClass);
    // Generate the convert.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
      .addReg(SrcReg);
    updateValueMap(I, DestReg);
    return true;
  }

  // We can only lower an unsigned convert if we have the newer
  // floating-point conversion operations.
  if (!IsSigned && !Subtarget->hasFPCVT())
    return false;

  // FIXME: For now we require the newer floating-point conversion operations
  // (which are present only on P7 and A2 server models) when converting
  // to single-precision float. Otherwise we have to generate a lot of
  // fiddly code to avoid double rounding. If necessary, the fiddly code
  // can be found in PPCTargetLowering::LowerINT_TO_FP().
  if (DstVT == MVT::f32 && !Subtarget->hasFPCVT())
    return false;

  // Extend the input if necessary. Sub-word sources are widened to i64
  // (sign- or zero-extended to match the conversion's signedness).
  if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
    Register TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return false;
    SrcVT = MVT::i64;
    SrcReg = TmpReg;
  }

  // Move the integer value to an FPR.
  Register FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
  if (!FPReg)
    return false;

  // Determine the opcode for the conversion.
  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  Register DestReg = createResultReg(RC);
  unsigned Opc;

  // FCFIDS/FCFIDUS produce a single-precision result directly,
  // avoiding double rounding (guarded by the hasFPCVT check above).
  if (DstVT == MVT::f32)
    Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
  else
    Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;

  // Generate the convert.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
    .addReg(FPReg);

  updateValueMap(I, DestReg);
  return true;
}
1136
// Move the floating-point value in SrcReg into an integer destination
// register, and return the register (or zero if we can't handle it).
// The move goes through an 8-byte stack slot: store the FPR, then
// reload into a GPR (for i32, from the correct half of the slot).
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
Register PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
                                      Register SrcReg, bool IsSigned) {
  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  // Note that if have STFIWX available, we could use a 4-byte stack
  // slot for i32, but this being fast-isel we'll just go with the
  // easiest code gen possible.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(8, Align(8), false);

  // Store the value from the FPR.
  if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
    return Register();

  // Reload it into a GPR. If we want an i32 on big endian, modify the
  // address to have a 4-byte offset so we load from the right place.
  if (VT == MVT::i32)
    Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;

  // Look at the currently assigned register for this instruction
  // to determine the required register class.
  Register AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;

  Register ResultReg;
  if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
    return Register();

  return ResultReg;
}
1173
// Attempt to fast-select a floating-point-to-integer conversion.
// Legal sources are f32/f64; legal destinations are i32/i64. The
// convert is done in FPRs/VSRs (or GPRs under SPE), then the result
// is moved to a GPR via a stack slot when necessary.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
  MVT DstVT, SrcVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(DstTy, DstVT))
    return false;

  if (DstVT != MVT::i32 && DstVT != MVT::i64)
    return false;

  // If we don't have FCTIDUZ, or SPE, and we need it, punt to SelectionDAG.
  if (DstVT == MVT::i64 && !IsSigned && !Subtarget->hasFPCVT() &&
      !Subtarget->hasSPE())
    return false;

  Value *Src = I->getOperand(0);
  Type *SrcTy = Src->getType();
  if (!isTypeLegal(SrcTy, SrcVT))
    return false;

  if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
    return false;

  Register SrcReg = getRegForValue(Src);
  if (!SrcReg)
    return false;

  // Convert f32 to f64 or convert VSSRC to VSFRC if necessary. This is just a
  // meaningless copy to get the register class right.
  const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
  if (InRC == &PPC::F4RCRegClass)
    SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
  else if (InRC == &PPC::VSSRCRegClass)
    SrcReg = copyRegToRegClass(&PPC::VSFRCRegClass, SrcReg);

  // Determine the opcode for the conversion, which takes place
  // entirely within FPRs or VSRs.
  Register DestReg;
  unsigned Opc;
  auto RC = MRI.getRegClass(SrcReg);

  if (Subtarget->hasSPE()) {
    // SPE converts in GPRs; the opcode depends on the source width
    // (EFS* for single, EFD* for double) and signedness.
    DestReg = createResultReg(&PPC::GPRCRegClass);
    if (IsSigned)
      Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
    else
      Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
  } else if (isVSFRCRegClass(RC)) {
    // VSX truncating converts (round toward zero).
    DestReg = createResultReg(&PPC::VSFRCRegClass);
    if (DstVT == MVT::i32)
      Opc = IsSigned ? PPC::XSCVDPSXWS : PPC::XSCVDPUXWS;
    else
      Opc = IsSigned ? PPC::XSCVDPSXDS : PPC::XSCVDPUXDS;
  } else {
    // Classic FP truncating converts. Note: the nested if/else below is
    // intentionally braceless; the final else binds to the outer
    // (DstVT == MVT::i32) test.
    DestReg = createResultReg(&PPC::F8RCRegClass);
    if (DstVT == MVT::i32)
      if (IsSigned)
        Opc = PPC::FCTIWZ;
      else
        // Without hasFPCVT there is no FCTIWUZ; fall back to FCTIDZ.
        Opc = Subtarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
    else
      Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
  }

  // Generate the convert.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
    .addReg(SrcReg);

  // Now move the integer value from a float register to an integer register.
  // Under SPE the result is already in a GPR, so no move is needed.
  Register IntReg = Subtarget->hasSPE()
                        ? DestReg
                        : PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);

  if (!IntReg)
    return false;

  updateValueMap(I, IntReg);
  return true;
}
1255
// Attempt to fast-select a binary integer operation that isn't already
// handled automatically. Only ADD/OR/SUB on the non-legal i8/i16 types
// are handled; small immediate operands are folded into the immediate
// forms (ADDI/ORI, with SUB rewritten as ADDI of the negated value).
bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);

  // We can get here in the case when we have a binary operation on a non-legal
  // type and the target independent selector doesn't know how to handle it.
  if (DestVT != MVT::i16 && DestVT != MVT::i8)
    return false;

  // Look at the currently assigned register for this instruction
  // to determine the required register class. If there is no register,
  // make a conservative choice (don't assign R0).
  Register AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    (AssignedReg ? MRI.getRegClass(AssignedReg) :
     &PPC::GPRC_and_GPRC_NOR0RegClass);
  // 32-bit vs 64-bit opcode selection follows the chosen register class.
  bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);

  unsigned Opc;
  switch (ISDOpcode) {
    default: return false;
    case ISD::ADD:
      Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
      break;
    case ISD::OR:
      Opc = IsGPRC ? PPC::OR : PPC::OR8;
      break;
    case ISD::SUB:
      Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
      break;
  }

  // RC is never null here (see the ternary above), so the fallback
  // operand of this conditional is effectively dead.
  Register ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
  Register SrcReg1 = getRegForValue(I->getOperand(0));
  if (!SrcReg1)
    return false;

  // Handle case of small immediate operand.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) {
    const APInt &CIVal = ConstInt->getValue();
    int Imm = (int)CIVal.getSExtValue();
    bool UseImm = true;
    // Only 16-bit signed immediates fit the D-form instructions.
    if (isInt<16>(Imm)) {
      switch (Opc) {
        default:
          llvm_unreachable("Missing case!");
        case PPC::ADD4:
          Opc = PPC::ADDI;
          // ADDI treats R0 as the literal 0, so exclude it from the
          // base operand's class.
          MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
          break;
        case PPC::ADD8:
          Opc = PPC::ADDI8;
          MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
          break;
        case PPC::OR:
          Opc = PPC::ORI;
          break;
        case PPC::OR8:
          Opc = PPC::ORI8;
          break;
        case PPC::SUBF:
          // SUBF x, imm becomes ADDI of -imm, except -32768 whose
          // negation does not fit in 16 bits.
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI;
            MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
            Imm = -Imm;
          }
          break;
        case PPC::SUBF8:
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI8;
            MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
            Imm = -Imm;
          }
          break;
      }

      if (UseImm) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
                ResultReg)
            .addReg(SrcReg1)
            .addImm(Imm);
        updateValueMap(I, ResultReg);
        return true;
      }
    }
  }

  // Reg-reg case.
  Register SrcReg2 = getRegForValue(I->getOperand(1));
  if (!SrcReg2)
    return false;

  // Reverse operands for subtract-from.
  if (ISDOpcode == ISD::SUB)
    std::swap(SrcReg1, SrcReg2);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
    .addReg(SrcReg1).addReg(SrcReg2);
  updateValueMap(I, ResultReg);
  return true;
}
1362
1363// Handle arguments to a call that we're attempting to fast-select.
1364// Return false if the arguments are too complex for us at the moment.
1365bool PPCFastISel::processCallArgs(SmallVectorImpl<Value *> &Args,
1366 SmallVectorImpl<Register> &ArgRegs,
1367 SmallVectorImpl<MVT> &ArgVTs,
1368 SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
1369 SmallVectorImpl<unsigned> &RegArgs,
1370 CallingConv::ID CC, unsigned &NumBytes,
1371 bool IsVarArg) {
1373 CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);
1374
1375 // Reserve space for the linkage area on the stack.
1376 unsigned LinkageSize = Subtarget->getFrameLowering()->getLinkageSize();
1377 CCInfo.AllocateStack(LinkageSize, Align(8));
1378
1380 for (Value *Arg : Args)
1381 ArgTys.push_back(Arg->getType());
1382 CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, ArgTys, CC_PPC64_ELF_FIS);
1383
1384 // Bail out if we can't handle any of the arguments.
1385 for (const CCValAssign &VA : ArgLocs) {
1386 MVT ArgVT = ArgVTs[VA.getValNo()];
1387
1388 // Skip vector arguments for now, as well as long double and
1389 // uint128_t, and anything that isn't passed in a register.
1390 if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 ||
1391 !VA.isRegLoc() || VA.needsCustom())
1392 return false;
1393
1394 // Skip bit-converted arguments for now.
1395 if (VA.getLocInfo() == CCValAssign::BCvt)
1396 return false;
1397 }
1398
1399 // Get a count of how many bytes are to be pushed onto the stack.
1400 NumBytes = CCInfo.getStackSize();
1401
1402 // The prolog code of the callee may store up to 8 GPR argument registers to
1403 // the stack, allowing va_start to index over them in memory if its varargs.
1404 // Because we cannot tell if this is needed on the caller side, we have to
1405 // conservatively assume that it is needed. As such, make sure we have at
1406 // least enough stack space for the caller to store the 8 GPRs.
1407 // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
1408 NumBytes = std::max(NumBytes, LinkageSize + 64);
1409
1410 // Issue CALLSEQ_START.
1411 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1412 TII.get(TII.getCallFrameSetupOpcode()))
1413 .addImm(NumBytes).addImm(0);
1414
1415 // Prepare to assign register arguments. Every argument uses up a
1416 // GPR protocol register even if it's passed in a floating-point
1417 // register (unless we're using the fast calling convention).
1418 unsigned NextGPR = PPC::X3;
1419 unsigned NextFPR = PPC::F1;
1420
1421 // Process arguments.
1422 for (const CCValAssign &VA : ArgLocs) {
1423 Register Arg = ArgRegs[VA.getValNo()];
1424 MVT ArgVT = ArgVTs[VA.getValNo()];
1425
1426 // Handle argument promotion and bitcasts.
1427 switch (VA.getLocInfo()) {
1428 default:
1429 llvm_unreachable("Unknown loc info!");
1430 case CCValAssign::Full:
1431 break;
1432 case CCValAssign::SExt: {
1433 MVT DestVT = VA.getLocVT();
1434 const TargetRegisterClass *RC =
1435 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1436 Register TmpReg = createResultReg(RC);
1437 if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
1438 llvm_unreachable("Failed to emit a sext!");
1439 ArgVT = DestVT;
1440 Arg = TmpReg;
1441 break;
1442 }
1443 case CCValAssign::AExt:
1444 case CCValAssign::ZExt: {
1445 MVT DestVT = VA.getLocVT();
1446 const TargetRegisterClass *RC =
1447 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1448 Register TmpReg = createResultReg(RC);
1449 if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
1450 llvm_unreachable("Failed to emit a zext!");
1451 ArgVT = DestVT;
1452 Arg = TmpReg;
1453 break;
1454 }
1455 case CCValAssign::BCvt: {
1456 // FIXME: Not yet handled.
1457 llvm_unreachable("Should have bailed before getting here!");
1458 break;
1459 }
1460 }
1461
1462 // Copy this argument to the appropriate register.
1463 unsigned ArgReg;
1464 if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
1465 ArgReg = NextFPR++;
1466 if (CC != CallingConv::Fast)
1467 ++NextGPR;
1468 } else
1469 ArgReg = NextGPR++;
1470
1471 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1472 TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg);
1473 RegArgs.push_back(ArgReg);
1474 }
1475
1476 return true;
1477}
1478
1479// For a call that we've determined we can fast-select, finish the
1480// call sequence and generate a copy to obtain the return value (if any).
1481bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) {
1482 CallingConv::ID CC = CLI.CallConv;
1483
1484 // Issue CallSEQ_END.
1485 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1486 TII.get(TII.getCallFrameDestroyOpcode()))
1487 .addImm(NumBytes).addImm(0);
1488
1489 // Next, generate a copy to obtain the return value.
1490 // FIXME: No multi-register return values yet, though I don't foresee
1491 // any real difficulties there.
1492 if (RetVT != MVT::isVoid) {
1494 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
1495 CCInfo.AnalyzeCallResult(RetVT, CLI.RetTy, RetCC_PPC64_ELF_FIS);
1496 CCValAssign &VA = RVLocs[0];
1497 assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
1498 assert(VA.isRegLoc() && "Can only return in registers!");
1499
1500 MVT DestVT = VA.getValVT();
1501 MVT CopyVT = DestVT;
1502
1503 // Ints smaller than a register still arrive in a full 64-bit
1504 // register, so make sure we recognize this.
1505 if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
1506 CopyVT = MVT::i64;
1507
1508 Register SourcePhysReg = VA.getLocReg();
1509 Register ResultReg;
1510
1511 if (RetVT == CopyVT) {
1512 const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
1513 ResultReg = copyRegToRegClass(CpyRC, SourcePhysReg);
1514
1515 // If necessary, round the floating result to single precision.
1516 } else if (CopyVT == MVT::f64) {
1517 ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
1518 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::FRSP),
1519 ResultReg).addReg(SourcePhysReg);
1520
1521 // If only the low half of a general register is needed, generate
1522 // a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be
1523 // used along the fast-isel path (not lowered), and downstream logic
1524 // also doesn't like a direct subreg copy on a physical reg.)
1525 } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
1526 // Convert physical register from G8RC to GPRC.
1527 SourcePhysReg = (SourcePhysReg - PPC::X0) + PPC::R0;
1528 ResultReg = copyRegToRegClass(&PPC::GPRCRegClass, SourcePhysReg);
1529 }
1530
1531 assert(ResultReg && "ResultReg unset!");
1532 CLI.InRegs.push_back(SourcePhysReg);
1533 CLI.ResultReg = ResultReg;
1534 CLI.NumResultRegs = 1;
1535 }
1536
1537 return true;
1538}
1539
1540bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
1541 CallingConv::ID CC = CLI.CallConv;
1542 bool IsTailCall = CLI.IsTailCall;
1543 bool IsVarArg = CLI.IsVarArg;
1544 const Value *Callee = CLI.Callee;
1545 const MCSymbol *Symbol = CLI.Symbol;
1546
1547 if (!Callee && !Symbol)
1548 return false;
1549
1550 // Allow SelectionDAG isel to handle tail calls and long calls.
1551 if (IsTailCall || Subtarget->useLongCalls())
1552 return false;
1553
1554 // Let SDISel handle vararg functions.
1555 if (IsVarArg)
1556 return false;
1557
1558 // If this is a PC-Rel function, let SDISel handle the call.
1559 if (Subtarget->isUsingPCRelativeCalls())
1560 return false;
1561
1562 // Handle simple calls for now, with legal return types and
1563 // those that can be extended.
1564 Type *RetTy = CLI.RetTy;
1565 MVT RetVT;
1566 if (RetTy->isVoidTy())
1567 RetVT = MVT::isVoid;
1568 else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
1569 RetVT != MVT::i8)
1570 return false;
1571 else if (RetVT == MVT::i1 && Subtarget->useCRBits())
1572 // We can't handle boolean returns when CR bits are in use.
1573 return false;
1574
1575 // FIXME: No multi-register return values yet.
1576 if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
1577 RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
1578 RetVT != MVT::f64) {
1580 CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
1581 CCInfo.AnalyzeCallResult(RetVT, RetTy, RetCC_PPC64_ELF_FIS);
1582 if (RVLocs.size() > 1)
1583 return false;
1584 }
1585
1586 // Bail early if more than 8 arguments, as we only currently
1587 // handle arguments passed in registers.
1588 unsigned NumArgs = CLI.OutVals.size();
1589 if (NumArgs > 8)
1590 return false;
1591
1592 // Set up the argument vectors.
1593 SmallVector<Value*, 8> Args;
1595 SmallVector<MVT, 8> ArgVTs;
1597
1598 Args.reserve(NumArgs);
1599 ArgRegs.reserve(NumArgs);
1600 ArgVTs.reserve(NumArgs);
1601 ArgFlags.reserve(NumArgs);
1602
1603 for (unsigned i = 0, ie = NumArgs; i != ie; ++i) {
1604 // Only handle easy calls for now. It would be reasonably easy
1605 // to handle <= 8-byte structures passed ByVal in registers, but we
1606 // have to ensure they are right-justified in the register.
1607 ISD::ArgFlagsTy Flags = CLI.OutFlags[i];
1608 if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal())
1609 return false;
1610
1611 Value *ArgValue = CLI.OutVals[i];
1612 Type *ArgTy = ArgValue->getType();
1613 MVT ArgVT;
1614 if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
1615 return false;
1616
1617 // FIXME: FastISel cannot handle non-simple types yet, including 128-bit FP
1618 // types, which is passed through vector register. Skip these types and
1619 // fallback to default SelectionDAG based selection.
1620 if (ArgVT.isVector() || ArgVT == MVT::f128)
1621 return false;
1622
1623 Register Arg = getRegForValue(ArgValue);
1624 if (!Arg)
1625 return false;
1626
1627 Args.push_back(ArgValue);
1628 ArgRegs.push_back(Arg);
1629 ArgVTs.push_back(ArgVT);
1630 ArgFlags.push_back(Flags);
1631 }
1632
1633 // Process the arguments.
1634 SmallVector<unsigned, 8> RegArgs;
1635 unsigned NumBytes;
1636
1637 if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
1638 RegArgs, CC, NumBytes, IsVarArg))
1639 return false;
1640
1641 MachineInstrBuilder MIB;
1642 // FIXME: No handling for function pointers yet. This requires
1643 // implementing the function descriptor (OPD) setup.
1644 const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
1645 if (!GV) {
1646 // patchpoints are a special case; they always dispatch to a pointer value.
1647 // However, we don't actually want to generate the indirect call sequence
1648 // here (that will be generated, as necessary, during asm printing), and
1649 // the call we generate here will be erased by FastISel::selectPatchpoint,
1650 // so don't try very hard...
1651 if (CLI.IsPatchPoint)
1652 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::NOP));
1653 else
1654 return false;
1655 } else {
1656 // Build direct call with NOP for TOC restore.
1657 // FIXME: We can and should optimize away the NOP for local calls.
1658 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1659 TII.get(PPC::BL8_NOP));
1660 // Add callee.
1661 MIB.addGlobalAddress(GV);
1662 }
1663
1664 // Add implicit physical register uses to the call.
1665 for (unsigned Reg : RegArgs)
1666 MIB.addReg(Reg, RegState::Implicit);
1667
1668 // Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live
1669 // into the call.
1670 PPCFuncInfo->setUsesTOCBasePtr();
1671 MIB.addReg(PPC::X2, RegState::Implicit);
1672
1673 // Add a register mask with the call-preserved registers. Proper
1674 // defs for return values will be added by setPhysRegsDeadExcept().
1675 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
1676
1677 CLI.Call = MIB;
1678
1679 // Finish off the call including any return values.
1680 return finishCall(RetVT, CLI, NumBytes);
1681}
1682
1683// Attempt to fast-select a return instruction.
1684bool PPCFastISel::SelectRet(const Instruction *I) {
1685
1686 if (!FuncInfo.CanLowerReturn)
1687 return false;
1688
1689 const ReturnInst *Ret = cast<ReturnInst>(I);
1690 const Function &F = *I->getParent()->getParent();
1691
1692 // Build a list of return value registers.
1694 CallingConv::ID CC = F.getCallingConv();
1695
1696 if (Ret->getNumOperands() > 0) {
1698 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
1699
1700 // Analyze operands of the call, assigning locations to each operand.
1702 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context);
1703 CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
1704 const Value *RV = Ret->getOperand(0);
1705
1706 // FIXME: Only one output register for now.
1707 if (ValLocs.size() > 1)
1708 return false;
1709
1710 // Special case for returning a constant integer of any size - materialize
1711 // the constant as an i64 and copy it to the return register.
1712 if (isa<ConstantInt>(RV) && RV->getType()->isIntegerTy()) {
1713 const ConstantInt *CI = cast<ConstantInt>(RV);
1714 CCValAssign &VA = ValLocs[0];
1715
1716 Register RetReg = VA.getLocReg();
1717 // We still need to worry about properly extending the sign. For example,
1718 // we could have only a single bit or a constant that needs zero
1719 // extension rather than sign extension. Make sure we pass the return
1720 // value extension property to integer materialization.
1721 Register SrcReg =
1722 PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() != CCValAssign::ZExt);
1723
1724 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1725 TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
1726
1727 RetRegs.push_back(RetReg);
1728
1729 } else {
1730 Register Reg = getRegForValue(RV);
1731
1732 if (!Reg)
1733 return false;
1734
1735 // Copy the result values into the output registers.
1736 for (unsigned i = 0; i < ValLocs.size(); ++i) {
1737
1738 CCValAssign &VA = ValLocs[i];
1739 assert(VA.isRegLoc() && "Can only return in registers!");
1740 RetRegs.push_back(VA.getLocReg());
1741 Register SrcReg = Reg + VA.getValNo();
1742
1743 EVT RVEVT = TLI.getValueType(DL, RV->getType());
1744 if (!RVEVT.isSimple())
1745 return false;
1746 MVT RVVT = RVEVT.getSimpleVT();
1747 MVT DestVT = VA.getLocVT();
1748
1749 if (RVVT != DestVT && RVVT != MVT::i8 &&
1750 RVVT != MVT::i16 && RVVT != MVT::i32)
1751 return false;
1752
1753 if (RVVT != DestVT) {
1754 switch (VA.getLocInfo()) {
1755 default:
1756 llvm_unreachable("Unknown loc info!");
1757 case CCValAssign::Full:
1758 llvm_unreachable("Full value assign but types don't match?");
1759 case CCValAssign::AExt:
1760 case CCValAssign::ZExt: {
1761 const TargetRegisterClass *RC =
1762 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1763 Register TmpReg = createResultReg(RC);
1764 if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
1765 return false;
1766 SrcReg = TmpReg;
1767 break;
1768 }
1769 case CCValAssign::SExt: {
1770 const TargetRegisterClass *RC =
1771 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1772 Register TmpReg = createResultReg(RC);
1773 if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
1774 return false;
1775 SrcReg = TmpReg;
1776 break;
1777 }
1778 }
1779 }
1780
1781 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1782 TII.get(TargetOpcode::COPY), RetRegs[i])
1783 .addReg(SrcReg);
1784 }
1785 }
1786 }
1787
1788 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1789 TII.get(PPC::BLR8));
1790
1791 for (Register Reg : RetRegs)
1792 MIB.addReg(Reg, RegState::Implicit);
1793
1794 return true;
1795}
1796
1797// Attempt to emit an integer extend of SrcReg into DestReg. Both
1798// signed and zero extensions are supported. Return false if we
1799// can't handle it.
1800bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT,
1801 Register DestReg, bool IsZExt) {
1802 if (DestVT != MVT::i32 && DestVT != MVT::i64)
1803 return false;
1804 if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
1805 return false;
1806
1807 // Signed extensions use EXTSB, EXTSH, EXTSW.
1808 if (!IsZExt) {
1809 unsigned Opc;
1810 if (SrcVT == MVT::i8)
1811 Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
1812 else if (SrcVT == MVT::i16)
1813 Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
1814 else {
1815 assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
1816 Opc = PPC::EXTSW_32_64;
1817 }
1818 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
1819 .addReg(SrcReg);
1820
1821 // Unsigned 32-bit extensions use RLWINM.
1822 } else if (DestVT == MVT::i32) {
1823 unsigned MB;
1824 if (SrcVT == MVT::i8)
1825 MB = 24;
1826 else {
1827 assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
1828 MB = 16;
1829 }
1830 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::RLWINM),
1831 DestReg)
1832 .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);
1833
1834 // Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
1835 } else {
1836 unsigned MB;
1837 if (SrcVT == MVT::i8)
1838 MB = 56;
1839 else if (SrcVT == MVT::i16)
1840 MB = 48;
1841 else
1842 MB = 32;
1843 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1844 TII.get(PPC::RLDICL_32_64), DestReg)
1845 .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
1846 }
1847
1848 return true;
1849}
1850
1851// Attempt to fast-select an indirect branch instruction.
1852bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
1853 Register AddrReg = getRegForValue(I->getOperand(0));
1854 if (!AddrReg)
1855 return false;
1856
1857 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::MTCTR8))
1858 .addReg(AddrReg);
1859 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::BCTR8));
1860
1861 const IndirectBrInst *IB = cast<IndirectBrInst>(I);
1862 for (const BasicBlock *SuccBB : IB->successors())
1863 FuncInfo.MBB->addSuccessor(FuncInfo.getMBB(SuccBB));
1864
1865 return true;
1866}
1867
1868// Attempt to fast-select an integer truncate instruction.
1869bool PPCFastISel::SelectTrunc(const Instruction *I) {
1870 Value *Src = I->getOperand(0);
1871 EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
1872 EVT DestVT = TLI.getValueType(DL, I->getType(), true);
1873
1874 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
1875 return false;
1876
1877 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
1878 return false;
1879
1880 Register SrcReg = getRegForValue(Src);
1881 if (!SrcReg)
1882 return false;
1883
1884 // The only interesting case is when we need to switch register classes.
1885 if (SrcVT == MVT::i64)
1886 SrcReg = copyRegToRegClass(&PPC::GPRCRegClass, SrcReg, {}, PPC::sub_32);
1887
1888 updateValueMap(I, SrcReg);
1889 return true;
1890}
1891
1892// Attempt to fast-select an integer extend instruction.
1893bool PPCFastISel::SelectIntExt(const Instruction *I) {
1894 Type *DestTy = I->getType();
1895 Value *Src = I->getOperand(0);
1896 Type *SrcTy = Src->getType();
1897
1898 bool IsZExt = isa<ZExtInst>(I);
1899 Register SrcReg = getRegForValue(Src);
1900 if (!SrcReg) return false;
1901
1902 EVT SrcEVT, DestEVT;
1903 SrcEVT = TLI.getValueType(DL, SrcTy, true);
1904 DestEVT = TLI.getValueType(DL, DestTy, true);
1905 if (!SrcEVT.isSimple())
1906 return false;
1907 if (!DestEVT.isSimple())
1908 return false;
1909
1910 MVT SrcVT = SrcEVT.getSimpleVT();
1911 MVT DestVT = DestEVT.getSimpleVT();
1912
1913 // If we know the register class needed for the result of this
1914 // instruction, use it. Otherwise pick the register class of the
1915 // correct size that does not contain X0/R0, since we don't know
1916 // whether downstream uses permit that assignment.
1917 Register AssignedReg = FuncInfo.ValueMap[I];
1918 const TargetRegisterClass *RC =
1919 (AssignedReg ? MRI.getRegClass(AssignedReg) :
1920 (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
1921 &PPC::GPRC_and_GPRC_NOR0RegClass));
1922 Register ResultReg = createResultReg(RC);
1923
1924 if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
1925 return false;
1926
1927 updateValueMap(I, ResultReg);
1928 return true;
1929}
1930
1931// Attempt to fast-select an instruction that wasn't handled by
1932// the table-generated machinery.
1933bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
1934
1935 switch (I->getOpcode()) {
1936 case Instruction::Load:
1937 return SelectLoad(I);
1938 case Instruction::Store:
1939 return SelectStore(I);
1940 case Instruction::CondBr:
1941 return SelectBranch(I);
1942 case Instruction::IndirectBr:
1943 return SelectIndirectBr(I);
1944 case Instruction::FPExt:
1945 return SelectFPExt(I);
1946 case Instruction::FPTrunc:
1947 return SelectFPTrunc(I);
1948 case Instruction::SIToFP:
1949 return SelectIToFP(I, /*IsSigned*/ true);
1950 case Instruction::UIToFP:
1951 return SelectIToFP(I, /*IsSigned*/ false);
1952 case Instruction::FPToSI:
1953 return SelectFPToI(I, /*IsSigned*/ true);
1954 case Instruction::FPToUI:
1955 return SelectFPToI(I, /*IsSigned*/ false);
1956 case Instruction::Add:
1957 return SelectBinaryIntOp(I, ISD::ADD);
1958 case Instruction::Or:
1959 return SelectBinaryIntOp(I, ISD::OR);
1960 case Instruction::Sub:
1961 return SelectBinaryIntOp(I, ISD::SUB);
1962 case Instruction::Ret:
1963 return SelectRet(I);
1964 case Instruction::Trunc:
1965 return SelectTrunc(I);
1966 case Instruction::ZExt:
1967 case Instruction::SExt:
1968 return SelectIntExt(I);
1969 // Here add other flavors of Instruction::XXX that automated
1970 // cases don't catch. For example, switches are terminators
1971 // that aren't yet handled.
1972 default:
1973 break;
1974 }
1975 return false;
1976}
1977
1978// Materialize a floating-point constant into a register, and return
1979// the register number (or zero if we failed to handle it).
1980Register PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
1981 // If this is a PC-Rel function, let SDISel handle constant pool.
1982 if (Subtarget->isUsingPCRelativeCalls())
1983 return Register();
1984
1985 // No plans to handle long double here.
1986 if (VT != MVT::f32 && VT != MVT::f64)
1987 return Register();
1988
1989 // All FP constants are loaded from the constant pool.
1990 Align Alignment = DL.getPrefTypeAlign(CFP->getType());
1991 unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
1992 const bool HasSPE = Subtarget->hasSPE();
1993 const TargetRegisterClass *RC;
1994 if (HasSPE)
1995 RC = ((VT == MVT::f32) ? &PPC::GPRCRegClass : &PPC::SPERCRegClass);
1996 else
1997 RC = ((VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass);
1998
1999 Register DestReg = createResultReg(RC);
2000 CodeModel::Model CModel = TM.getCodeModel();
2001
2002 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
2004 MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Alignment);
2005
2006 unsigned Opc;
2007
2008 if (HasSPE)
2009 Opc = ((VT == MVT::f32) ? PPC::SPELWZ : PPC::EVLDD);
2010 else
2011 Opc = ((VT == MVT::f32) ? PPC::LFS : PPC::LFD);
2012
2013 Register TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2014
2015 PPCFuncInfo->setUsesTOCBasePtr();
2016 // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
2017 if (CModel == CodeModel::Small) {
2018 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocCPT),
2019 TmpReg)
2020 .addConstantPoolIndex(Idx).addReg(PPC::X2);
2021 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
2022 .addImm(0).addReg(TmpReg).addMemOperand(MMO);
2023 } else {
2024 // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA8(X2, Idx)).
2025 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDIStocHA8),
2026 TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
2027 // But for large code model, we must generate a LDtocL followed
2028 // by the LF[SD].
2029 if (CModel == CodeModel::Large) {
2030 Register TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2031 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocL),
2032 TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
2033 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
2034 .addImm(0)
2035 .addReg(TmpReg2);
2036 } else
2037 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
2039 .addReg(TmpReg)
2040 .addMemOperand(MMO);
2041 }
2042
2043 return DestReg;
2044}
2045
// Materialize the address of a global value into a register, and return
// the register number (or zero if we failed to handle it).
Register PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
  // If this is a PC-Rel function, let SDISel handle GV materialization.
  if (Subtarget->isUsingPCRelativeCalls())
    return Register();

  assert(VT == MVT::i64 && "Non-address!");
  const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
  Register DestReg = createResultReg(RC);

  // Global values may be plain old object addresses, TLS object
  // addresses, constant pool entries, or jump tables.  How we generate
  // code for these may depend on small, medium, or large code model.
  CodeModel::Model CModel = TM.getCodeModel();

  // FIXME: Jump tables are not yet required because fast-isel doesn't
  // handle switches; if that changes, we need them as well.  For now,
  // what follows assumes everything's a generic (or TLS) global address.

  // FIXME: We don't yet handle the complexity of TLS.
  if (GV->isThreadLocal())
    return Register();

  // All paths below address the global relative to the TOC base in X2.
  PPCFuncInfo->setUsesTOCBasePtr();
  // AIX "toc-data" variables live directly in the TOC, so their address
  // is X2 plus an offset rather than a load of a TOC entry.
  bool IsAIXTocData = TM.getTargetTriple().isOSAIX() &&
                      isa<GlobalVariable>(GV) &&
                      cast<GlobalVariable>(GV)->hasAttribute("toc-data");

  // For small code model, generate a simple TOC load.
  if (CModel == CodeModel::Small) {
    auto MIB = BuildMI(
        *FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
        IsAIXTocData ? TII.get(PPC::ADDItoc8) : TII.get(PPC::LDtoc), DestReg);
    // Note the operand order differs between the two opcodes.
    if (IsAIXTocData)
      MIB.addReg(PPC::X2).addGlobalAddress(GV);
    else
      MIB.addGlobalAddress(GV).addReg(PPC::X2);
  } else {
    // If the address is an externally defined symbol, a symbol with common
    // or externally available linkage, a non-local function address, or a
    // jump table address (not yet needed), or if we are generating code
    // for large code model, we generate:
    //   LDtocL(GV, ADDIStocHA8(%x2, GV))
    // Otherwise we generate:
    //   ADDItocL8(ADDIStocHA8(%x2, GV), GV)
    // Either way, start with the ADDIStocHA8:
    Register HighPartReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDIStocHA8),
            HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);

    if (Subtarget->isGVIndirectSymbol(GV)) {
      // Indirect access: load the address from the TOC entry.
      assert(!IsAIXTocData && "TOC data should always be direct.");
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocL),
              DestReg).addGlobalAddress(GV).addReg(HighPartReg);
    } else {
      // Otherwise generate the ADDItocL8.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDItocL8),
              DestReg)
          .addReg(HighPartReg)
          .addGlobalAddress(GV);
    }
  }

  return DestReg;
}
2112
2113// Materialize a 32-bit integer constant into a register, and return
2114// the register number (or zero if we failed to handle it).
2115Register PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
2116 const TargetRegisterClass *RC) {
2117 unsigned Lo = Imm & 0xFFFF;
2118 unsigned Hi = (Imm >> 16) & 0xFFFF;
2119
2120 Register ResultReg = createResultReg(RC);
2121 bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
2122
2123 if (isInt<16>(Imm))
2124 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2125 TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg)
2126 .addImm(Imm);
2127 else if (Lo) {
2128 // Both Lo and Hi have nonzero bits.
2129 Register TmpReg = createResultReg(RC);
2130 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2131 TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
2132 .addImm(Hi);
2133 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2134 TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg)
2135 .addReg(TmpReg).addImm(Lo);
2136 } else
2137 // Just Hi bits.
2138 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2139 TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
2140 .addImm(Hi);
2141
2142 return ResultReg;
2143}
2144
// Materialize a 64-bit integer constant into a register, and return
// the register number (or zero if we failed to handle it).
Register PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
                                             const TargetRegisterClass *RC) {
  unsigned Remainder = 0;
  unsigned Shift = 0;

  // If the value doesn't fit in 32 bits, see if we can shift it
  // so that it fits in 32 bits.
  if (!isInt<32>(Imm)) {
    Shift = llvm::countr_zero<uint64_t>(Imm);
    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;

    if (isInt<32>(ImmSh))
      Imm = ImmSh;
    else {
      // No single shift works: materialize the high 32 bits and fix up
      // the low 32 bits (Remainder) with ORIS8/ORI8 afterwards.
      Remainder = Imm;
      Shift = 32;
      Imm >>= 32;
    }
  }

  // Handle the high-order 32 bits (if shifted) or the whole 32 bits
  // (if not shifted).
  Register TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
  if (!Shift)
    return TmpReg1;

  // If upper 32 bits were not zero, we've built them and need to shift
  // them into place.
  Register TmpReg2;
  if (Imm) {
    TmpReg2 = createResultReg(RC);
    // RLDICR: rotate left by Shift and clear everything below bit
    // (63 - Shift), i.e. a logical left shift by Shift.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::RLDICR),
            TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift);
  } else
    TmpReg2 = TmpReg1;

  // OR in the two 16-bit halves of the remainder, skipping halves that
  // are zero.
  Register TmpReg3;
  unsigned Hi, Lo;
  if ((Hi = (Remainder >> 16) & 0xFFFF)) {
    TmpReg3 = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ORIS8),
            TmpReg3).addReg(TmpReg2).addImm(Hi);
  } else
    TmpReg3 = TmpReg2;

  if ((Lo = Remainder & 0xFFFF)) {
    Register ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ORI8),
            ResultReg).addReg(TmpReg3).addImm(Lo);
    return ResultReg;
  }

  return TmpReg3;
}
2201
2202// Materialize an integer constant into a register, and return
2203// the register number (or zero if we failed to handle it).
2204Register PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
2205 bool UseSExt) {
2206 // If we're using CR bit registers for i1 values, handle that as a special
2207 // case first.
2208 if (VT == MVT::i1 && Subtarget->useCRBits()) {
2209 Register ImmReg = createResultReg(&PPC::CRBITRCRegClass);
2210 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2211 TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
2212 return ImmReg;
2213 }
2214
2215 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
2216 VT != MVT::i1)
2217 return Register();
2218
2219 const TargetRegisterClass *RC =
2220 ((VT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass);
2221 int64_t Imm = UseSExt ? CI->getSExtValue() : CI->getZExtValue();
2222
2223 // If the constant is in range, use a load-immediate.
2224 // Since LI will sign extend the constant we need to make sure that for
2225 // our zeroext constants that the sign extended constant fits into 16-bits -
2226 // a range of 0..0x7fff.
2227 if (isInt<16>(Imm)) {
2228 unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
2229 Register ImmReg = createResultReg(RC);
2230 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ImmReg)
2231 .addImm(Imm);
2232 return ImmReg;
2233 }
2234
2235 // Construct the constant piecewise.
2236 if (VT == MVT::i64)
2237 return PPCMaterialize64BitInt(Imm, RC);
2238 else if (VT == MVT::i32)
2239 return PPCMaterialize32BitInt(Imm, RC);
2240
2241 return Register();
2242}
2243
2244// Materialize a constant into a register, and return the register
2245// number (or zero if we failed to handle it).
2246Register PPCFastISel::fastMaterializeConstant(const Constant *C) {
2247 EVT CEVT = TLI.getValueType(DL, C->getType(), true);
2248
2249 // Only handle simple types.
2250 if (!CEVT.isSimple())
2251 return Register();
2252 MVT VT = CEVT.getSimpleVT();
2253
2254 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
2255 return PPCMaterializeFP(CFP, VT);
2256 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
2257 return PPCMaterializeGV(GV, VT);
2258 else if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
2259 // Note that the code in FunctionLoweringInfo::ComputePHILiveOutRegInfo
2260 // assumes that constant PHI operands will be zero extended, and failure to
2261 // match that assumption will cause problems if we sign extend here but
2262 // some user of a PHI is in a block for which we fall back to full SDAG
2263 // instruction selection.
2264 return PPCMaterializeInt(CI, VT, false);
2265
2266 return Register();
2267}
2268
2269// Materialize the address created by an alloca into a register, and
2270// return the register number (or zero if we failed to handle it).
2271Register PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
2272 auto SI = FuncInfo.StaticAllocaMap.find(AI);
2273
2274 // Don't handle dynamic allocas.
2275 if (SI == FuncInfo.StaticAllocaMap.end())
2276 return Register();
2277
2278 MVT VT;
2279 if (!isLoadTypeLegal(AI->getType(), VT))
2280 return Register();
2281
2282 if (SI != FuncInfo.StaticAllocaMap.end()) {
2283 Register ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2284 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDI8),
2285 ResultReg).addFrameIndex(SI->second).addImm(0);
2286 return ResultReg;
2287 }
2288
2289 return Register();
2290}
2291
2292// Fold loads into extends when possible.
2293// FIXME: We can have multiple redundant extend/trunc instructions
2294// following a load. The folding only picks up one. Extend this
2295// to check subsequent instructions for the same pattern and remove
2296// them. Thus ResultReg should be the def reg for the last redundant
2297// instruction in a chain, and all intervening instructions can be
2298// removed from parent. Change test/CodeGen/PowerPC/fast-isel-fold.ll
2299// to add ELF64-NOT: rldicl to the appropriate tests when this works.
2300bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
2301 const LoadInst *LI) {
2302 // Verify we have a legal type before going any further.
2303 MVT VT;
2304 if (!isLoadTypeLegal(LI->getType(), VT))
2305 return false;
2306
2307 // Combine load followed by zero- or sign-extend.
2308 bool IsZExt = false;
2309 switch(MI->getOpcode()) {
2310 default:
2311 return false;
2312
2313 case PPC::RLDICL:
2314 case PPC::RLDICL_32_64: {
2315 IsZExt = true;
2316 unsigned MB = MI->getOperand(3).getImm();
2317 if ((VT == MVT::i8 && MB <= 56) ||
2318 (VT == MVT::i16 && MB <= 48) ||
2319 (VT == MVT::i32 && MB <= 32))
2320 break;
2321 return false;
2322 }
2323
2324 case PPC::RLWINM:
2325 case PPC::RLWINM8: {
2326 IsZExt = true;
2327 unsigned MB = MI->getOperand(3).getImm();
2328 if ((VT == MVT::i8 && MB <= 24) ||
2329 (VT == MVT::i16 && MB <= 16))
2330 break;
2331 return false;
2332 }
2333
2334 case PPC::EXTSB:
2335 case PPC::EXTSB8:
2336 case PPC::EXTSB8_32_64:
2337 /* There is no sign-extending load-byte instruction. */
2338 return false;
2339
2340 case PPC::EXTSH:
2341 case PPC::EXTSH8:
2342 case PPC::EXTSH8_32_64: {
2343 if (VT != MVT::i16 && VT != MVT::i8)
2344 return false;
2345 break;
2346 }
2347
2348 case PPC::EXTSW:
2349 case PPC::EXTSW_32:
2350 case PPC::EXTSW_32_64: {
2351 if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
2352 return false;
2353 break;
2354 }
2355 }
2356
2357 // See if we can handle this address.
2358 Address Addr;
2359 if (!PPCComputeAddress(LI->getOperand(0), Addr))
2360 return false;
2361
2362 Register ResultReg = MI->getOperand(0).getReg();
2363
2364 if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt,
2365 Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
2366 return false;
2367
2369 removeDeadCode(I, std::next(I));
2370 return true;
2371}
2372
2373// Attempt to lower call arguments in a faster way than done by
2374// the selection DAG code.
2375bool PPCFastISel::fastLowerArguments() {
2376 // Defer to normal argument lowering for now. It's reasonably
2377 // efficient. Consider doing something like ARM to handle the
2378 // case where all args fit in registers, no varargs, no float
2379 // or vector args.
2380 return false;
2381}
2382
2383// Handle materializing integer constants into a register. This is not
2384// automatically generated for PowerPC, so must be explicitly created here.
2385Register PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
2386
2387 if (Opc != ISD::Constant)
2388 return Register();
2389
2390 // If we're using CR bit registers for i1 values, handle that as a special
2391 // case first.
2392 if (VT == MVT::i1 && Subtarget->useCRBits()) {
2393 Register ImmReg = createResultReg(&PPC::CRBITRCRegClass);
2394 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2395 TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg);
2396 return ImmReg;
2397 }
2398
2399 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
2400 VT != MVT::i1)
2401 return Register();
2402
2403 const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
2404 &PPC::GPRCRegClass);
2405 if (VT == MVT::i64)
2406 return PPCMaterialize64BitInt(Imm, RC);
2407 else
2408 return PPCMaterialize32BitInt(Imm, RC);
2409}
2410
2411// Override for ADDI and ADDI8 to set the correct register class
2412// on RHS operand 0. The automatic infrastructure naively assumes
2413// GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
2414// for these cases. At the moment, none of the other automatically
2415// generated RI instructions require special treatment. However, once
2416// SelectSelect is implemented, "isel" requires similar handling.
2417//
2418// Also be conservative about the output register class. Avoid
2419// assigning R0 or X0 to the output register for GPRC and G8RC
2420// register classes, as any such result could be used in ADDI, etc.,
2421// where those regs have another meaning.
2422Register PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
2423 const TargetRegisterClass *RC,
2424 Register Op0, uint64_t Imm) {
2425 if (MachineInstOpcode == PPC::ADDI)
2426 MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass);
2427 else if (MachineInstOpcode == PPC::ADDI8)
2428 MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass);
2429
2430 const TargetRegisterClass *UseRC =
2431 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2432 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2433
2434 return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC, Op0, Imm);
2435}
2436
2437// Override for instructions with one register operand to avoid use of
2438// R0/X0. The automatic infrastructure isn't aware of the context so
2439// we must be conservative.
2440Register PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
2441 const TargetRegisterClass *RC,
2442 Register Op0) {
2443 const TargetRegisterClass *UseRC =
2444 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2445 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2446
2447 return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0);
2448}
2449
2450// Override for instructions with two register operands to avoid use
2451// of R0/X0. The automatic infrastructure isn't aware of the context
2452// so we must be conservative.
2453Register PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
2454 const TargetRegisterClass *RC,
2455 Register Op0, Register Op1) {
2456 const TargetRegisterClass *UseRC =
2457 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2458 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2459
2460 return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op1);
2461}
2462
2463namespace llvm {
2464 // Create the fast instruction selector for PowerPC64 ELF.
2466 const TargetLibraryInfo *LibInfo,
2467 const LibcallLoweringInfo *LibcallLowering) {
2468 // Only available on 64-bit for now.
2469 const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
2470 if (Subtarget.isPPC64())
2471 return new PPCFastISel(FuncInfo, LibInfo, LibcallLowering);
2472 return nullptr;
2473}
2474}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file defines the FastISel class.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
uint64_t IntrinsicInst * II
static std::optional< PPC::Predicate > getComparePred(CmpInst::Predicate Pred)
static constexpr MCPhysReg FPReg
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
BaseType
A given derived pointer can have multiple base pointers through phi/selects.
This file describes how to lower LLVM code to machine code.
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
an instruction to allocate memory on the stack
PointerType * getType() const
Overload to return most specific pointer type.
Register getLocReg() const
LocInfo getLocInfo() const
unsigned getValNo() const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:679
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition InstrTypes.h:693
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:685
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition InstrTypes.h:678
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:174
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
This is an important base class in LLVM.
Definition Constant.h:43
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
Register fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, Register Op0, uint64_t Imm)
Emit a MachineInstr with a register operand, an immediate, and a result register in the given registe...
Register fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, Register Op0, Register Op1)
Emit a MachineInstr with two register operands and a result register in the given register class.
Register fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, Register Op0)
Emit a MachineInstr with one register operand and a result register in the given register class.
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
MachineBasicBlock::iterator InsertPt
MBB - The current insert position inside the current block.
MachineBasicBlock * MBB
MBB - The current block.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Tracks which library functions to use for a particular subtarget.
An instruction for reading from memory.
Machine Value Type.
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MachineInstrBundleIterator< MachineInstr > iterator
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
const PPCFrameLowering * getFrameLowering() const override
bool isUsingPCRelativeCalls() const
const PPCTargetLowering * getTargetLowering() const override
const PPCInstrInfo * getInstrInfo() const override
bool isLittleEndian() const
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void reserve(size_type N)
void push_back(const T &Elt)
TypeSize getElementOffset(unsigned Idx) const
Definition DataLayout.h:774
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
const Triple & getTargetTriple() const
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned getID() const
Return the register class ID number.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
bool isOSAIX() const
Tests whether the OS is AIX.
Definition Triple.h:771
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141
const Use * const_op_iterator
Definition User.h:255
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
TypeSize getSequentialElementStride(const DataLayout &DL) const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ MO_TOC_LO
Definition PPC.h:187
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Predicate InvertPredicate(Predicate Opcode)
Invert the specified predicate. != -> ==, < -> >=.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo, const LibcallLoweringInfo *LibcallLowering)
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:557
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
RegState
Flags to represent properties of register accesses.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool RetCC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:204
bool CC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
generic_gep_type_iterator<> gep_type_iterator
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
gep_type_iterator gep_type_begin(const User *GEP)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.