PPCISelLowering.cpp
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "PPC.h"
#include "PPCCallingConv.h"
#include "PPCFrameLowering.h"
#include "PPCInstrInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCRegisterInfo.h"
#include "PPCSelectionDAGInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <list>
#include <optional>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "ppc-lowering"

static cl::opt<bool> DisableP10StoreForward(
    "disable-p10-store-forward",
    cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
    cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
    cl::desc("disable setting the node scheduling preference to ILP on PPC"),
    cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
    cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableSCO("disable-ppc-sco",
    cl::desc("disable sibling call optimization on ppc"), cl::Hidden);

static cl::opt<bool> DisableInnermostLoopAlign32(
    "disable-ppc-innermost-loop-align32",
    cl::desc("don't always align innermost loop to 32 bytes on ppc"),
    cl::Hidden);

static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
    cl::desc("use absolute jump tables on ppc"), cl::Hidden);

static cl::opt<bool>
    DisablePerfectShuffle("ppc-disable-perfect-shuffle",
                          cl::desc("disable vector permute decomposition"),
                          cl::init(true), cl::Hidden);

static cl::opt<bool> DisableAutoPairedVecSt(
    "disable-auto-paired-vec-st",
    cl::desc("disable automatically generated 32byte paired vector stores"),
    cl::init(true), cl::Hidden);

static cl::opt<unsigned> PPCMinimumJumpTableEntries(
    "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
    cl::desc("Set minimum number of entries to use a jump table on PPC"));

static cl::opt<unsigned> PPCMinimumBitTestCmps(
    "ppc-min-bit-test-cmps", cl::init(3), cl::Hidden,
    cl::desc("Set minimum of largest number of comparisons to use bit test for "
             "switch on PPC."));

static cl::opt<unsigned> PPCGatherAllAliasesMaxDepth(
    "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
    cl::desc("max depth when checking alias info in GatherAllAliases()"));

static cl::opt<unsigned> AIXTLSModelOptUseIEForLDLimit(
    "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
    cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
             "function to use initial-exec"));

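// The options above are internal, hidden flags. An illustrative (not
// prescriptive) way to exercise one of them from the command line is:
//   llc -mtriple=powerpc64le-unknown-linux-gnu -disable-ppc-sco foo.ll
// The flag names come from the declarations above; the invocation itself is
// only an example.
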
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
STATISTIC(ShufflesHandledWithVPERM,
          "Number of shuffles lowered to a VPERM or XXPERM");
STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");

static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);

static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);

                                   unsigned OpIdx, bool IsByte,
                                   const PPCInstrInfo *TII);

// A faster local-[exec|dynamic] TLS access sequence (enabled with the
// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
// variables; consistent with the IBM XL compiler, we apply a max size of
// slightly under 32KB.
constexpr uint64_t AIXSmallTlsPolicySizeLimit = 32751;

// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM, STI), Subtarget(STI) {
  // Initialize map that relates the PPC addressing modes to the computed flags
  // of a load/store instruction. The map is used to determine the optimal
  // addressing mode when selecting load and stores.
  initializeAddrModeMap();
  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
  const MVT RegVT = Subtarget.getScalarIntVT();

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  if (!useSoftFloat()) {
    if (hasSPE()) {
      addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
      // EFPU2 APU only supports f32
      if (!Subtarget.hasEFPU2())
        addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
    } else {
      addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
      addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
    }
  }

  // PowerPC uses addo_carry,subo_carry to propagate carry.

  // On P10, the default lowering generates better code using the
  // setbc instruction.
  if (!Subtarget.hasP10Vector()) {
    if (isPPC64) {
    }
  }

  // Match BITREVERSE to customized fast code sequence in the td file.

  // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.

  // Custom lower inline assembly to check for special registers.

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  for (MVT VT : MVT::integer_valuetypes()) {
  }

  setTruncStoreAction(MVT::f128, MVT::f16, Expand);

  if (Subtarget.isISA3_0()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Legal);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
    setTruncStoreAction(MVT::f64, MVT::f16, Legal);
    setTruncStoreAction(MVT::f32, MVT::f16, Legal);
  } else {
    // No extending loads from f16 or HW conversions back and forth.
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  }

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  if (!Subtarget.hasSPE()) {
  }

  if (Subtarget.useCRBits()) {

    if (isPPC64 || Subtarget.hasFPCVT()) {

      AddPromotedToType(ISD::SINT_TO_FP, MVT::i1, RegVT);
      AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, RegVT);

      AddPromotedToType(ISD::FP_TO_SINT, MVT::i1, RegVT);
      AddPromotedToType(ISD::FP_TO_UINT, MVT::i1, RegVT);
    } else {
    }

    // PowerPC does not support direct load/store of condition registers.

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)

    for (MVT VT : MVT::integer_valuetypes()) {
      setTruncStoreAction(VT, MVT::i1, Expand);
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, LibCall);

  // PowerPC has no SREM/UREM instructions unless we are on P9.
  // On P9 we may use a hardware instruction to compute the remainder.
  // When the result of both the remainder and the division is required it is
  // more efficient to compute the remainder from the result of the division
  // rather than use the remainder instruction. The instructions are legalized
  // directly because the DivRemPairsPass performs the transformation at the IR
  // level.
  if (Subtarget.isISA3_0()) {
  } else {
  }

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.

  // Handle constrained floating-point operations of scalar.
  // TODO: Handle SPE specific operation.

  if (!Subtarget.hasSPE()) {
  }

  if (Subtarget.hasVSX()) {
  }

  if (Subtarget.hasFSQRT()) {
  }

  if (Subtarget.hasFPRND()) {
  }

  // We don't support sin/cos/sqrt/fmod/pow.

  // MASS transformation for LLVM intrinsics with replicating fast-math flag,
  // to be consistent with the PPCGenScalarMASSEntries pass.
  if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
  }

  if (Subtarget.hasSPE()) {
  } else {
    setOperationAction(ISD::FMA, MVT::f64, Legal);
    setOperationAction(ISD::FMA, MVT::f32, Legal);
  }

  if (Subtarget.hasSPE())
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);

  // If we're enabling GP optimizations, use hardware square root.
  if (!Subtarget.hasFSQRT() && !(Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))

  if (!Subtarget.hasFSQRT() &&
      !(Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))

  if (Subtarget.hasFCPSGN()) {
  } else {
  }

  if (Subtarget.hasFPRND()) {
  }

  // Prior to P10, PowerPC does not have BSWAP, but we can use the vector BSWAP
  // instruction xxbrd to speed up scalar BSWAP64.
  if (Subtarget.isISA3_1()) {
  } else {
    setOperationAction(ISD::BSWAP, MVT::i64,
                       (Subtarget.hasP8Vector() && isPPC64) ? Custom
                                                            : Expand);
  }

  // CTPOP and CTTZ were introduced in P8 and P9 respectively.
  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::CTTZ, MVT::i32, Legal);
    setOperationAction(ISD::CTTZ, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::CTTZ, MVT::i32, Expand);
    setOperationAction(ISD::CTTZ, MVT::i64, Expand);
  }

  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
  } else {
  }

  // PowerPC does not have ROTR.

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have Select.
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.

  // PowerPC wants to optimize integer setcc a bit.
  if (!Subtarget.useCRBits())

  if (Subtarget.hasFPU()) {
  }

  // PowerPC does not have BRCOND, which requires SetCC.
  if (!Subtarget.useCRBits())

  if (Subtarget.hasSPE()) {
    // SPE has built-in conversions

    // SPE supports signaling compare of f32/f64.
  } else {
    // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.

    // PowerPC does not have [U|S]INT_TO_FP
  }

  if (Subtarget.hasDirectMove() && isPPC64) {
  } else {
  }

  // We cannot sextinreg(i1). Expand to shifts.

  // Custom handling for the PowerPC ucmp instruction.
  setOperationAction(ISD::UCMP, MVT::i64, isPPC64 ? Custom : Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuation, user-level threading, and so on. As a result, no
  // other SjLj exception interfaces are implemented; do not build your own
  // exception handling on top of them.
  // LLVM/Clang supports zero-cost DWARF exception handling.

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.

  if (Subtarget.is64BitELFABI()) {
    // VAARG always uses double-word chunks, so promote anything smaller.
    AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
    AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
    AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
    AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
  } else if (Subtarget.is32BitELFABI()) {
    // VAARG is custom lowered with the 32-bit SVR4 ABI.
  } else

  // VACOPY is custom lowered with the 32-bit SVR4 ABI.
  if (Subtarget.is32BitELFABI())
  else

  // Use the default implementation.
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  if (Subtarget.isISA3_0() && isPPC64) {
    setOperationAction(ISD::VP_STORE, MVT::v16i1, Custom);
    setOperationAction(ISD::VP_STORE, MVT::v8i1, Custom);
    setOperationAction(ISD::VP_STORE, MVT::v4i1, Custom);
    setOperationAction(ISD::VP_STORE, MVT::v2i1, Custom);
    setOperationAction(ISD::VP_LOAD, MVT::v16i1, Custom);
    setOperationAction(ISD::VP_LOAD, MVT::v8i1, Custom);
    setOperationAction(ISD::VP_LOAD, MVT::v4i1, Custom);
    setOperationAction(ISD::VP_LOAD, MVT::v2i1, Custom);
  }

  // We want to custom lower some of our intrinsics.

  // To handle counter-based loop conditions.

  // Comparisons that require checking two conditions.
  if (Subtarget.hasSPE()) {
  }

  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.

    if (Subtarget.hasLFIWAX() || isPPC64) {
    }
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    if (Subtarget.hasSPE()) {
    } else {
    }
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
    }
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    // 64-bit PowerPC wants to expand i128 shifts itself.
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
  }

  // PowerPC has better expansions for funnel shifts than the generic
  // TargetLowering::expandFunnelShift.
  if (Subtarget.has64BitSupport()) {
  }

  if (Subtarget.hasVSX()) {
  }

  if (Subtarget.hasAltivec()) {
    for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
    }
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);

      // For v2i64, these are only valid with P8Vector. This is corrected after
      // the loop.
      if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
      } else {
      }

      if (Subtarget.hasVSX()) {
      }

      // Vector instructions introduced in P8
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
      } else {
      }

      // Vector instructions introduced in P9
      if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
      else

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType(ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND, VT, Promote);
      AddPromotedToType(ISD::AND, VT, MVT::v4i32);
      setOperationAction(ISD::OR, VT, Promote);
      AddPromotedToType(ISD::OR, VT, MVT::v4i32);
      setOperationAction(ISD::XOR, VT, Promote);
      AddPromotedToType(ISD::XOR, VT, MVT::v4i32);
      setOperationAction(ISD::LOAD, VT, Promote);
      AddPromotedToType(ISD::LOAD, VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType(ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::SELECT_CC, VT, Promote);
      AddPromotedToType(ISD::SELECT_CC, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType(ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.

      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }
    if (!Subtarget.hasP8Vector()) {
      setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
      setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.

    // Vector truncates to sub-word integers that fit in an Altivec/VSX
    // register are cheap, so handle them before they get expanded to scalar.

    setOperationAction(ISD::AND, MVT::v4i32, Legal);
    setOperationAction(ISD::OR, MVT::v4i32, Legal);
    setOperationAction(ISD::XOR, MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD, MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE, MVT::v4i32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);

    // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
    setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
    // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
    if (Subtarget.hasAltivec())
      for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
        setOperationAction(ISD::ROTL, VT, Legal);
    // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::ROTL, MVT::v2i64, Legal);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::MUL, MVT::v4i32, Legal);
    else
      setOperationAction(ISD::MUL, MVT::v4i32, Custom);

    if (Subtarget.isISA3_1()) {
      setOperationAction(ISD::MUL, MVT::v2i64, Legal);
      setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
      setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
      setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
      setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
      setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
      setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
      setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
      setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
      setOperationAction(ISD::UREM, MVT::v2i64, Legal);
      setOperationAction(ISD::SREM, MVT::v2i64, Legal);
      setOperationAction(ISD::UREM, MVT::v4i32, Legal);
      setOperationAction(ISD::SREM, MVT::v4i32, Legal);
      setOperationAction(ISD::UREM, MVT::v1i128, Legal);
      setOperationAction(ISD::SREM, MVT::v1i128, Legal);
      setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
      setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
      setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
    }

    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    // LE is P8+/64-bit, so direct moves are supported and these operations
    // are legal. The custom transformation requires 64-bit since we need a
    // pair of stores that will cover a 128-bit load for P10.
    if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
    }

    // Altivec does not contain unordered floating-point compare instructions.
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);

    if (Subtarget.hasVSX()) {
      if (Subtarget.hasP8Vector()) {
      }
      if (Subtarget.hasDirectMove() && isPPC64) {
      }

      // The nearbyint variants are not allowed to raise the inexact exception,
      // so we can only code-gen them with fpexcept.ignore.

      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
      setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

      setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);

      if (Subtarget.hasP8Vector())
        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      if (Subtarget.hasP8Altivec()) {
        setOperationAction(ISD::SHL, MVT::v2i64, Legal);
        setOperationAction(ISD::SRA, MVT::v2i64, Legal);
        setOperationAction(ISD::SRL, MVT::v2i64, Legal);

        // 128 bit shifts can be accomplished via 3 instructions for SHL and
        // SRL, but not for SRA because of the instructions available:
        // VS{RL} and VS{RL}O. However, due to direct move costs, it's not
        // worth doing.
        setOperationAction(ISD::SHL, MVT::v1i128, Expand);
        setOperationAction(ISD::SRL, MVT::v1i128, Expand);
        setOperationAction(ISD::SRA, MVT::v1i128, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
      } else {
        setOperationAction(ISD::SHL, MVT::v2i64, Expand);
        setOperationAction(ISD::SRA, MVT::v2i64, Expand);
        setOperationAction(ISD::SRL, MVT::v2i64, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

        // VSX v2i64 only supports non-arithmetic operations.
        setOperationAction(ISD::ADD, MVT::v2i64, Expand);
        setOperationAction(ISD::SUB, MVT::v2i64, Expand);
      }

      if (Subtarget.isISA3_1())
        setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
      else
        setOperationAction(ISD::SETCC, MVT::v1i128, Expand);

      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
      AddPromotedToType(ISD::LOAD, MVT::v2i64, MVT::v2f64);
      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
      AddPromotedToType(ISD::STORE, MVT::v2i64, MVT::v2f64);

      // Custom handling for partial vectors of integers converted to
      // floating point. We already have optimal handling for v2i32 through
      // the DAG combine, so those aren't necessary.

      setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
      setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
      setOperationAction(ISD::FABS, MVT::v4f32, Legal);
      setOperationAction(ISD::FABS, MVT::v2f64, Legal);

      // Handle constrained floating-point operations of vectors.
      // The predicate is `hasVSX` because Altivec instructions do not raise
      // floating-point exceptions, while VSX vector instructions do.

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
      addRegisterClass(MVT::f128, &PPC::VRRCRegClass);

      for (MVT FPT : MVT::fp_valuetypes())
        setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);

      // Expand the SELECT to SELECT_CC.

      setTruncStoreAction(MVT::f128, MVT::f64, Expand);
      setTruncStoreAction(MVT::f128, MVT::f32, Expand);

      // No implementation for these ops for PowerPC.
      setOperationAction(ISD::FSIN, MVT::f128, Expand);
      setOperationAction(ISD::FCOS, MVT::f128, Expand);
      setOperationAction(ISD::FPOW, MVT::f128, Expand);
    }

    if (Subtarget.hasP8Altivec()) {
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
      addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
    }

    if (Subtarget.hasP9Vector()) {

      // Test data class instructions store results in CR bits.
      if (Subtarget.useCRBits()) {
      }

      // 128 bit shifts can be accomplished via 3 instructions for SHL and
      // SRL, but not for SRA because of the instructions available:
      // VS{RL} and VS{RL}O.
      setOperationAction(ISD::SHL, MVT::v1i128, Legal);
      setOperationAction(ISD::SRL, MVT::v1i128, Legal);
      setOperationAction(ISD::SRA, MVT::v1i128, Expand);

      setOperationAction(ISD::FADD, MVT::f128, Legal);
      setOperationAction(ISD::FSUB, MVT::f128, Legal);
      setOperationAction(ISD::FDIV, MVT::f128, Legal);
      setOperationAction(ISD::FMUL, MVT::f128, Legal);

      setOperationAction(ISD::FMA, MVT::f128, Legal);

      setOperationAction(ISD::FRINT, MVT::f128, Legal);
      setOperationAction(ISD::FCEIL, MVT::f128, Legal);

      // Handle constrained floating-point operations of fp128.
      setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
      setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
      setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
      setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
    } else if (Subtarget.hasVSX()) {

      AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
      AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);

      // Set FADD/FSUB as libcall to avoid the legalizer expanding the
      // fp_to_uint and int_to_fp.

      setOperationAction(ISD::FMUL, MVT::f128, Expand);
      setOperationAction(ISD::FDIV, MVT::f128, Expand);
      setOperationAction(ISD::FNEG, MVT::f128, Expand);
      setOperationAction(ISD::FABS, MVT::f128, Expand);
      setOperationAction(ISD::FMA, MVT::f128, Expand);

      // Expand the fp_extend if the target type is fp128.

      // Expand the fp_round if the source type is fp128.
      for (MVT VT : {MVT::f32, MVT::f64}) {
      }

      // Lower the following f128 select_cc pattern:
      //   select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE

      // We need to handle f128 SELECT_CC with integer result type.
      setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
    }

    if (Subtarget.hasP9Altivec()) {
      if (Subtarget.isISA3_1()) {
      } else {
      }

      setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
      setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
      setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
      setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
    }

    if (Subtarget.hasP10Vector()) {
    }
  }

  if (Subtarget.pairedVectorMemops()) {
    addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
    setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
    setOperationAction(ISD::STORE, MVT::v256i1, Custom);
  }
  if (Subtarget.hasMMA()) {
    if (Subtarget.isISAFuture()) {
      addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
      addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass);
      addRegisterClass(MVT::v2048i1, &PPC::DMRpRCRegClass);
      setOperationAction(ISD::LOAD, MVT::v1024i1, Custom);
      setOperationAction(ISD::STORE, MVT::v1024i1, Custom);
      setOperationAction(ISD::LOAD, MVT::v2048i1, Custom);
      setOperationAction(ISD::STORE, MVT::v2048i1, Custom);
    } else {
      addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
    }
    setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
    setOperationAction(ISD::STORE, MVT::v512i1, Custom);
  }

  if (Subtarget.has64BitSupport())

  if (Subtarget.isISA3_1())
    setOperationAction(ISD::SRA, MVT::v1i128, Legal);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);

  if (!isPPC64) {
  }
  }


  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
  }

  else if (isPPC64)
  else

  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
  if (Subtarget.hasFPCVT())
  if (Subtarget.useCRBits())

  if (Subtarget.useCRBits()) {
  }

  if (Subtarget.hasP8Vector())

  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits()) {
  }

  // TODO: The default entry number is set to 64. This stops most jump table
  // generation on PPC. But it is good for current PPC hardware because the
  // indirect branch instruction mtctr to the jump table may lead to poor
  // branch prediction. Re-evaluate this value on future hardware that can do
  // better with mtctr.

  // The default minimum for the largest number of comparisons in a BitTest
  // cluster is 3.

  setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);

  auto CPUDirective = Subtarget.getCPUDirective();
  switch (CPUDirective) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9:
  case PPC::DIR_PWR10:
  case PPC::DIR_PWR11:
    break;
  }


  if (Subtarget.enableMachineScheduler())
  else

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (CPUDirective == PPC::DIR_E500mc || CPUDirective == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemcpy = 32;
  } else if (CPUDirective == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
    MaxLoadsPerMemcmp = 128;
  } else {
  }

  // Enable generation of STXVP instructions by default for mcpu=future.
  if (CPUDirective == PPC::DIR_PWR_FUTURE &&
      DisableAutoPairedVecSt.getNumOccurrences() == 0)
    DisableAutoPairedVecSt = false;

  IsStrictFPEnabled = true;

  // Let the subtarget (CPU) decide if a predictable select is more expensive
  // than the corresponding branch. This information is used in CGP to decide
  // when to convert selects into branches.
  PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
}

// *********************************** NOTE ************************************
// For selecting load and store instructions, the addressing modes are defined
// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
// patterns to match the load and store instructions.
//
// The TD definitions for the addressing modes correspond to their respective
// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
// address mode flags of a particular node. Afterwards, the computed address
// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
// accordingly, based on the preferred addressing mode.
//
// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
// MemOpFlags contains all the possible flags that can be used to compute the
// optimal addressing mode for load and store instructions.
// AddrMode contains all the possible load and store addressing modes available
// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
//
// When adding new load and store instructions, it is possible that new address
// flags may need to be added into MemOpFlags, and a new addressing mode will
// need to be added to AddrMode. An entry for the new addressing mode
// (consisting of the minimal and main distinguishing address flags for the new
// load/store instructions) will need to be added into initializeAddrModeMap()
// below. Finally, when adding new addressing modes, getAddrModeForFlags() will
// need to be updated to account for selecting the optimal addressing mode.
// *****************************************************************************
/// Initialize the map that relates the different addressing modes of the load
/// and store instructions to a set of flags. This ensures the load/store
/// instruction is correctly matched during instruction selection.
void PPCTargetLowering::initializeAddrModeMap() {
  AddrModesMap[PPC::AM_DForm] = {
      // LWZ, STW
      // LBZ, LHZ, STB, STH
      // LHA
      // LFS, LFD, STFS, STFD
  };
  AddrModesMap[PPC::AM_DSForm] = {
      // LWA
      // LD, STD
      // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
  };
  AddrModesMap[PPC::AM_DQForm] = {
      // LXV, STXV
  };
  AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
                                       PPC::MOF_SubtargetP10};
  // TODO: Add mapping for quadword load/store.
}
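
// Illustrative sketch only: per the NOTE above, wiring up a brand-new
// addressing mode would register its distinguishing flags here, e.g.
//   AddrModesMap[PPC::AM_SomeNewForm] = {PPC::MOF_RPlusSImm34 |
//                                        PPC::MOF_SomeNewFlag};
// and getAddrModeForFlags() would be taught to return AM_SomeNewForm for
// those flags. AM_SomeNewForm and MOF_SomeNewFlag are hypothetical names for
// the sake of the example, not real enumerators.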

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 &&
        VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
      MaxAlign = Align(32);
    else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
             MaxAlign < 16)
      MaxAlign = Align(16);
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Align EltAlign;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      Align EltAlign;
      getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
Align PPCTargetLowering::getByValTypeAlignment(Type *Ty,
                                               const DataLayout &DL) const {
  // 16byte and wider vectors are passed on 16byte boundary.
  // The rest is 8 on PPC64 and 4 on PPC32 boundary.
  Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
  if (Subtarget.hasAltivec())
    getMaxByValAlign(Ty, Alignment, Align(16));
  return Alignment;
}
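
// Worked example (illustrative, not from the upstream source): with Altivec
// available, a by-value struct { <4 x i32> V; i32 X; } reaches Align(16) here
// because the 128-bit vector member raises MaxAlign in getMaxByValAlign(),
// while a plain struct { i64 A; i64 B; } keeps the default Align(8) on PPC64
// (Align(4) on PPC32).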

bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}

bool PPCTargetLowering::hasSPE() const {
  return Subtarget.hasSPE();
}

bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
  return VT.isScalarInteger();
}

bool PPCTargetLowering::shallExtractConstSplatVectorElementToStore(
    Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
  if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
    return false;

  if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
    if (VTy->getScalarType()->isIntegerTy()) {
      // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
      if (ElemSizeInBits == 32) {
        Index = Subtarget.isLittleEndian() ? 2 : 1;
        return true;
      }
      if (ElemSizeInBits == 64) {
        Index = Subtarget.isLittleEndian() ? 1 : 0;
        return true;
      }
    }
  }
  return false;
}

EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
                                          EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;

  return VT.changeVectorElementTypeToInteger();
}

bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
        return false;
  }
  return true;
}
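
// Worked example (illustrative): for the big-endian two-input form
// (ShuffleKind == 0), the check requires getMaskElt(i) == i*2+1, so the only
// matching mask is
//   <1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31>
// i.e. VPKUHUM keeps the odd-numbered byte of each halfword of the
// concatenated inputs.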

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
        return false;
  }
  return true;
}

/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
/// current subtarget.
///
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i   ), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1 ), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2 ), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3 ), i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8 ), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9 ), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}
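
// Worked example (illustrative): isVMerge(N, 1, 8, 24) - the big-endian
// two-input vmrglb pattern - accepts exactly the mask
//   <8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31>
// which interleaves the low halves of the two byte vectors.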

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
}

/**
 * Common function used to match vmrgew and vmrgow shuffles
 *
 * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target machine.
 *   - Little Endian:
 *     - Use offset of 0 to check for odd elements
 *     - Use offset of 4 to check for even elements
 *   - Big Endian:
 *     - Use offset of 0 to check for even elements
 *     - Use offset of 4 to check for odd elements
 * A detailed description of the vector element ordering for little endian and
 * big endian can be found at
 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
 * Targeting your applications - what little endian and big endian IBM XL C/C++
 * compiler differences mean to you
 *
 * The mask to the shuffle vector instruction specifies the indices of the
 * elements from the two input vectors to place in the result. The elements are
 * numbered in array-access order, starting with the first vector. These
 * vectors are always of type v16i8, thus each vector will contain 16
 * byte-sized elements. More info on the shuffle vector can be found in the
 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
 * Language Reference.
 *
 * The RHSStartValue indicates whether the same input vectors are used (unary)
 * or two different input vectors are used, based on the following:
 *   - If the instruction uses the same vector for both inputs, the range of
 *     the indices will be 0 to 15. In this case, the RHSStart value passed
 *     should be 0.
 *   - If the instruction has two different vectors then the range of the
 *     indices will be 0 to 31. In this case, the RHSStart value passed should
 *     be 16 (indices 0-15 specify elements in the first vector while indices
 *     16 to 31 specify elements in the second vector).
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] IndexOffset Specifies whether to look for even or odd elements
 * \param[in] RHSStartValue Specifies the starting index for the righthand
 *            input vector to the shuffle_vector instruction
 * \return true iff this shuffle vector represents an even or odd word merge
 */
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
                     unsigned RHSStartValue) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;

  for (unsigned i = 0; i < 2; ++i)
    for (unsigned j = 0; j < 4; ++j)
      if (!isConstantOrUndef(N->getMaskElt(i*4+j),
                             i*RHSStartValue+j+IndexOffset) ||
          !isConstantOrUndef(N->getMaskElt(i*4+j+8),
                             i*RHSStartValue+j+IndexOffset+8))
        return false;
  return true;
}
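
// Worked example (illustrative): for a big-endian two-input vmrgew
// (IndexOffset == 0, RHSStartValue == 16), the accepted mask is
//   <0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27>
// i.e. the even words of both inputs, interleaved.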

/**
 * Determine if the specified shuffle mask is suitable for the vmrgew or
 * vmrgow instructions.
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
 * \param[in] ShuffleKind Identify the type of merge:
 *   - 0 = big-endian merge with two different inputs;
 *   - 1 = either-endian merge with two identical inputs;
 *   - 2 = little-endian merge with two different inputs (inputs are swapped
 *     for little-endian merges).
 * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask is suitable for the vmrgew or vmrgow
 *         instruction
 */
bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
                              unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    unsigned indexOffset = CheckEven ? 4 : 0;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  } else {
    unsigned indexOffset = CheckEven ? 0 : 4;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 0) // Normal
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
}

/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2). For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;

  ShiftAmt -= i;
  bool isLE = DAG.getDataLayout().isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  if (isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}
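
// Worked example (illustrative): on big-endian with two different inputs
// (ShuffleKind == 0), the mask <3, 4, 5, ..., 18> is consecutive starting at
// 3, so this returns a vsldoi shift amount of 3. The same mask under the
// little-endian swapped form (ShuffleKind == 2) would yield 16 - 3 = 13.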

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  EVT VT = N->getValueType(0);
  if (VT == MVT::v2i64 || VT == MVT::v2f64)
    return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);

  assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
         EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");

  // The consecutive indices need to specify an element, not part of two
  // different elements. So abandon ship early if this isn't the case.
  if (N->getMaskElt(0) % EltSize != 0)
    return false;

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte
  // element splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    // An UNDEF element is a sequence of UNDEF bytes.
    if (N->getMaskElt(i) < 0) {
      for (unsigned j = 1; j != EltSize; ++j)
        if (N->getMaskElt(i + j) >= 0)
          return false;
    } else
      for (unsigned j = 0; j != EltSize; ++j)
        if (N->getMaskElt(i + j) != N->getMaskElt(j))
          return false;
  }
  return true;
}
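
// Worked example (illustrative): with EltSize == 4, the mask
//   <4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7>
// is accepted: every four-byte group repeats the bytes of element 1 of the
// first input, i.e. a VSPLTW/XXSPLTW of word 1.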

/// Check that the mask is shuffling N byte elements. Within each N byte
/// element of the mask, the indices could be either in increasing or
/// decreasing order as long as they are consecutive.
/// \param[in] N the shuffle vector SD Node to analyze
/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
///            Word/DoubleWord/QuadWord).
/// \param[in] StepLen the delta indices number among the N byte element, if
///            the mask is in increasing/decreasing order then it is 1/-1.
/// \return true iff the mask is shuffling N byte elements.
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
                                   int StepLen) {
  assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
         "Unexpected element width.");
  assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");

  unsigned NumOfElem = 16 / Width;
  unsigned MaskVal[16]; //  Width is never greater than 16
  for (unsigned i = 0; i < NumOfElem; ++i) {
    MaskVal[0] = N->getMaskElt(i * Width);
    if ((StepLen == 1) && (MaskVal[0] % Width)) {
      return false;
    } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
      return false;
    }

    for (unsigned int j = 1; j < Width; ++j) {
      MaskVal[j] = N->getMaskElt(i * Width + j);
      if (MaskVal[j] != MaskVal[j-1] + StepLen) {
        return false;
      }
    }
  }

  return true;
}
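
// Worked examples (illustrative): with Width == 4 and StepLen == 1 the mask
//   <8, 9, 10, 11, 0, 1, 2, 3, 20, 21, 22, 23, 28, 29, 30, 31>
// passes (each word's bytes are increasing, consecutive, and word-aligned),
// while with Width == 4 and StepLen == -1 a mask such as
//   <3, 2, 1, 0, 7, 6, 5, 4, ...>
// passes because each word's bytes decrease consecutively.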

bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };

  // Below, let H and L be arbitrary elements of the shuffle mask
  // where H is in the range [4,7] and L is in the range [0,3].
  // H, 1, 2, 3 or L, 5, 6, 7
  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
    InsertAtByte = IsLE ? 12 : 0;
    Swap = M0 < 4;
    return true;
  }
  // 0, H, 2, 3 or 4, L, 6, 7
  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
    InsertAtByte = IsLE ? 8 : 4;
    Swap = M1 < 4;
    return true;
  }
  // 0, 1, H, 3 or 4, 5, L, 7
  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
    InsertAtByte = IsLE ? 4 : 8;
    Swap = M2 < 4;
    return true;
  }
  // 0, 1, 2, H or 4, 5, 6, L
  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
    InsertAtByte = IsLE ? 0 : 12;
    Swap = M3 < 4;
    return true;
  }

  // If both vector operands for the shuffle are the same vector, the mask
  // will contain only elements from the first one and the second one will be
  // undef.
  if (N->getOperand(1).isUndef()) {
    ShiftElts = 0;
    Swap = true;
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 12 : 0;
      return true;
    }
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 8 : 4;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
      InsertAtByte = IsLE ? 4 : 8;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
      InsertAtByte = IsLE ? 0 : 12;
      return true;
    }
  }

  return false;
}
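
// Worked example (illustrative): on big-endian, the word-level pattern
// <0, 1, 7, 3> (mask bytes <0,1,2,3, 4,5,6,7, 28,29,30,31, 12,13,14,15>)
// matches the "0, 1, H, 3" case with M2 == 7, giving Swap == false,
// ShiftElts == 2 and InsertAtByte == 8: word 3 of the second input is
// rotated into position and inserted at byte offset 8 of the first input.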
2220
2222 bool &Swap, bool IsLE) {
2223 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2224 // Ensure each byte index of the word is consecutive.
2225 if (!isNByteElemShuffleMask(N, 4, 1))
2226 return false;
2227
2228 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2229 unsigned M0 = N->getMaskElt(0) / 4;
2230 unsigned M1 = N->getMaskElt(4) / 4;
2231 unsigned M2 = N->getMaskElt(8) / 4;
2232 unsigned M3 = N->getMaskElt(12) / 4;
2233
2234 // If both vector operands for the shuffle are the same vector, the mask will
2235 // contain only elements from the first one and the second one will be undef.
2236 if (N->getOperand(1).isUndef()) {
2237 assert(M0 < 4 && "Indexing into an undef vector?");
2238 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2239 return false;
2240
2241 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2242 Swap = false;
2243 return true;
2244 }
2245
2246 // Ensure each word index of the ShuffleVector Mask is consecutive.
2247 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2248 return false;
2249
2250 if (IsLE) {
2251 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2252 // Input vectors don't need to be swapped if the leading element
2253 // of the result is one of the 3 left elements of the second vector
2254 // (or if there is no shift to be done at all).
2255 Swap = false;
2256 ShiftElts = (8 - M0) % 8;
2257 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2258 // Input vectors need to be swapped if the leading element
2259 // of the result is one of the 3 left elements of the first vector
2260 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2261 Swap = true;
2262 ShiftElts = (4 - M0) % 4;
2263 }
2264
2265 return true;
2266 } else { // BE
2267 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2268 // Input vectors don't need to be swapped if the leading element
2269 // of the result is one of the 4 elements of the first vector.
2270 Swap = false;
2271 ShiftElts = M0;
2272 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2273 // Input vectors need to be swapped if the leading element
2274 // of the result is one of the 4 elements of the right vector.
2275 Swap = true;
2276 ShiftElts = M0 - 4;
2277 }
2278
2279 return true;
2280 }
2281}
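// For example, a little-endian word shuffle <6,7,0,1> gives M0 = 6, which
// takes the first branch above: Swap = false and ShiftElts = (8 - 6) % 8 = 2.
// A leading element drawn from words 1-4 instead sets Swap = true with
// ShiftElts = (4 - M0) % 4, so the inputs are exchanged before the XXSLDWI.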
2282
2283 bool PPC::isXXBRShuffleMask(ShuffleVectorSDNode *N, unsigned Width) {
2284 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2285
2286 if (!isNByteElemShuffleMask(N, Width, -1))
2287 return false;
2288
2289 for (int i = 0; i < 16; i += Width)
2290 if (N->getMaskElt(i) != i + Width - 1)
2291 return false;
2292
2293 return true;
2294}
2295
2296 bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2297 return isXXBRShuffleMask(N, 2);
2298 }
2299
2300 bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2301 return isXXBRShuffleMask(N, 4);
2302 }
2303
2304 bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2305 return isXXBRShuffleMask(N, 8);
2306 }
2307
2308 bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2309 return isXXBRShuffleMask(N, 16);
2310 }
2311
2312/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2313/// if the inputs to the instruction should be swapped and set \p DM to the
2314/// value for the immediate.
2315/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2316/// AND element 0 of the result comes from the first input (LE) or second input
2317/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2318/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2319/// mask.
2320 bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2321 bool &Swap, bool IsLE) {
2322 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2323
2324 // Ensure each byte index of the double word is consecutive.
2325 if (!isNByteElemShuffleMask(N, 8, 1))
2326 return false;
2327
2328 unsigned M0 = N->getMaskElt(0) / 8;
2329 unsigned M1 = N->getMaskElt(8) / 8;
2330 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2331
2332 // If both vector operands for the shuffle are the same vector, the mask will
2333 // contain only elements from the first one and the second one will be undef.
2334 if (N->getOperand(1).isUndef()) {
2335 if ((M0 | M1) < 2) {
2336 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2337 Swap = false;
2338 return true;
2339 } else
2340 return false;
2341 }
2342
2343 if (IsLE) {
2344 if (M0 > 1 && M1 < 2) {
2345 Swap = false;
2346 } else if (M0 < 2 && M1 > 1) {
2347 M0 = (M0 + 2) % 4;
2348 M1 = (M1 + 2) % 4;
2349 Swap = true;
2350 } else
2351 return false;
2352
2353 // Note: if control flow comes here that means Swap is already set above
2354 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2355 return true;
2356 } else { // BE
2357 if (M0 < 2 && M1 > 1) {
2358 Swap = false;
2359 } else if (M0 > 1 && M1 < 2) {
2360 M0 = (M0 + 2) % 4;
2361 M1 = (M1 + 2) % 4;
2362 Swap = true;
2363 } else
2364 return false;
2365
2366 // Note: if control flow comes here that means Swap is already set above
2367 DM = (M0 << 1) + (M1 & 1);
2368 return true;
2369 }
2370}
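// For example, with an undef second operand and doubleword mask <1,0>
// (M0 = 1, M1 = 0) on a little-endian target, the code above computes
// DM = (((~0) & 1) << 1) + ((~1) & 1) = 2 with Swap = false, i.e. an
// XXPERMDI of the input with itself that swaps its two doublewords.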
2371
2372
2373/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2374/// appropriate for PPC mnemonics (which have a big endian bias - namely
2375/// elements are counted from the left of the vector register).
2376unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2377 SelectionDAG &DAG) {
2378 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2379 assert(isSplatShuffleMask(SVOp, EltSize));
2380 EVT VT = SVOp->getValueType(0);
2381
2382 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2383 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2384 : SVOp->getMaskElt(0);
2385
2386 if (DAG.getDataLayout().isLittleEndian())
2387 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2388 else
2389 return SVOp->getMaskElt(0) / EltSize;
2390}
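// For example, a v4i32 splat whose mask starts at byte 8 (element 2) on a
// little-endian target returns (16 / 4) - 1 - (8 / 4) = 1, matching the
// left-to-right element numbering expected by mnemonics such as vspltw;
// on a big-endian target it simply returns 8 / 4 = 2.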
2391
2392/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2393/// by using a vspltis[bhw] instruction of the specified element size, return
2394/// the constant being splatted. The ByteSize field indicates the number of
2395/// bytes of each element [124] -> [bhw].
2396 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2397 SDValue OpVal;
2398
2399 // If ByteSize of the splat is bigger than the element size of the
2400 // build_vector, then we have a case where we are checking for a splat where
2401 // multiple elements of the buildvector are folded together into a single
2402 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2403 unsigned EltSize = 16/N->getNumOperands();
2404 if (EltSize < ByteSize) {
2405 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2406 SDValue UniquedVals[4];
2407 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2408
2409 // See if all of the elements in the buildvector agree across.
2410 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2411 if (N->getOperand(i).isUndef()) continue;
2412 // If the element isn't a constant, bail fully out.
2413 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2414
2415 if (!UniquedVals[i&(Multiple-1)].getNode())
2416 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2417 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2418 return SDValue(); // no match.
2419 }
2420
2421 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2422 // either constant or undef values that are identical for each chunk. See
2423 // if these chunks can form into a larger vspltis*.
2424
2425 // Check to see if all of the leading entries are either 0 or -1. If
2426 // neither, then this won't fit into the immediate field.
2427 bool LeadingZero = true;
2428 bool LeadingOnes = true;
2429 for (unsigned i = 0; i != Multiple-1; ++i) {
2430 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2431
2432 LeadingZero &= isNullConstant(UniquedVals[i]);
2433 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2434 }
2435 // Finally, check the least significant entry.
2436 if (LeadingZero) {
2437 if (!UniquedVals[Multiple-1].getNode())
2438 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2439 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2440 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2441 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2442 }
2443 if (LeadingOnes) {
2444 if (!UniquedVals[Multiple-1].getNode())
2445 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2446 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2447 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2448 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2449 }
2450
2451 return SDValue();
2452 }
2453
2454 // Check to see if this buildvec has a single non-undef value in its elements.
2455 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2456 if (N->getOperand(i).isUndef()) continue;
2457 if (!OpVal.getNode())
2458 OpVal = N->getOperand(i);
2459 else if (OpVal != N->getOperand(i))
2460 return SDValue();
2461 }
2462
2463 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2464
2465 unsigned ValSizeInBytes = EltSize;
2466 uint64_t Value = 0;
2467 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2468 Value = CN->getZExtValue();
2469 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2470 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2471 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2472 }
2473
2474 // If the splat value is larger than the element value, then we can never do
2475 // this splat. The only case that we could fit the replicated bits into our
2476 // immediate field for would be zero, and we prefer to use vxor for it.
2477 if (ValSizeInBytes < ByteSize) return SDValue();
2478
2479 // If the element value is larger than the splat value, check if it consists
2480 // of a repeated bit pattern of size ByteSize.
2481 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2482 return SDValue();
2483
2484 // Properly sign extend the value.
2485 int MaskVal = SignExtend32(Value, ByteSize * 8);
2486
2487 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2488 if (MaskVal == 0) return SDValue();
2489
2490 // Finally, if this value fits in a 5 bit sext field, return it
2491 if (SignExtend32<5>(MaskVal) == MaskVal)
2492 return DAG.getSignedTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2493 return SDValue();
2494}
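// For example, a v8i16 build_vector splatting the constant 5 matches with
// ByteSize = 2 and returns 5 (lowerable as "vspltish 5"), while a splat of
// 300 is rejected because SignExtend32<5>(300) != 300: the value does not
// fit the 5-bit signed immediate of the vspltis[bhw] instructions.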
2495
2496//===----------------------------------------------------------------------===//
2497// Addressing Mode Selection
2498//===----------------------------------------------------------------------===//
2499
2500/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2501/// or 64-bit immediate, and if the value can be accurately represented as a
2502/// sign extension from a 16-bit value. If so, this returns true and the
2503/// immediate.
2504bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2505 if (!isa<ConstantSDNode>(N))
2506 return false;
2507
2508 Imm = (int16_t)N->getAsZExtVal();
2509 if (N->getValueType(0) == MVT::i32)
2510 return Imm == (int32_t)N->getAsZExtVal();
2511 else
2512 return Imm == (int64_t)N->getAsZExtVal();
2513}
2514 bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2515 return isIntS16Immediate(Op.getNode(), Imm);
2516}
2517
2518/// Used when computing address flags for selecting loads and stores.
2519/// If we have an OR, check if the LHS and RHS are provably disjoint.
2520/// An OR of two provably disjoint values is equivalent to an ADD.
2521/// Most PPC load/store instructions compute the effective address as a sum,
2522/// so doing this conversion is useful.
2523static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2524 if (N.getOpcode() != ISD::OR)
2525 return false;
2526 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2527 if (!LHSKnown.Zero.getBoolValue())
2528 return false;
2529 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2530 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2531}
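// For example, if the LHS is known to be (x << 4) (low four bits zero) and
// the RHS is a constant in [0, 15], every bit is known zero on at least one
// side, so OR(LHS, RHS) == ADD(LHS, RHS) and the OR can safely feed the
// sum-based addressing modes selected below.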
2532
2533/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2534/// be represented as an indexed [r+r] operation.
2535 bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2536 SDValue &Index,
2537 SelectionDAG &DAG) const {
2538 for (SDNode *U : N->users()) {
2539 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2540 if (Memop->getMemoryVT() == MVT::f64) {
2541 Base = N.getOperand(0);
2542 Index = N.getOperand(1);
2543 return true;
2544 }
2545 }
2546 }
2547 return false;
2548}
2549
2550 /// isIntS34Immediate - This method tests whether the value of the given node
2551 /// can be accurately represented as a sign extension from a 34-bit value. If
2552 /// so, this returns true and the immediate.
2553bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2554 if (!isa<ConstantSDNode>(N))
2555 return false;
2556
2557 Imm = cast<ConstantSDNode>(N)->getSExtValue();
2558 return isInt<34>(Imm);
2559}
2560 bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2561 return isIntS34Immediate(Op.getNode(), Imm);
2562}
2563
2564 /// SelectAddressRegReg - Given the specified address, check to see if it
2565/// can be represented as an indexed [r+r] operation. Returns false if it
2566/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2567/// non-zero and N can be represented by a base register plus a signed 16-bit
2568/// displacement, make a more precise judgement by checking (displacement % \p
2569/// EncodingAlignment).
2570 bool PPCTargetLowering::SelectAddressRegReg(
2571 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2572 MaybeAlign EncodingAlignment) const {
2573 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2574 // a [pc+imm].
2575 if (SelectAddressPCRel(N, Base))
2576 return false;
2577
2578 int16_t Imm = 0;
2579 if (N.getOpcode() == ISD::ADD) {
2580 // SPE f64 load/store cannot use the 16-bit [r+i] form, since SPE
2581 // load/store instructions only handle 8-bit offsets; try the EVX [r+r] form.
2582 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2583 return true;
2584 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2585 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2586 return false; // r+i
2587 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2588 return false; // r+i
2589
2590 Base = N.getOperand(0);
2591 Index = N.getOperand(1);
2592 return true;
2593 } else if (N.getOpcode() == ISD::OR) {
2594 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2595 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2596 return false; // r+i can fold it if we can.
2597
2598 // If this is an or of disjoint bitfields, we can codegen this as an add
2599 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2600 // disjoint.
2601 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2602
2603 if (LHSKnown.Zero.getBoolValue()) {
2604 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2605 // If all of the bits are known zero on the LHS or RHS, the add won't
2606 // carry.
2607 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2608 Base = N.getOperand(0);
2609 Index = N.getOperand(1);
2610 return true;
2611 }
2612 }
2613 }
2614
2615 return false;
2616}
2617
2618// If we happen to be doing an i64 load or store into a stack slot that has
2619// less than a 4-byte alignment, then the frame-index elimination may need to
2620// use an indexed load or store instruction (because the offset may not be a
2621// multiple of 4). The extra register needed to hold the offset comes from the
2622// register scavenger, and it is possible that the scavenger will need to use
2623// an emergency spill slot. As a result, we need to make sure that a spill slot
2624// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2625// stack slot.
2626static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2627 // FIXME: This does not handle the LWA case.
2628 if (VT != MVT::i64)
2629 return;
2630
2631 // NOTE: We'll exclude negative FIs here, which come from argument
2632 // lowering, because there are no known test cases triggering this problem
2633 // using packed structures (or similar). We can remove this exclusion if
2634 // we find such a test case. The reason why this is so test-case driven is
2635 // because this entire 'fixup' is only to prevent crashes (from the
2636 // register scavenger) on not-really-valid inputs. For example, if we have:
2637 // %a = alloca i1
2638 // %b = bitcast i1* %a to i64*
2639 // store i64 0, i64* %b
2640 // then the store should really be marked as 'align 1', but is not. If it
2641 // were marked as 'align 1' then the indexed form would have been
2642 // instruction-selected initially, and the problem this 'fixup' is preventing
2643 // won't happen regardless.
2644 if (FrameIdx < 0)
2645 return;
2646
2647 MachineFunction &MF = DAG.getMachineFunction();
2648 MachineFrameInfo &MFI = MF.getFrameInfo();
2649
2650 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2651 return;
2652
2653 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2654 FuncInfo->setHasNonRISpills();
2655}
2656
2657/// Returns true if the address N can be represented by a base register plus
2658/// a signed 16-bit displacement [r+imm], and if it is not better
2659/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2660/// displacements that are multiples of that value.
2661 bool PPCTargetLowering::SelectAddressRegImm(
2662 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2663 MaybeAlign EncodingAlignment) const {
2664 // FIXME dl should come from parent load or store, not from address
2665 SDLoc dl(N);
2666
2667 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2668 // a [pc+imm].
2669 if (SelectAddressPCRel(N, Base))
2670 return false;
2671
2672 // If this can be more profitably realized as r+r, fail.
2673 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2674 return false;
2675
2676 if (N.getOpcode() == ISD::ADD) {
2677 int16_t imm = 0;
2678 if (isIntS16Immediate(N.getOperand(1), imm) &&
2679 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2680 Disp = DAG.getSignedTargetConstant(imm, dl, N.getValueType());
2681 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2682 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2683 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2684 } else {
2685 Base = N.getOperand(0);
2686 }
2687 return true; // [r+i]
2688 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2689 // Match LOAD (ADD (X, Lo(G))).
2690 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2691 "Cannot handle constant offsets yet!");
2692 Disp = N.getOperand(1).getOperand(0); // The global address.
2693 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2694 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2695 Disp.getOpcode() == ISD::TargetConstantPool ||
2696 Disp.getOpcode() == ISD::TargetJumpTable);
2697 Base = N.getOperand(0);
2698 return true; // [&g+r]
2699 }
2700 } else if (N.getOpcode() == ISD::OR) {
2701 int16_t imm = 0;
2702 if (isIntS16Immediate(N.getOperand(1), imm) &&
2703 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2704 // If this is an or of disjoint bitfields, we can codegen this as an add
2705 // (for better address arithmetic) if the LHS and RHS of the OR are
2706 // provably disjoint.
2707 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2708
2709 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2710 // If all of the bits are known zero on the LHS or RHS, the add won't
2711 // carry.
2712 if (FrameIndexSDNode *FI =
2713 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2714 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2715 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2716 } else {
2717 Base = N.getOperand(0);
2718 }
2719 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2720 return true;
2721 }
2722 }
2723 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2724 // Loading from a constant address.
2725
2726 // If this address fits entirely in a 16-bit sext immediate field, codegen
2727 // this as "d, 0"
2728 int16_t Imm;
2729 if (isIntS16Immediate(CN, Imm) &&
2730 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2731 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2732 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2733 CN->getValueType(0));
2734 return true;
2735 }
2736
2737 // Handle 32-bit sext immediates with LIS + addr mode.
2738 if ((CN->getValueType(0) == MVT::i32 ||
2739 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2740 (!EncodingAlignment ||
2741 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2742 int Addr = (int)CN->getZExtValue();
2743
2744 // Otherwise, break this down into an LIS + disp.
2745 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2746
2747 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2748 MVT::i32);
2749 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2750 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2751 return true;
2752 }
2753 }
2754
2755 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2756 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2757 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2758 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2759 } else
2760 Base = N;
2761 return true; // [r+0]
2762}
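// For example, ADD(FrameIndex, 16) is selected here as Base = FI and
// Disp = 16, matching a D-form access such as "lwz r3, 16(r1)", whereas an
// offset that does not fit in a signed 16-bit field (or that violates
// EncodingAlignment) is left for the [r+r] path instead.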
2763
2764/// Similar to the 16-bit case but for instructions that take a 34-bit
2765/// displacement field (prefixed loads/stores).
2766 bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2767 SDValue &Base,
2768 SelectionDAG &DAG) const {
2769 // Only on 64-bit targets.
2770 if (N.getValueType() != MVT::i64)
2771 return false;
2772
2773 SDLoc dl(N);
2774 int64_t Imm = 0;
2775
2776 if (N.getOpcode() == ISD::ADD) {
2777 if (!isIntS34Immediate(N.getOperand(1), Imm))
2778 return false;
2779 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2780 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2781 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2782 else
2783 Base = N.getOperand(0);
2784 return true;
2785 }
2786
2787 if (N.getOpcode() == ISD::OR) {
2788 if (!isIntS34Immediate(N.getOperand(1), Imm))
2789 return false;
2790 // If this is an or of disjoint bitfields, we can codegen this as an add
2791 // (for better address arithmetic) if the LHS and RHS of the OR are
2792 // provably disjoint.
2793 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2794 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2795 return false;
2796 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2797 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2798 else
2799 Base = N.getOperand(0);
2800 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2801 return true;
2802 }
2803
2804 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2805 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2806 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2807 return true;
2808 }
2809
2810 return false;
2811}
2812
2813 /// SelectAddressRegRegOnly - Given the specified address, force it to be
2814/// represented as an indexed [r+r] operation.
2815 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2816 SDValue &Index,
2817 SelectionDAG &DAG) const {
2818 // Check to see if we can easily represent this as an [r+r] address. This
2819 // will fail if it thinks that the address is more profitably represented as
2820 // reg+imm, e.g. where imm = 0.
2821 if (SelectAddressRegReg(N, Base, Index, DAG))
2822 return true;
2823
2824 // If the address is the result of an add, we will utilize the fact that the
2825 // address calculation includes an implicit add. However, we can reduce
2826 // register pressure if we do not materialize a constant just for use as the
2827 // index register. We only split the add into base and index if it is not
2828 // an add of a value and a 16-bit signed constant where both have one use.
2829 int16_t imm = 0;
2830 if (N.getOpcode() == ISD::ADD &&
2831 (!isIntS16Immediate(N.getOperand(1), imm) ||
2832 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2833 Base = N.getOperand(0);
2834 Index = N.getOperand(1);
2835 return true;
2836 }
2837
2838 // Otherwise, do it the hard way, using R0 as the base register.
2839 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2840 N.getValueType());
2841 Index = N;
2842 return true;
2843}
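// Note: using ZERO/ZERO8 as the base is safe because r0, when used as the
// base register of a memory access on PowerPC, reads as the constant zero,
// so [0 + Index] degenerates to a plain [Index] access.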
2844
2845template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2846 Ty *PCRelCand = dyn_cast<Ty>(N);
2847 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
2848}
2849
2850/// Returns true if this address is a PC Relative address.
2851/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2852/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2853 bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2854 // This is a materialize PC Relative node. Always select this as PC Relative.
2855 Base = N;
2856 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2857 return true;
2858 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2859 isValidPCRelNode<GlobalAddressSDNode>(N) ||
2860 isValidPCRelNode<JumpTableSDNode>(N) ||
2861 isValidPCRelNode<BlockAddressSDNode>(N))
2862 return true;
2863 return false;
2864}
2865
2866/// Returns true if we should use a direct load into vector instruction
2867/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2868static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2869
2870 // If there are any uses other than scalar to vector, then we should
2871 // keep it as a scalar load -> direct move pattern to prevent multiple
2872 // loads.
2873 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2874 if (!LD)
2875 return false;
2876
2877 EVT MemVT = LD->getMemoryVT();
2878 if (!MemVT.isSimple())
2879 return false;
2880 switch(MemVT.getSimpleVT().SimpleTy) {
2881 case MVT::i64:
2882 break;
2883 case MVT::i32:
2884 if (!ST.hasP8Vector())
2885 return false;
2886 break;
2887 case MVT::i16:
2888 case MVT::i8:
2889 if (!ST.hasP9Vector())
2890 return false;
2891 break;
2892 default:
2893 return false;
2894 }
2895
2896 SDValue LoadedVal(N, 0);
2897 if (!LoadedVal.hasOneUse())
2898 return false;
2899
2900 for (SDUse &Use : LD->uses())
2901 if (Use.getResNo() == 0 &&
2902 Use.getUser()->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2903 Use.getUser()->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2904 return false;
2905
2906 return true;
2907}
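// For example, a single-use i64 load whose only user is a SCALAR_TO_VECTOR
// returns true here, allowing it to be selected as a direct vector load
// (such as lxsd) rather than the two-instruction sequence of an integer
// load followed by a direct move (such as ld + mtvsrd).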
2908
2909 /// getPreIndexedAddressParts - returns true, and sets the base pointer,
2910 /// offset pointer, and addressing mode by reference, if the node's address
2911 /// can be legally represented as a pre-indexed load/store address.
2912 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2913 SDValue &Offset,
2914 ISD::MemIndexedMode &AM,
2915 SelectionDAG &DAG) const {
2916 if (DisablePPCPreinc) return false;
2917
2918 bool isLoad = true;
2919 SDValue Ptr;
2920 EVT VT;
2921 Align Alignment;
2922 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2923 Ptr = LD->getBasePtr();
2924 VT = LD->getMemoryVT();
2925 Alignment = LD->getAlign();
2926 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2927 Ptr = ST->getBasePtr();
2928 VT = ST->getMemoryVT();
2929 Alignment = ST->getAlign();
2930 isLoad = false;
2931 } else
2932 return false;
2933
2934 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2935 // instructions because we can fold these into a more efficient instruction
2936 // instead (such as LXSD).
2937 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2938 return false;
2939 }
2940
2941 // PowerPC doesn't have preinc load/store instructions for vectors
2942 if (VT.isVector())
2943 return false;
2944
2945 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2946 // Common code will reject creating a pre-inc form if the base pointer
2947 // is a frame index, or if N is a store and the base pointer is either
2948 // the same as or a predecessor of the value being stored. Check for
2949 // those situations here, and try with swapped Base/Offset instead.
2950 bool Swap = false;
2951
2952 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2953 Swap = true;
2954 else if (!isLoad) {
2955 SDValue Val = cast<StoreSDNode>(N)->getValue();
2956 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2957 Swap = true;
2958 }
2959
2960 if (Swap)
2962 std::swap(Base, Offset);
2963 AM = ISD::PRE_INC;
2964 return true;
2965 }
2966
2967 // LDU/STU can only handle immediates that are a multiple of 4.
2968 if (VT != MVT::i64) {
2969 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
2970 return false;
2971 } else {
2972 // LDU/STU need an address with at least 4-byte alignment.
2973 if (Alignment < Align(4))
2974 return false;
2975
2976 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
2977 return false;
2978 }
2979
2980 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2981 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2982 // sext i32 to i64 when addr mode is r+i.
2983 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2984 LD->getExtensionType() == ISD::SEXTLOAD &&
2985 isa<ConstantSDNode>(Offset))
2986 return false;
2987 }
2988
2989 AM = ISD::PRE_INC;
2990 return true;
2991}
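// For example, a word store to ADD(r9, 16) whose incremented address is
// reused later can be selected as the pre-increment form "stwu r3, 16(r9)",
// which performs the store and updates r9 to r9 + 16 in a single
// instruction.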
2992
2993//===----------------------------------------------------------------------===//
2994// LowerOperation implementation
2995//===----------------------------------------------------------------------===//
2996
2997/// Return true if we should reference labels using a PICBase, set the HiOpFlags
2998/// and LoOpFlags to the target MO flags.
2999static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3000 unsigned &HiOpFlags, unsigned &LoOpFlags,
3001 const GlobalValue *GV = nullptr) {
3002 HiOpFlags = PPCII::MO_HA;
3003 LoOpFlags = PPCII::MO_LO;
3004
3005 // Don't use the pic base if not in PIC relocation model.
3006 if (IsPIC) {
3007 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3008 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3009 }
3010}
3011
3012static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3013 SelectionDAG &DAG) {
3014 SDLoc DL(HiPart);
3015 EVT PtrVT = HiPart.getValueType();
3016 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3017
3018 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3019 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3020
3021 // With PIC, the first instruction is actually "GR+hi(&G)".
3022 if (isPIC)
3023 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3024 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3025
3026 // Generate non-pic code that has direct accesses to the constant pool.
3027 // The address of the global is just (hi(&g)+lo(&g)).
3028 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3029}
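// In the non-PIC case the Hi/Lo pair built above is ultimately emitted as
// the classic two-instruction materialization, e.g.:
//   lis r3, sym@ha
//   addi r3, r3, sym@l
// where the @ha relocation compensates for the sign extension of @l.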
3030
3031 static void setUsesTOCBasePtr(MachineFunction &MF) {
3032 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3033 FuncInfo->setUsesTOCBasePtr();
3034}
3035
3036 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3037 setUsesTOCBasePtr(DAG.getMachineFunction());
3038 }
3039
3040SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3041 SDValue GA) const {
3042 EVT VT = Subtarget.getScalarIntVT();
3043 SDValue Reg = Subtarget.isPPC64() ? DAG.getRegister(PPC::X2, VT)
3044 : Subtarget.isAIXABI()
3045 ? DAG.getRegister(PPC::R2, VT)
3046 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3047 SDValue Ops[] = { GA, Reg };
3048 return DAG.getMemIntrinsicNode(
3049 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3050 MachinePointerInfo::getGOT(DAG.getMachineFunction()), std::nullopt,
3051 MachineMemOperand::MOLoad);
3052 }
3053
3054SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3055 SelectionDAG &DAG) const {
3056 EVT PtrVT = Op.getValueType();
3057 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3058 const Constant *C = CP->getConstVal();
3059
3060 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3061 // The actual address of the GlobalValue is stored in the TOC.
3062 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3063 if (Subtarget.isUsingPCRelativeCalls()) {
3064 SDLoc DL(CP);
3065 EVT Ty = getPointerTy(DAG.getDataLayout());
3066 SDValue ConstPool = DAG.getTargetConstantPool(
3067 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3068 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3069 }
3070 setUsesTOCBasePtr(DAG);
3071 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3072 return getTOCEntry(DAG, SDLoc(CP), GA);
3073 }
3074
3075 unsigned MOHiFlag, MOLoFlag;
3076 bool IsPIC = isPositionIndependent();
3077 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3078
3079 if (IsPIC && Subtarget.isSVR4ABI()) {
3080 SDValue GA =
3081 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, PPCII::MO_PIC_FLAG);
3082 return getTOCEntry(DAG, SDLoc(CP), GA);
3083 }
3084
3085 SDValue CPIHi =
3086 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3087 SDValue CPILo =
3088 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3089 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3090}
3091
3092// For 64-bit PowerPC, prefer the more compact relative encodings.
3093// This trades 32 bits per jump table entry for one or two instructions
3094// on the jump site.
3101
3102 bool PPCTargetLowering::isJumpTableRelative() const {
3103 if (UseAbsoluteJumpTables)
3104 return false;
3105 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3106 return true;
3107 return TargetLowering::isJumpTableRelative();
3108 }
3109
3110 SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3111 SelectionDAG &DAG) const {
3112 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3113 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3114
3115 switch (getTargetMachine().getCodeModel()) {
3116 case CodeModel::Small:
3117 case CodeModel::Medium:
3118 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3119 default:
3120 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3121 getPointerTy(DAG.getDataLayout()));
3122 }
3123}
3124
3125const MCExpr *
3126 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3127 unsigned JTI,
3128 MCContext &Ctx) const {
3129 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3130 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3131
3132 switch (getTargetMachine().getCodeModel()) {
3133 case CodeModel::Small:
3134 case CodeModel::Medium:
3135 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3136 default:
3137 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3138 }
3139}
3140
3141SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3142 EVT PtrVT = Op.getValueType();
3143 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3144
3145 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3146 if (Subtarget.isUsingPCRelativeCalls()) {
3147 SDLoc DL(JT);
3148 EVT Ty = getPointerTy(DAG.getDataLayout());
3149 SDValue GA =
3150 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3151 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3152 return MatAddr;
3153 }
3154
3155 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3156 // The actual address of the GlobalValue is stored in the TOC.
3157 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3158 setUsesTOCBasePtr(DAG);
3159 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3160 return getTOCEntry(DAG, SDLoc(JT), GA);
3161 }
3162
3163 unsigned MOHiFlag, MOLoFlag;
3164 bool IsPIC = isPositionIndependent();
3165 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3166
3167 if (IsPIC && Subtarget.isSVR4ABI()) {
3168 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3169 PPCII::MO_PIC_FLAG);
3170 return getTOCEntry(DAG, SDLoc(GA), GA);
3171 }
3172
3173 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3174 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3175 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3176}
3177
3178SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3179 SelectionDAG &DAG) const {
3180 EVT PtrVT = Op.getValueType();
3181 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3182 const BlockAddress *BA = BASDN->getBlockAddress();
3183
3184 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3185 if (Subtarget.isUsingPCRelativeCalls()) {
3186 SDLoc DL(BASDN);
3187 EVT Ty = getPointerTy(DAG.getDataLayout());
3188 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3189 PPCII::MO_PCREL_FLAG);
3190 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3191 return MatAddr;
3192 }
3193
3194 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3195 // The actual BlockAddress is stored in the TOC.
3196 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3197 setUsesTOCBasePtr(DAG);
3198 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3199 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3200 }
3201
3202 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3203 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3204 return getTOCEntry(
3205 DAG, SDLoc(BASDN),
3206 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3207
3208 unsigned MOHiFlag, MOLoFlag;
3209 bool IsPIC = isPositionIndependent();
3210 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3211 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3212 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3213 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3214}
3215
3216SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3217 SelectionDAG &DAG) const {
3218 if (Subtarget.isAIXABI())
3219 return LowerGlobalTLSAddressAIX(Op, DAG);
3220
3221 return LowerGlobalTLSAddressLinux(Op, DAG);
3222}
3223
3224/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3225/// and then apply the update.
3226 static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model,
3227 SelectionDAG &DAG,
3228 const TargetMachine &TM) {
3229 // Initialize TLS model opt setting lazily:
3230 // (1) Use initial-exec for single TLS var references within current function.
3231 // (2) Use local-dynamic for multiple TLS var references within current
3232 // function.
3233 PPCFunctionInfo *FuncInfo =
3234 DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3235 if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3236 SmallPtrSet<const GlobalValue *, 4> TLSGV;
3237 // Iterate over all instructions within current function, collect all TLS
3238 // global variables (global variables taken as the first parameter to
3239 // Intrinsic::threadlocal_address).
3240 const Function &Func = DAG.getMachineFunction().getFunction();
3241 for (const BasicBlock &BB : Func)
3242 for (const Instruction &I : BB)
3243 if (I.getOpcode() == Instruction::Call)
3244 if (const CallInst *CI = dyn_cast<const CallInst>(&I))
3245 if (Function *CF = CI->getCalledFunction())
3246 if (CF->isDeclaration() &&
3247 CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3248 if (const GlobalValue *GV =
3249 dyn_cast<GlobalValue>(I.getOperand(0))) {
3250 TLSModel::Model GVModel = TM.getTLSModel(GV);
3251 if (GVModel == TLSModel::LocalDynamic)
3252 TLSGV.insert(GV);
3253 }
3254
3255 unsigned TLSGVCnt = TLSGV.size();
3256 LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3257 if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3258 FuncInfo->setAIXFuncUseTLSIEForLD();
3259 FuncInfo->setAIXFuncTLSModelOptInitDone();
3260 }
3261
3262 if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3263 LLVM_DEBUG(
3264 dbgs() << DAG.getMachineFunction().getName()
3265 << " function is using the TLS-IE model for TLS-LD access.\n");
3266 Model = TLSModel::InitialExec;
3267 }
3268}
3269
3270SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3271 SelectionDAG &DAG) const {
3272 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3273
3274 if (DAG.getTarget().useEmulatedTLS())
3275 report_fatal_error("Emulated TLS is not yet supported on AIX");
3276
3277 SDLoc dl(GA);
3278 const GlobalValue *GV = GA->getGlobal();
3279 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3280 bool Is64Bit = Subtarget.isPPC64();
3281 TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3282
3283 // Apply update to the TLS model.
3284 if (Subtarget.hasAIXShLibTLSModelOpt())
3285 updateForAIXShLibTLSModelOpt(Model, DAG, getTargetMachine());
3286
3287 // TLS variables are accessed through TOC entries.
3288 // To support this, set the DAG to use the TOC base pointer.
3289 setUsesTOCBasePtr(DAG);
3290
3291 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3292
3293 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3294 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3295 bool HasAIXSmallTLSGlobalAttr = false;
3296 SDValue VariableOffsetTGA =
3297 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3298 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3299 SDValue TLSReg;
3300
3301 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3302 if (GVar->hasAttribute("aix-small-tls"))
3303 HasAIXSmallTLSGlobalAttr = true;
3304
3305 if (Is64Bit) {
3306 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3307 // involves a load of the variable offset (from the TOC), followed by an
3308 // add of the loaded variable offset to R13 (the thread pointer).
3309 // This code sequence looks like:
3310 // ld reg1,var[TC](2)
3311 // add reg2, reg1, r13 // r13 contains the thread pointer
3312 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3313
3314 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3315 // global variable attribute, produce a faster access sequence for
3316 // local-exec TLS variables where the offset from the TLS base is encoded
3317 // as an immediate operand.
3318 //
3319 // We only utilize the faster local-exec access sequence when the TLS
3320 // variable has a size within the policy limit. We treat types that are
3321 // not sized or are empty as being over the policy size limit.
3322 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3323 IsTLSLocalExecModel) {
3324 Type *GVType = GV->getValueType();
3325 if (GVType->isSized() && !GVType->isEmptyTy() &&
3326 GV->getDataLayout().getTypeAllocSize(GVType) <=
3327 AIXSmallTlsPolicySizeLimit)
3328 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3329 }
3330 } else {
3331 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3332 // involves loading the variable offset from the TOC, generating a call to
3333 // .__get_tpointer to get the thread pointer (which will be in R3), and
3334 // adding the two together:
3335 // lwz reg1,var[TC](2)
3336 // bla .__get_tpointer
3337 // add reg2, reg1, r3
3338 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3339
3340 // We do not implement the 32-bit version of the faster access sequence
3341 // for local-exec that is controlled by the -maix-small-local-exec-tls
3342 // option, or the "aix-small-tls" global variable attribute.
3343 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3344 report_fatal_error("The small-local-exec TLS access sequence is "
3345 "currently only supported on AIX (64-bit mode).");
3346 }
3347 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3348 }
3349
3350 if (Model == TLSModel::LocalDynamic) {
3351 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3352
3353 // We do not implement the 32-bit version of the faster access sequence
3354 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3355 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3356 report_fatal_error("The small-local-dynamic TLS access sequence is "
3357 "currently only supported on AIX (64-bit mode).");
3358
3359 // For local-dynamic on AIX, we need to generate one TOC entry for each
3360 // variable offset, and a single module-handle TOC entry for the entire
3361 // file.
3362
3363 SDValue VariableOffsetTGA =
3364 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3365 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3366
3367 Module *M = DAG.getMachineFunction().getFunction().getParent();
3368 GlobalVariable *TLSGV =
3369 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3370 StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3371 TLSGV->setThreadLocal(true);
3372 assert(TLSGV && "Not able to create GV for _$TLSML.");
3373 SDValue ModuleHandleTGA =
3374 DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3375 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3376 SDValue ModuleHandle =
3377 DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3378
3379 // With the -maix-small-local-dynamic-tls option, produce a faster access
3380 // sequence for local-dynamic TLS variables where the offset from the
3381 // module-handle is encoded as an immediate operand.
3382 //
3383 // We only utilize the faster local-dynamic access sequence when the TLS
3384 // variable has a size within the policy limit. We treat types that are
3385 // not sized or are empty as being over the policy size limit.
3386 if (HasAIXSmallLocalDynamicTLS) {
3387 Type *GVType = GV->getValueType();
3388 if (GVType->isSized() && !GVType->isEmptyTy() &&
3389 GV->getDataLayout().getTypeAllocSize(GVType) <=
3390 AIXSmallTlsPolicySizeLimit)
3391 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3392 ModuleHandle);
3393 }
3394
3395 return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3396 }
3397
3398 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3399 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3400 // need to generate two TOC entries, one for the variable offset, one for the
3401 // region handle. The global address for the TOC entry of the region handle is
3402 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3403 // entry of the variable offset is created with MO_TLSGD_FLAG.
3404 SDValue VariableOffsetTGA =
3405 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3406 SDValue RegionHandleTGA =
3407 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3408 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3409 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3410 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3411 RegionHandle);
3412}
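// Note: the PPCISD::TLSGD_AIX node produced for the general-dynamic case is
// expected to expand to a call to the .__tls_get_addr runtime helper using
// the region handle and variable offset loaded from their TOC entries, so
// each general-dynamic access costs two TOC loads plus a call.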
3413
3414SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3415 SelectionDAG &DAG) const {
3416 // FIXME: TLS addresses currently use medium model code sequences,
3417 // which is the most useful form. Eventually support for small and
3418 // large models could be added if users need it, at the cost of
3419 // additional complexity.
3420 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3421 if (DAG.getTarget().useEmulatedTLS())
3422 return LowerToTLSEmulatedModel(GA, DAG);
3423
3424 SDLoc dl(GA);
3425 const GlobalValue *GV = GA->getGlobal();
3426 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3427 bool is64bit = Subtarget.isPPC64();
3428 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3429 PICLevel::Level picLevel = M->getPICLevel();
3430
3431 const TargetMachine &TM = getTargetMachine();
3432 TLSModel::Model Model = TM.getTLSModel(GV);
3433
3434 if (Model == TLSModel::LocalExec) {
3435 if (Subtarget.isUsingPCRelativeCalls()) {
3436 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3437 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3438 PPCII::MO_TPREL_PCREL_FLAG);
3439 SDValue MatAddr =
3440 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3441 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3442 }
3443
3444 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3445 PPCII::MO_TPREL_HA);
3446 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3447 PPCII::MO_TPREL_LO);
3448 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3449 : DAG.getRegister(PPC::R2, MVT::i32);
3450
3451 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3452 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3453 }
3454
3455 if (Model == TLSModel::InitialExec) {
3456 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3457 SDValue TGA = DAG.getTargetGlobalAddress(
3458 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3459 SDValue TGATLS = DAG.getTargetGlobalAddress(
3460 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3461 SDValue TPOffset;
3462 if (IsPCRel) {
3463 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3464 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3465 MachinePointerInfo());
3466 } else {
3467 SDValue GOTPtr;
3468 if (is64bit) {
3469 setUsesTOCBasePtr(DAG);
3470 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3471 GOTPtr =
3472 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3473 } else {
3474 if (!TM.isPositionIndependent())
3475 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3476 else if (picLevel == PICLevel::SmallPIC)
3477 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3478 else
3479 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3480 }
3481 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3482 }
3483 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3484 }
3485
3486 if (Model == TLSModel::GeneralDynamic) {
3487 if (Subtarget.isUsingPCRelativeCalls()) {
3488 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3489 PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3490 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3491 }
3492
3493 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3494 SDValue GOTPtr;
3495 if (is64bit) {
3496 setUsesTOCBasePtr(DAG);
3497 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3498 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3499 GOTReg, TGA);
3500 } else {
3501 if (picLevel == PICLevel::SmallPIC)
3502 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3503 else
3504 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3505 }
3506 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3507 GOTPtr, TGA, TGA);
3508 }
3509
3510 if (Model == TLSModel::LocalDynamic) {
3511 if (Subtarget.isUsingPCRelativeCalls()) {
3512 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3513 PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3514 SDValue MatPCRel =
3515 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3516 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3517 }
3518
3519 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3520 SDValue GOTPtr;
3521 if (is64bit) {
3522 setUsesTOCBasePtr(DAG);
3523 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3524 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3525 GOTReg, TGA);
3526 } else {
3527 if (picLevel == PICLevel::SmallPIC)
3528 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3529 else
3530 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3531 }
3532 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3533 PtrVT, GOTPtr, TGA, TGA);
3534 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3535 PtrVT, TLSAddr, TGA);
3536 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3537 }
3538
3539 llvm_unreachable("Unknown TLS model!");
3540}
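// For reference, the 64-bit non-PC-relative initial-exec path above
// corresponds to the medium code model sequence:
//   addis r3, r2, x@got@tprel@ha
//   ld    r3, x@got@tprel@l(r3)
//   add   r3, r3, x@tls
// i.e. the ADDIS_GOT_TPREL_HA, LD_GOT_TPREL_L and ADD_TLS nodes built above.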
3541
3542SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3543 SelectionDAG &DAG) const {
3544 EVT PtrVT = Op.getValueType();
3545 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3546 SDLoc DL(GSDN);
3547 const GlobalValue *GV = GSDN->getGlobal();
3548
3549 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3550 // The actual address of the GlobalValue is stored in the TOC.
3551 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3552 if (Subtarget.isUsingPCRelativeCalls()) {
3553 EVT Ty = getPointerTy(DAG.getDataLayout());
3554 if (isAccessedAsGotIndirect(Op)) {
3555 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3556 PPCII::MO_GOT_PCREL_FLAG);
3557 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3558 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3559 MachinePointerInfo());
3560 return Load;
3561 } else {
3562 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3563 PPCII::MO_PCREL_FLAG);
3564 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3565 }
3566 }
3567 setUsesTOCBasePtr(DAG);
3568 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3569 return getTOCEntry(DAG, DL, GA);
3570 }
3571
3572 unsigned MOHiFlag, MOLoFlag;
3573 bool IsPIC = isPositionIndependent();
3574 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3575
3576 if (IsPIC && Subtarget.isSVR4ABI()) {
3577 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3578 GSDN->getOffset(),
3579 PPCII::MO_PIC_FLAG);
3580 return getTOCEntry(DAG, DL, GA);
3581 }
3582
3583 SDValue GAHi =
3584 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3585 SDValue GALo =
3586 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3587
3588 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3589}
3590
3591SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3592 bool IsStrict = Op->isStrictFPOpcode();
3593 ISD::CondCode CC =
3594 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3595 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3596 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3597 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3598 EVT LHSVT = LHS.getValueType();
3599 SDLoc dl(Op);
3600
3601 // Soften the setcc with libcall if it is fp128.
3602 if (LHSVT == MVT::f128) {
3603 assert(!Subtarget.hasP9Vector() &&
3604 "SETCC for f128 is already legal under Power9!");
3605 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3606 Op->getOpcode() == ISD::STRICT_FSETCCS);
3607 if (RHS.getNode())
3608 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3609 DAG.getCondCode(CC));
3610 if (IsStrict)
3611 return DAG.getMergeValues({LHS, Chain}, dl);
3612 return LHS;
3613 }
3614
3615 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3616
3617 if (Op.getValueType() == MVT::v2i64) {
3618 // When the operands themselves are v2i64 values, we need to do something
3619 // special because VSX has no underlying comparison operations for these.
3620 if (LHS.getValueType() == MVT::v2i64) {
3621 // Equality can be handled by casting to the legal type for Altivec
3622 // comparisons, everything else needs to be expanded.
3623 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3624 return SDValue();
3625 SDValue SetCC32 = DAG.getSetCC(
3626 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3627 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3628 int ShuffV[] = {1, 0, 3, 2};
3629 SDValue Shuff =
3630 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3631 return DAG.getBitcast(MVT::v2i64,
3632 DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3633 dl, MVT::v4i32, Shuff, SetCC32));
3634 }
3635
3636 // We handle most of these in the usual way.
3637 return Op;
3638 }
3639
3640 // If we're comparing for equality to zero, expose the fact that this is
3641 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3642 // fold the new nodes.
3643 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3644 return V;
3645
3646 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3647 // Leave comparisons against 0 and -1 alone for now, since they're usually
3648 // optimized. FIXME: revisit this when we can custom lower all setcc
3649 // optimizations.
3650 if (C->isAllOnes() || C->isZero())
3651 return SDValue();
3652 }
3653
3654 // If we have an integer seteq/setne, turn it into a compare against zero
3655 // by xor'ing the rhs with the lhs, which is faster than setting a
3656 // condition register, reading it back out, and masking the correct bit. The
3657 // normal approach here uses sub to do this instead of xor. Using xor exposes
3658 // the result to other bit-twiddling opportunities.
3659 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3660 EVT VT = Op.getValueType();
3661 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3662 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3663 }
3664 return SDValue();
3665}
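// The ctlz/srl expansion mentioned above turns (seteq x, 0) for i32 into
// (srl (ctlz x), 5): cntlzw produces 32 only when x is zero, and a logical
// shift right by 5 keeps exactly that bit, e.g.:
//   cntlzw r3, r3
//   srwi   r3, r3, 5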
3666
3667SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3668 SDNode *Node = Op.getNode();
3669 EVT VT = Node->getValueType(0);
3670 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3671 SDValue InChain = Node->getOperand(0);
3672 SDValue VAListPtr = Node->getOperand(1);
3673 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3674 SDLoc dl(Node);
3675
3676 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3677
3678 // gpr_index
3679 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3680 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3681 InChain = GprIndex.getValue(1);
3682
3683 if (VT == MVT::i64) {
3684 // Check if GprIndex is even
3685 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3686 DAG.getConstant(1, dl, MVT::i32));
3687 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3688 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3689 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3690 DAG.getConstant(1, dl, MVT::i32));
3691 // Align GprIndex to be even if it isn't
3692 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3693 GprIndex);
3694 }
3695
3696 // fpr index is 1 byte after gpr
3697 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3698 DAG.getConstant(1, dl, MVT::i32));
3699
3700 // fpr
3701 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3702 FprPtr, MachinePointerInfo(SV), MVT::i8);
3703 InChain = FprIndex.getValue(1);
3704
3705 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3706 DAG.getConstant(8, dl, MVT::i32));
3707
3708 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3709 DAG.getConstant(4, dl, MVT::i32));
3710
3711 // areas
3712 SDValue OverflowArea =
3713 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3714 InChain = OverflowArea.getValue(1);
3715
3716 SDValue RegSaveArea =
3717 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3718 InChain = RegSaveArea.getValue(1);
3719
3720 // select overflow_area if index > 8
3721 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3722 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3723
3724 // adjustment constant gpr_index * 4/8
3725 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3726 VT.isInteger() ? GprIndex : FprIndex,
3727 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3728 MVT::i32));
3729
3730 // OurReg = RegSaveArea + RegConstant
3731 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3732 RegConstant);
3733
3734 // Floating types are 32 bytes into RegSaveArea
3735 if (VT.isFloatingPoint())
3736 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3737 DAG.getConstant(32, dl, MVT::i32));
3738
3739 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3740 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3741 VT.isInteger() ? GprIndex : FprIndex,
3742 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3743 MVT::i32));
3744
3745 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3746 VT.isInteger() ? VAListPtr : FprPtr,
3747 MachinePointerInfo(SV), MVT::i8);
3748
3749 // determine if we should load from reg_save_area or overflow_area
3750 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3751
3752 // increase overflow_area by 4/8 if gpr/fpr > 8
3753 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3754 DAG.getConstant(VT.isInteger() ? 4 : 8,
3755 dl, MVT::i32));
3756
3757 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3758 OverflowAreaPlusN);
3759
3760 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3761 MachinePointerInfo(), MVT::i32);
3762
3763 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3764}
3765
3766SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3767 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3768
3769 // We have to copy the entire va_list struct:
3770 // 2 * sizeof(char) + 2 bytes of padding + 2 * sizeof(char *) = 12 bytes
3771 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3772 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3773 false, true, /*CI=*/nullptr, std::nullopt,
3774 MachinePointerInfo(), MachinePointerInfo());
3775}
3776
3777SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3778 SelectionDAG &DAG) const {
3779 return Op.getOperand(0);
3780}
3781
3782SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3783 MachineFunction &MF = DAG.getMachineFunction();
3784 PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3785
3786 assert((Op.getOpcode() == ISD::INLINEASM ||
3787 Op.getOpcode() == ISD::INLINEASM_BR) &&
3788 "Expecting Inline ASM node.");
3789
3790 // If an LR store is already known to be required then there is no point in
3791 // checking this ASM as well.
3792 if (MFI.isLRStoreRequired())
3793 return Op;
3794
3795 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3796 // type MVT::Glue. We want to ignore this last operand if that is the case.
3797 unsigned NumOps = Op.getNumOperands();
3798 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3799 --NumOps;
3800
3801 // Check all operands that may contain the LR.
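// Illustrative example: an `asm volatile("..." ::: "lr")` statement shows up
// here as a clobber of PPC::LR (PPC::LR8 in 64-bit mode) and forces the LR
// store below.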
3802 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3803 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3804 unsigned NumVals = Flags.getNumOperandRegisters();
3805 ++i; // Skip the ID value.
3806
3807 switch (Flags.getKind()) {
3808 default:
3809 llvm_unreachable("Bad flags!");
3810 case InlineAsm::Kind::RegUse:
3811 case InlineAsm::Kind::Imm:
3812 case InlineAsm::Kind::Mem:
3813 i += NumVals;
3814 break;
3815 case InlineAsm::Kind::Clobber:
3816 case InlineAsm::Kind::RegDef:
3817 case InlineAsm::Kind::RegDefEarlyClobber: {
3818 for (; NumVals; --NumVals, ++i) {
3819 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3820 if (Reg != PPC::LR && Reg != PPC::LR8)
3821 continue;
3822 MFI.setLRStoreRequired();
3823 return Op;
3824 }
3825 break;
3826 }
3827 }
3828 }
3829
3830 return Op;
3831}
3832
3833SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3834 SelectionDAG &DAG) const {
3835 SDValue Chain = Op.getOperand(0);
3836 SDValue Trmp = Op.getOperand(1); // trampoline
3837 SDValue FPtr = Op.getOperand(2); // nested function
3838 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3839 SDLoc dl(Op);
3840
3841 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3842
3843 if (Subtarget.isAIXABI()) {
3844 // On AIX we create a trampoline descriptor by combining the
3845 // entry point and TOC from the global descriptor (FPtr) with the
3846 // nest argument as the environment pointer.
3847 uint64_t PointerSize = Subtarget.isPPC64() ? 8 : 4;
3848 MaybeAlign PointerAlign(PointerSize);
3849 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
3850 ? (MachineMemOperand::MODereferenceable |
3851 MachineMemOperand::MOInvariant)
3852 : MachineMemOperand::MONone;
3853
3854 uint64_t TOCPointerOffset = 1 * PointerSize;
3855 uint64_t EnvPointerOffset = 2 * PointerSize;
3856 SDValue SDTOCPtrOffset = DAG.getConstant(TOCPointerOffset, dl, PtrVT);
3857 SDValue SDEnvPtrOffset = DAG.getConstant(EnvPointerOffset, dl, PtrVT);
3858
3859 const Value *TrampolineAddr =
3860 cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3861 const Function *Func =
3862 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
3863
3864 SDValue OutChains[3];
3865
3866 // Copy the entry point address from the global descriptor to the
3867 // trampoline buffer.
3868 SDValue LoadEntryPoint =
3869 DAG.getLoad(PtrVT, dl, Chain, FPtr, MachinePointerInfo(Func, 0),
3870 PointerAlign, MMOFlags);
3871 SDValue EPLoadChain = LoadEntryPoint.getValue(1);
3872 OutChains[0] = DAG.getStore(EPLoadChain, dl, LoadEntryPoint, Trmp,
3873 MachinePointerInfo(TrampolineAddr, 0));
3874
3875 // Copy the TOC pointer from the global descriptor to the trampoline
3876 // buffer.
3877 SDValue TOCFromDescriptorPtr =
3878 DAG.getNode(ISD::ADD, dl, PtrVT, FPtr, SDTOCPtrOffset);
3879 SDValue TOCReg = DAG.getLoad(PtrVT, dl, Chain, TOCFromDescriptorPtr,
3880 MachinePointerInfo(Func, TOCPointerOffset),
3881 PointerAlign, MMOFlags);
3882 SDValue TrampolineTOCPointer =
3883 DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDTOCPtrOffset);
3884 SDValue TOCLoadChain = TOCReg.getValue(1);
3885 OutChains[1] =
3886 DAG.getStore(TOCLoadChain, dl, TOCReg, TrampolineTOCPointer,
3887 MachinePointerInfo(TrampolineAddr, TOCPointerOffset));
3888
3889 // Store the nest argument into the environment pointer in the trampoline
3890 // buffer.
3891 SDValue EnvPointer = DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDEnvPtrOffset);
3892 OutChains[2] =
3893 DAG.getStore(Chain, dl, Nest, EnvPointer,
3894 MachinePointerInfo(TrampolineAddr, EnvPointerOffset));
3895
3896 SDValue TokenFactor =
3897 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
3898 return TokenFactor;
3899 }
3900
3901 bool isPPC64 = (PtrVT == MVT::i64);
3902 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3903
3903
3904 TargetLowering::ArgListTy Args;
3905 Args.emplace_back(Trmp, IntPtrTy);
3906 // TrampSize == (isPPC64 ? 48 : 40);
3907 Args.emplace_back(
3908 DAG.getConstant(isPPC64 ? 48 : 40, dl, Subtarget.getScalarIntVT()),
3909 IntPtrTy);
3910 Args.emplace_back(FPtr, IntPtrTy);
3911 Args.emplace_back(Nest, IntPtrTy);
3912
3913 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3914 TargetLowering::CallLoweringInfo CLI(DAG);
3915 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3916 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3917 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3918
3919 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3920 return CallResult.second;
3921}
3922
3923SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3924 MachineFunction &MF = DAG.getMachineFunction();
3925 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3926 EVT PtrVT = getPointerTy(MF.getDataLayout());
3927
3928 SDLoc dl(Op);
3929
3930 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3931 // vastart just stores the address of the VarArgsFrameIndex slot into the
3932 // memory location argument.
3933 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3934 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3935 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3936 MachinePointerInfo(SV));
3937 }
3938
3939 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3940 // We suppose the given va_list is already allocated.
3941 //
3942 // typedef struct {
3943 // char gpr; /* index into the array of 8 GPRs
3944 // * stored in the register save area
3945 // * gpr=0 corresponds to r3,
3946 // * gpr=1 to r4, etc.
3947 // */
3948 // char fpr; /* index into the array of 8 FPRs
3949 // * stored in the register save area
3950 // * fpr=0 corresponds to f1,
3951 // * fpr=1 to f2, etc.
3952 // */
3953 // char *overflow_arg_area;
3954 // /* location on stack that holds
3955 // * the next overflow argument
3956 // */
3957 // char *reg_save_area;
3958 // /* where r3:r10 and f1:f8 (if saved)
3959 // * are stored
3960 // */
3961 // } va_list[1];
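// Illustrative example: on entry to `int f(int a, ...)`, gpr is 1 (r3
// already holds `a`), fpr is 0, and overflow_arg_area points at the first
// stack-passed variadic argument.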
3962
3963 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3964 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3965 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3966 PtrVT);
3967 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3968 PtrVT);
3969
3970 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3971 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3972
3973 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3974 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3975
3976 uint64_t FPROffset = 1;
3977 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3978
3979 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3980
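// With 32-bit pointers, the stores below land at byte offsets 0 (gpr),
// 1 (fpr), 4 (overflow_arg_area, after 2 bytes of padding) and
// 8 (reg_save_area), matching the struct layout above.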
3981 // Store first byte : number of int regs
3982 SDValue firstStore =
3983 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3984 MachinePointerInfo(SV), MVT::i8);
3985 uint64_t nextOffset = FPROffset;
3986 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3987 ConstFPROffset);
3988
3989 // Store second byte : number of float regs
3990 SDValue secondStore =
3991 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3992 MachinePointerInfo(SV, nextOffset), MVT::i8);
3993 nextOffset += StackOffset;
3994 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3995
3996 // Store second word : arguments given on stack
3997 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3998 MachinePointerInfo(SV, nextOffset));
3999 nextOffset += FrameOffset;
4000 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
4001
4002 // Store third word : arguments given in registers
4003 return DAG.getStore(thirdStore, dl, FR, nextPtr,
4004 MachinePointerInfo(SV, nextOffset));
4005}
4006
4007/// FPR - The set of FP registers that should be allocated for arguments
4008/// on Darwin and AIX.
4009static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
4010 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
4011 PPC::F11, PPC::F12, PPC::F13};
4012
4013/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4014/// the stack.
4015static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4016 unsigned PtrByteSize) {
4017 unsigned ArgSize = ArgVT.getStoreSize();
4018 if (Flags.isByVal())
4019 ArgSize = Flags.getByValSize();
4020
4021 // Round up to multiples of the pointer size, except for array members,
4022 // which are always packed.
4023 if (!Flags.isInConsecutiveRegs())
4024 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
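// Illustrative example: a 13-byte byval argument with an 8-byte pointer
// yields ArgSize = 16, while a 4-byte array member with
// isInConsecutiveRegs() set stays at 4.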
4025
4026 return ArgSize;
4027}
4028
4029/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4030/// on the stack.
4031 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4032 ISD::ArgFlagsTy Flags,
4033 unsigned PtrByteSize) {
4034 Align Alignment(PtrByteSize);
4035
4036 // Altivec parameters are padded to a 16 byte boundary.
4037 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4038 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4039 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4040 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4041 Alignment = Align(16);
4042
4043 // ByVal parameters are aligned as requested.
4044 if (Flags.isByVal()) {
4045 auto BVAlign = Flags.getNonZeroByValAlign();
4046 if (BVAlign > PtrByteSize) {
4047 if (BVAlign.value() % PtrByteSize != 0)
4049 "ByVal alignment is not a multiple of the pointer size");
4050
4051 Alignment = BVAlign;
4052 }
4053 }
4054
4055 // Array members are always packed to their original alignment.
4056 if (Flags.isInConsecutiveRegs()) {
4057 // If the array member was split into multiple registers, the first
4058 // needs to be aligned to the size of the full type. (Except for
4059 // ppcf128, which is only aligned as its f64 components.)
4060 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4061 Alignment = Align(OrigVT.getStoreSize());
4062 else
4063 Alignment = Align(ArgVT.getStoreSize());
4064 }
4065
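// Illustrative example: a v4i32 argument gets Align(16), and a byval
// argument requesting 32-byte alignment (a multiple of an 8-byte pointer)
// keeps Align(32).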
4066 return Alignment;
4067}
4068
4069/// CalculateStackSlotUsed - Return whether this argument will use its
4070/// stack slot (instead of being passed in registers). ArgOffset,
4071/// AvailableFPRs, and AvailableVRs must hold the current argument
4072/// position, and will be updated to account for this argument.
4073static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4074 unsigned PtrByteSize, unsigned LinkageSize,
4075 unsigned ParamAreaSize, unsigned &ArgOffset,
4076 unsigned &AvailableFPRs,
4077 unsigned &AvailableVRs) {
4078 bool UseMemory = false;
4079
4080 // Respect alignment of argument on the stack.
4081 Align Alignment =
4082 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4083 ArgOffset = alignTo(ArgOffset, Alignment);
4084 // If there's no space left in the argument save area, we must
4085 // use memory (this check also catches zero-sized arguments).
4086 if (ArgOffset >= LinkageSize + ParamAreaSize)
4087 UseMemory = true;
4088
4089 // Allocate argument on the stack.
4090 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4091 if (Flags.isInConsecutiveRegsLast())
4092 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4093 // If we overran the argument save area, we must use memory
4094 // (this check catches arguments passed partially in memory)
4095 if (ArgOffset > LinkageSize + ParamAreaSize)
4096 UseMemory = true;
4097
4098 // However, if the argument is actually passed in an FPR or a VR,
4099 // we don't use memory after all.
4100 if (!Flags.isByVal()) {
4101 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4102 if (AvailableFPRs > 0) {
4103 --AvailableFPRs;
4104 return false;
4105 }
4106 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4107 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4108 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4109 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4110 if (AvailableVRs > 0) {
4111 --AvailableVRs;
4112 return false;
4113 }
4114 }
4115
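// Illustrative example: with the ELFv2 linkage size of 32 and a 64-byte
// parameter save area, a ninth consecutive f64 argument advances ArgOffset
// past 96 yet still returns false while an FPR remains available.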
4116 return UseMemory;
4117}
4118
4119/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4120/// ensure minimum alignment required for target.
4121 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4122 unsigned NumBytes) {
4123 return alignTo(NumBytes, Lowering->getStackAlign());
4124}
4125
4126SDValue PPCTargetLowering::LowerFormalArguments(
4127 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4128 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4129 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4130 if (Subtarget.isAIXABI())
4131 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4132 InVals);
4133 if (Subtarget.is64BitELFABI())
4134 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4135 InVals);
4136 assert(Subtarget.is32BitELFABI());
4137 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4138 InVals);
4139}
4140
4141SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4142 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4143 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4144 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4145
4146 // 32-bit SVR4 ABI Stack Frame Layout:
4147 // +-----------------------------------+
4148 // +--> | Back chain |
4149 // | +-----------------------------------+
4150 // | | Floating-point register save area |
4151 // | +-----------------------------------+
4152 // | | General register save area |
4153 // | +-----------------------------------+
4154 // | | CR save word |
4155 // | +-----------------------------------+
4156 // | | VRSAVE save word |
4157 // | +-----------------------------------+
4158 // | | Alignment padding |
4159 // | +-----------------------------------+
4160 // | | Vector register save area |
4161 // | +-----------------------------------+
4162 // | | Local variable space |
4163 // | +-----------------------------------+
4164 // | | Parameter list area |
4165 // | +-----------------------------------+
4166 // | | LR save word |
4167 // | +-----------------------------------+
4168 // SP--> +--- | Back chain |
4169 // +-----------------------------------+
4170 //
4171 // Specifications:
4172 // System V Application Binary Interface PowerPC Processor Supplement
4173 // AltiVec Technology Programming Interface Manual
4174
4175 MachineFunction &MF = DAG.getMachineFunction();
4176 MachineFrameInfo &MFI = MF.getFrameInfo();
4177 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4178
4179 EVT PtrVT = getPointerTy(MF.getDataLayout());
4180 // Potential tail calls could cause overwriting of argument stack slots.
4181 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4182 (CallConv == CallingConv::Fast));
4183 const Align PtrAlign(4);
4184
4185 // Assign locations to all of the incoming arguments.
4186 SmallVector<CCValAssign, 16> ArgLocs;
4187 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4188 *DAG.getContext());
4189
4190 // Reserve space for the linkage area on the stack.
4191 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4192 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4193 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4194
4195 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4196 CCValAssign &VA = ArgLocs[i];
4197
4198 // Arguments stored in registers.
4199 if (VA.isRegLoc()) {
4200 const TargetRegisterClass *RC;
4201 EVT ValVT = VA.getValVT();
4202
4203 switch (ValVT.getSimpleVT().SimpleTy) {
4204 default:
4205 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4206 case MVT::i1:
4207 case MVT::i32:
4208 RC = &PPC::GPRCRegClass;
4209 break;
4210 case MVT::f32:
4211 if (Subtarget.hasP8Vector())
4212 RC = &PPC::VSSRCRegClass;
4213 else if (Subtarget.hasSPE())
4214 RC = &PPC::GPRCRegClass;
4215 else
4216 RC = &PPC::F4RCRegClass;
4217 break;
4218 case MVT::f64:
4219 if (Subtarget.hasVSX())
4220 RC = &PPC::VSFRCRegClass;
4221 else if (Subtarget.hasSPE())
4222 // SPE passes doubles in GPR pairs.
4223 RC = &PPC::GPRCRegClass;
4224 else
4225 RC = &PPC::F8RCRegClass;
4226 break;
4227 case MVT::v16i8:
4228 case MVT::v8i16:
4229 case MVT::v4i32:
4230 RC = &PPC::VRRCRegClass;
4231 break;
4232 case MVT::v4f32:
4233 RC = &PPC::VRRCRegClass;
4234 break;
4235 case MVT::v2f64:
4236 case MVT::v2i64:
4237 RC = &PPC::VRRCRegClass;
4238 break;
4239 }
4240
4241 SDValue ArgValue;
4242 // Transform the arguments stored in physical registers into
4243 // virtual ones.
4244 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4245 assert(i + 1 < e && "No second half of double precision argument");
4246 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4247 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4248 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4249 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4250 if (!Subtarget.isLittleEndian())
4251 std::swap (ArgValueLo, ArgValueHi);
4252 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4253 ArgValueHi);
4254 } else {
4255 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4256 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4257 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4258 if (ValVT == MVT::i1)
4259 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4260 }
4261
4262 InVals.push_back(ArgValue);
4263 } else {
4264 // Argument stored in memory.
4265 assert(VA.isMemLoc());
4266
4267 // Get the extended size of the argument type on the stack
4268 unsigned ArgSize = VA.getLocVT().getStoreSize();
4269 // Get the actual size of the argument type
4270 unsigned ObjSize = VA.getValVT().getStoreSize();
4271 unsigned ArgOffset = VA.getLocMemOffset();
4272 // Stack objects in PPC32 are right justified.
4273 ArgOffset += ArgSize - ObjSize;
4274 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4275
4276 // Create load nodes to retrieve arguments from the stack.
4277 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4278 InVals.push_back(
4279 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4280 }
4281 }
4282
4283 // Assign locations to all of the incoming aggregate by value arguments.
4284 // Aggregates passed by value are stored in the local variable space of the
4285 // caller's stack frame, right above the parameter list area.
4286 SmallVector<CCValAssign, 16> ByValArgLocs;
4287 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4288 ByValArgLocs, *DAG.getContext());
4289
4290 // Reserve stack space for the allocations in CCInfo.
4291 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4292
4293 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4294
4295 // Area that is at least reserved in the caller of this function.
4296 unsigned MinReservedArea = CCByValInfo.getStackSize();
4297 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4298
4299 // Set the size that is at least reserved in caller of this function. Tail
4300 // call optimized function's reserved stack space needs to be aligned so that
4301 // taking the difference between two stack areas will result in an aligned
4302 // stack.
4303 MinReservedArea =
4304 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4305 FuncInfo->setMinReservedArea(MinReservedArea);
4306
4307 SmallVector<SDValue, 8> MemOps;
4308
4309 // If the function takes variable number of arguments, make a frame index for
4310 // the start of the first vararg value... for expansion of llvm.va_start.
4311 if (isVarArg) {
4312 static const MCPhysReg GPArgRegs[] = {
4313 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4314 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4315 };
4316 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4317
4318 static const MCPhysReg FPArgRegs[] = {
4319 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4320 PPC::F8
4321 };
4322 unsigned NumFPArgRegs = std::size(FPArgRegs);
4323
4324 if (useSoftFloat() || hasSPE())
4325 NumFPArgRegs = 0;
4326
4327 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4328 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4329
4330 // Make room for NumGPArgRegs and NumFPArgRegs.
4331 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4332 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4333
4334 FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4335 PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4336
4337 FuncInfo->setVarArgsFrameIndex(
4338 MFI.CreateStackObject(Depth, Align(8), false));
4339 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4340
4341 // The fixed integer arguments of a variadic function are stored to the
4342 // VarArgsFrameIndex on the stack so that they may be loaded by
4343 // dereferencing the result of va_next.
4344 for (MCPhysReg GPArgReg : GPArgRegs) {
4345 // Get an existing live-in vreg, or add a new one.
4346 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgReg);
4347 if (!VReg)
4348 VReg = MF.addLiveIn(GPArgReg, &PPC::GPRCRegClass);
4349
4350 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4351 SDValue Store =
4352 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4353 MemOps.push_back(Store);
4354 // Increment the address by four for the next argument to store
4355 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4356 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4357 }
4358
4359 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4360 // is set.
4361 // The double arguments are stored to the VarArgsFrameIndex
4362 // on the stack.
4363 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4364 // Get an existing live-in vreg, or add a new one.
4365 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4366 if (!VReg)
4367 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4368
4369 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4370 SDValue Store =
4371 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4372 MemOps.push_back(Store);
4373 // Increment the address by eight for the next argument to store
4374 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4375 PtrVT);
4376 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4377 }
4378 }
4379
4380 if (!MemOps.empty())
4381 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4382
4383 return Chain;
4384}
4385
4386// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4387// value to MVT::i64 and then truncate to the correct register size.
4388SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4389 EVT ObjectVT, SelectionDAG &DAG,
4390 SDValue ArgVal,
4391 const SDLoc &dl) const {
4392 if (Flags.isSExt())
4393 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4394 DAG.getValueType(ObjectVT));
4395 else if (Flags.isZExt())
4396 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4397 DAG.getValueType(ObjectVT));
4398
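// The AssertSext/AssertZext guarantee makes the TRUNCATE below free; e.g. a
// signext i32 argument arriving in a 64-bit GPR needs no real extension or
// truncation instruction.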
4399 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4400}
4401
4402SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4403 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4404 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4405 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4406 // TODO: add description of PPC stack frame format, or at least some docs.
4407 //
4408 bool isELFv2ABI = Subtarget.isELFv2ABI();
4409 bool isLittleEndian = Subtarget.isLittleEndian();
4410 MachineFunction &MF = DAG.getMachineFunction();
4411 MachineFrameInfo &MFI = MF.getFrameInfo();
4412 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4413
4414 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4415 "fastcc not supported on varargs functions");
4416
4417 EVT PtrVT = getPointerTy(MF.getDataLayout());
4418 // Potential tail calls could cause overwriting of argument stack slots.
4419 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4420 (CallConv == CallingConv::Fast));
4421 unsigned PtrByteSize = 8;
4422 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4423
4424 static const MCPhysReg GPR[] = {
4425 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4426 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4427 };
4428 static const MCPhysReg VR[] = {
4429 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4430 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4431 };
4432
4433 const unsigned Num_GPR_Regs = std::size(GPR);
4434 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4435 const unsigned Num_VR_Regs = std::size(VR);
4436
4437 // Do a first pass over the arguments to determine whether the ABI
4438 // guarantees that our caller has allocated the parameter save area
4439 // on its stack frame. In the ELFv1 ABI, this is always the case;
4440 // in the ELFv2 ABI, it is true if this is a vararg function or if
4441 // any parameter is located in a stack slot.
4442
4443 bool HasParameterArea = !isELFv2ABI || isVarArg;
4444 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4445 unsigned NumBytes = LinkageSize;
4446 unsigned AvailableFPRs = Num_FPR_Regs;
4447 unsigned AvailableVRs = Num_VR_Regs;
4448 for (const ISD::InputArg &In : Ins) {
4449 if (In.Flags.isNest())
4450 continue;
4451
4452 if (CalculateStackSlotUsed(In.VT, In.ArgVT, In.Flags, PtrByteSize,
4453 LinkageSize, ParamAreaSize, NumBytes,
4454 AvailableFPRs, AvailableVRs))
4455 HasParameterArea = true;
4456 }
4457
4458 // Add DAG nodes to load the arguments or copy them out of registers. On
4459 // entry to a function on PPC, the arguments start after the linkage area,
4460 // although the first ones are often in registers.
4461
4462 unsigned ArgOffset = LinkageSize;
4463 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4464 SmallVector<SDValue, 8> MemOps;
4465 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4466 unsigned CurArgIdx = 0;
4467 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4468 SDValue ArgVal;
4469 bool needsLoad = false;
4470 EVT ObjectVT = Ins[ArgNo].VT;
4471 EVT OrigVT = Ins[ArgNo].ArgVT;
4472 unsigned ObjSize = ObjectVT.getStoreSize();
4473 unsigned ArgSize = ObjSize;
4474 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4475 if (Ins[ArgNo].isOrigArg()) {
4476 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4477 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4478 }
4479 // We re-align the argument offset for each argument, except when using the
4480 // fast calling convention, when we need to make sure we do that only when
4481 // we'll actually use a stack slot.
4482 unsigned CurArgOffset;
4483 Align Alignment;
4484 auto ComputeArgOffset = [&]() {
4485 /* Respect alignment of argument on the stack. */
4486 Alignment =
4487 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4488 ArgOffset = alignTo(ArgOffset, Alignment);
4489 CurArgOffset = ArgOffset;
4490 };
4491
4492 if (CallConv != CallingConv::Fast) {
4493 ComputeArgOffset();
4494
4495 /* Compute GPR index associated with argument offset. */
4496 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4497 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4498 }
4499
4500 // FIXME the codegen can be much improved in some cases.
4501 // We do not have to keep everything in memory.
4502 if (Flags.isByVal()) {
4503 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4504
4505 if (CallConv == CallingConv::Fast)
4506 ComputeArgOffset();
4507
4508 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of the register size.
4509 ObjSize = Flags.getByValSize();
4510 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4511 // Empty aggregate parameters do not take up registers. Examples:
4512 // struct { } a;
4513 // union { } b;
4514 // int c[0];
4515 // etc. However, we have to provide a place-holder in InVals, so
4516 // pretend we have an 8-byte item at the current address for that
4517 // purpose.
4518 if (!ObjSize) {
4519 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4520 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4521 InVals.push_back(FIN);
4522 continue;
4523 }
4524
4525 // Create a stack object covering all stack doublewords occupied
4526 // by the argument. If the argument is (fully or partially) on
4527 // the stack, or if the argument is fully in registers but the
4528 // caller has allocated the parameter save anyway, we can refer
4529 // directly to the caller's stack frame. Otherwise, create a
4530 // local copy in our own frame.
4531 int FI;
4532 if (HasParameterArea ||
4533 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4534 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4535 else
4536 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4537 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4538
4539 // Handle aggregates smaller than 8 bytes.
4540 if (ObjSize < PtrByteSize) {
4541 // The value of the object is its address, which differs from the
4542 // address of the enclosing doubleword on big-endian systems.
4543 SDValue Arg = FIN;
4544 if (!isLittleEndian) {
4545 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4546 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4547 }
4548 InVals.push_back(Arg);
4549
4550 if (GPR_idx != Num_GPR_Regs) {
4551 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4552 FuncInfo->addLiveInAttr(VReg, Flags);
4553 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4554 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4555 SDValue Store =
4556 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4557 MachinePointerInfo(&*FuncArg), ObjType);
4558 MemOps.push_back(Store);
4559 }
4560 // Whether we copied from a register or not, advance the offset
4561 // into the parameter save area by a full doubleword.
4562 ArgOffset += PtrByteSize;
4563 continue;
4564 }
4565
4566 // The value of the object is its address, which is the address of
4567 // its first stack doubleword.
4568 InVals.push_back(FIN);
4569
4570 // Store whatever pieces of the object are in registers to memory.
4571 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4572 if (GPR_idx == Num_GPR_Regs)
4573 break;
4574
4575 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4576 FuncInfo->addLiveInAttr(VReg, Flags);
4577 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4578 SDValue Addr = FIN;
4579 if (j) {
4580 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4581 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4582 }
4583 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4584 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4585 SDValue Store =
4586 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4587 MachinePointerInfo(&*FuncArg, j), ObjType);
4588 MemOps.push_back(Store);
4589 ++GPR_idx;
4590 }
4591 ArgOffset += ArgSize;
4592 continue;
4593 }
4594
4595 switch (ObjectVT.getSimpleVT().SimpleTy) {
4596 default: llvm_unreachable("Unhandled argument type!");
4597 case MVT::i1:
4598 case MVT::i32:
4599 case MVT::i64:
4600 if (Flags.isNest()) {
4601 // The 'nest' parameter, if any, is passed in R11.
4602 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4603 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4604
4605 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4606 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4607
4608 break;
4609 }
4610
4611 // These can be scalar arguments or elements of an integer array type
4612 // passed directly. Clang may use those instead of "byval" aggregate
4613 // types to avoid forcing arguments to memory unnecessarily.
4614 if (GPR_idx != Num_GPR_Regs) {
4615 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4616 FuncInfo->addLiveInAttr(VReg, Flags);
4617 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4618
4619 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4620 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4621 // value to MVT::i64 and then truncate to the correct register size.
4622 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4623 } else {
4624 if (CallConv == CallingConv::Fast)
4625 ComputeArgOffset();
4626
4627 needsLoad = true;
4628 ArgSize = PtrByteSize;
4629 }
4630 if (CallConv != CallingConv::Fast || needsLoad)
4631 ArgOffset += 8;
4632 break;
4633
4634 case MVT::f32:
4635 case MVT::f64:
4636 // These can be scalar arguments or elements of a float array type
4637 // passed directly. The latter are used to implement ELFv2 homogeneous
4638 // float aggregates.
4639 if (FPR_idx != Num_FPR_Regs) {
4640 unsigned VReg;
4641
4642 if (ObjectVT == MVT::f32)
4643 VReg = MF.addLiveIn(FPR[FPR_idx],
4644 Subtarget.hasP8Vector()
4645 ? &PPC::VSSRCRegClass
4646 : &PPC::F4RCRegClass);
4647 else
4648 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4649 ? &PPC::VSFRCRegClass
4650 : &PPC::F8RCRegClass);
4651
4652 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4653 ++FPR_idx;
4654 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4655 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4656 // once we support fp <-> gpr moves.
4657
4658 // This can only ever happen in the presence of f32 array types,
4659 // since otherwise we never run out of FPRs before running out
4660 // of GPRs.
4661 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4662 FuncInfo->addLiveInAttr(VReg, Flags);
4663 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4664
4665 if (ObjectVT == MVT::f32) {
4666 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4667 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4668 DAG.getConstant(32, dl, MVT::i32));
4669 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4670 }
4671
4672 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4673 } else {
4674 if (CallConv == CallingConv::Fast)
4675 ComputeArgOffset();
4676
4677 needsLoad = true;
4678 }
4679
4680 // When passing an array of floats, the array occupies consecutive
4681 // space in the argument area; only round up to the next doubleword
4682 // at the end of the array. Otherwise, each float takes 8 bytes.
4683 if (CallConv != CallingConv::Fast || needsLoad) {
4684 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4685 ArgOffset += ArgSize;
4686 if (Flags.isInConsecutiveRegsLast())
4687 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4688 }
4689 break;
4690 case MVT::v4f32:
4691 case MVT::v4i32:
4692 case MVT::v8i16:
4693 case MVT::v16i8:
4694 case MVT::v2f64:
4695 case MVT::v2i64:
4696 case MVT::v1i128:
4697 case MVT::f128:
4698 // These can be scalar arguments or elements of a vector array type
4699 // passed directly. The latter are used to implement ELFv2 homogeneous
4700 // vector aggregates.
4701 if (VR_idx != Num_VR_Regs) {
4702 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4703 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4704 ++VR_idx;
4705 } else {
4706 if (CallConv == CallingConv::Fast)
4707 ComputeArgOffset();
4708 needsLoad = true;
4709 }
4710 if (CallConv != CallingConv::Fast || needsLoad)
4711 ArgOffset += 16;
4712 break;
4713 }
4714
4715 // We need to load the argument to a virtual register if we determined
4716 // above that we ran out of physical registers of the appropriate type.
4717 if (needsLoad) {
4718 if (ObjSize < ArgSize && !isLittleEndian)
4719 CurArgOffset += ArgSize - ObjSize;
4720 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4721 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4722 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4723 }
4724
4725 InVals.push_back(ArgVal);
4726 }
4727
4728 // Area that is at least reserved in the caller of this function.
4729 unsigned MinReservedArea;
4730 if (HasParameterArea)
4731 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4732 else
4733 MinReservedArea = LinkageSize;
4734
4735 // Set the size that is at least reserved in caller of this function. Tail
4736 // call optimized functions' reserved stack space needs to be aligned so that
4737 // taking the difference between two stack areas will result in an aligned
4738 // stack.
4739 MinReservedArea =
4740 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4741 FuncInfo->setMinReservedArea(MinReservedArea);
4742
4743 // If the function takes variable number of arguments, make a frame index for
4744 // the start of the first vararg value... for expansion of llvm.va_start.
4745 // The ELFv2 ABI spec states:
4746 // C programs that are intended to be *portable* across different compilers
4747 // and architectures must use the header file <stdarg.h> to deal with variable
4748 // argument lists.
4749 if (isVarArg && MFI.hasVAStart()) {
4750 int Depth = ArgOffset;
4751
4752 FuncInfo->setVarArgsFrameIndex(
4753 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4754 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4755
4756 // If this function is vararg, store any remaining integer argument regs
4757 // to their spots on the stack so that they may be loaded by dereferencing
4758 // the result of va_next.
4759 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4760 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4761 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4762 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4763 SDValue Store =
4764 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4765 MemOps.push_back(Store);
4766 // Increment the address by eight for the next argument to store
4767 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4768 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4769 }
4770 }
4771
4772 if (!MemOps.empty())
4773 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4774
4775 return Chain;
4776}
4777
4778/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4779/// adjusted to accommodate the arguments for the tailcall.
4780static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4781 unsigned ParamSize) {
4782
4783 if (!isTailCall) return 0;
4784
4785 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4786 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4787 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4788 // Remember only if the new adjustment is bigger.
4789 if (SPDiff < FI->getTailCallSPDelta())
4790 FI->setTailCallSPDelta(SPDiff);
4791
4792 return SPDiff;
4793}
4794
4795static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4796
4797static bool callsShareTOCBase(const Function *Caller,
4798 const GlobalValue *CalleeGV,
4799 const TargetMachine &TM) {
4800 // It does not make sense to call callsShareTOCBase() with a caller that
4801 // is PC Relative since PC Relative callers do not have a TOC.
4802#ifndef NDEBUG
4803 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4804 assert(!STICaller->isUsingPCRelativeCalls() &&
4805 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4806#endif
4807
4808 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4809 // don't have enough information to determine if the caller and callee share
4810 // the same TOC base, so we have to pessimistically assume they don't for
4811 // correctness.
4812 if (!CalleeGV)
4813 return false;
4814
4815 // If the callee is preemptable, then the static linker will use a plt-stub
4816 // which saves the toc to the stack, and needs a nop after the call
4817 // instruction to convert to a toc-restore.
4818 if (!TM.shouldAssumeDSOLocal(CalleeGV))
4819 return false;
4820
4821 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4822 // We may need a TOC restore in the situation where the caller requires a
4823 // valid TOC but the callee is PC Relative and does not.
4824 const Function *F = dyn_cast<Function>(CalleeGV);
4825 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4826
4827 // If we have an Alias we can try to get the function from there.
4828 if (Alias) {
4829 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4830 F = dyn_cast<Function>(GlobalObj);
4831 }
4832
4833 // If we still have no valid function pointer we do not have enough
4834 // information to determine if the callee uses PC Relative calls so we must
4835 // assume that it does.
4836 if (!F)
4837 return false;
4838
4839 // If the callee uses PC Relative we cannot guarantee that the callee won't
4840 // clobber the TOC of the caller and so we must assume that the two
4841 // functions do not share a TOC base.
4842 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4843 if (STICallee->isUsingPCRelativeCalls())
4844 return false;
4845
4846 // If the GV is not a strong definition then we need to assume it can be
4847 // replaced by another function at link time. The function that replaces
4848 // it may not share the same TOC as the caller since the callee may be
4849 // replaced by a PC Relative version of the same function.
4850 if (!CalleeGV->isStrongDefinitionForLinker())
4851 return false;
4852
4853 // The medium and large code models are expected to provide a sufficiently
4854 // large TOC to provide all data addressing needs of a module with a
4855 // single TOC.
4856 if (CodeModel::Medium == TM.getCodeModel() ||
4857 CodeModel::Large == TM.getCodeModel())
4858 return true;
4859
4860 // Any explicitly-specified sections and section prefixes must also match.
4861 // Also, if we're using -ffunction-sections, then each function is always in
4862 // a different section (the same is true for COMDAT functions).
4863 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4864 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4865 return false;
4866 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
4867 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4868 return false;
4869 }
4870
4871 return true;
4872}
4873
4874static bool
4875 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4876 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4877 assert(Subtarget.is64BitELFABI());
4878
4879 const unsigned PtrByteSize = 8;
4880 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4881
4882 static const MCPhysReg GPR[] = {
4883 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4884 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4885 };
4886 static const MCPhysReg VR[] = {
4887 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4888 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4889 };
4890
4891 const unsigned NumGPRs = std::size(GPR);
4892 const unsigned NumFPRs = 13;
4893 const unsigned NumVRs = std::size(VR);
4894 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4895
4896 unsigned NumBytes = LinkageSize;
4897 unsigned AvailableFPRs = NumFPRs;
4898 unsigned AvailableVRs = NumVRs;
4899
4900 for (const ISD::OutputArg& Param : Outs) {
4901 if (Param.Flags.isNest()) continue;
4902
4903 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4904 LinkageSize, ParamAreaSize, NumBytes,
4905 AvailableFPRs, AvailableVRs))
4906 return true;
4907 }
4908 return false;
4909}
4910
4911static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4912 if (CB.arg_size() != CallerFn->arg_size())
4913 return false;
4914
4915 auto CalleeArgIter = CB.arg_begin();
4916 auto CalleeArgEnd = CB.arg_end();
4917 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4918
4919 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4920 const Value* CalleeArg = *CalleeArgIter;
4921 const Value* CallerArg = &(*CallerArgIter);
4922 if (CalleeArg == CallerArg)
4923 continue;
4924
4925 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4926 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4927 // }
4928 // 1st argument of callee is undef and has the same type as caller.
4929 if (CalleeArg->getType() == CallerArg->getType() &&
4930 isa<UndefValue>(CalleeArg))
4931 continue;
4932
4933 return false;
4934 }
4935
4936 return true;
4937}
4938
4939 // Returns true if TCO is possible between the caller's and callee's
4940 // calling conventions.
4941static bool
4942 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4943 CallingConv::ID CalleeCC) {
4944 // Tail calls are possible with fastcc and ccc.
4945 auto isTailCallableCC = [] (CallingConv::ID CC){
4946 return CC == CallingConv::C || CC == CallingConv::Fast;
4947 };
4948 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4949 return false;
4950
4951 // We can safely tail call both fastcc and ccc callees from a c calling
4952 // convention caller. If the caller is fastcc, we may have less stack space
4953 // than a non-fastcc caller with the same signature so disable tail-calls in
4954 // that case.
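// Illustrative example: a C caller may tail call both C and fastcc callees,
// while a fastcc caller may only tail call another fastcc callee.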
4955 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4956}
4957
4958bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4959 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
4960 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
4961 const SmallVectorImpl<ISD::OutputArg> &Outs,
4962 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
4963 bool isCalleeExternalSymbol) const {
4964 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4965
4966 if (DisableSCO && !TailCallOpt) return false;
4967
4968 // Variadic argument functions are not supported.
4969 if (isVarArg) return false;
4970
4971 // Check that the calling conventions are compatible for tco.
4972 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
4973 return false;
4974
4975 // A caller with any byval parameter is not supported.
4976 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4977 return false;
4978
4979 // A callee with any byval parameter is not supported either.
4980 // Note: This is a quick work around, because in some cases, e.g.
4981 // caller's stack size > callee's stack size, we are still able to apply
4982 // sibling call optimization. For example, gcc is able to do SCO for caller1
4983 // in the following example, but not for caller2.
4984 // struct test {
4985 // long int a;
4986 // char ary[56];
4987 // } gTest;
4988 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4989 // b->a = v.a;
4990 // return 0;
4991 // }
4992 // void caller1(struct test a, struct test c, struct test *b) {
4993 // callee(gTest, b); }
4994 // void caller2(struct test *b) { callee(gTest, b); }
4995 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4996 return false;
4997
4998 // If callee and caller use different calling conventions, we cannot pass
4999 // parameters on stack since offsets for the parameter area may be different.
5000 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
5001 return false;
5002
5003 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
5004 // the caller and callee share the same TOC for TCO/SCO. If the caller and
5005 // callee potentially have different TOC bases then we cannot tail call since
5006 // we need to restore the TOC pointer after the call.
5007 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
5008 // We cannot guarantee this for indirect calls or calls to external functions.
5009 // When PC-Relative addressing is used, the concept of the TOC is no longer
5010 // applicable so this check is not required.
5011 // Check first for indirect calls.
5012 if (!Subtarget.isUsingPCRelativeCalls() &&
5013 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5014 return false;
5015
5016 // Check if we share the TOC base.
5017 if (!Subtarget.isUsingPCRelativeCalls() &&
5018 !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
5019 return false;
5020
5021 // TCO allows altering callee ABI, so we don't have to check further.
5022 if (CalleeCC == CallingConv::Fast && TailCallOpt)
5023 return true;
5024
5025 if (DisableSCO) return false;
5026
5027 // If the callee uses the same argument list as the caller, then we can
5028 // apply SCO in this case. If not, then we need to check whether the callee
5029 // needs stack for passing arguments.
5030 // PC Relative tail calls may not have a CallBase.
5031 // If there is no CallBase we cannot verify if we have the same argument
5032 // list so assume that we don't have the same argument list.
5033 if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5034 needStackSlotPassParameters(Subtarget, Outs))
5035 return false;
5036 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5037 return false;
5038
5039 return true;
5040}
5041
5042/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5043/// for tail call optimization. Targets which want to do tail call
5044/// optimization should implement this function.
5045bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5046 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5047 CallingConv::ID CallerCC, bool isVarArg,
5048 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5049 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5050 return false;
5051
5052 // Variable argument functions are not supported.
5053 if (isVarArg)
5054 return false;
5055
5056 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5057 // Functions containing by val parameters are not supported.
5058 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5059 return false;
5060
5061 // Non-PIC/GOT tail calls are supported.
5062 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5063 return true;
5064
5065 // At the moment we can only do local tail calls (in same module, hidden
5066 // or protected) if we are generating PIC.
5067 if (CalleeGV)
5068 return CalleeGV->hasHiddenVisibility() ||
5069 CalleeGV->hasProtectedVisibility();
5070 }
5071
5072 return false;
5073}
5074
5075/// isCallCompatibleAddress - Return the immediate to use if the specified
5076/// 32-bit value is representable in the immediate field of a BxA instruction.
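/// Illustrative example: an absolute address of 0x00800000 is word aligned
/// and unchanged by SignExtend32<26>, so it is encodable and the returned
/// immediate is 0x00800000 >> 2 = 0x00200000.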
5077 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5078 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5079 if (!C) return nullptr;
5080
5081 int Addr = C->getZExtValue();
5082 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5083 SignExtend32<26>(Addr) != Addr)
5084 return nullptr; // Top 6 bits have to be sext of immediate.
5085
5086 return DAG
5087 .getConstant(
5088 (int)C->getZExtValue() >> 2, SDLoc(Op),
5089 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5090 .getNode();
5091}
5092
5093namespace {
5094
5095struct TailCallArgumentInfo {
5096 SDValue Arg;
5097 SDValue FrameIdxOp;
5098 int FrameIdx = 0;
5099
5100 TailCallArgumentInfo() = default;
5101};
5102
5103} // end anonymous namespace
5104
5105/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5106 static void StoreTailCallArgumentsToStackSlot(
5107 SelectionDAG &DAG, SDValue Chain,
5108 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5109 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5110 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5111 SDValue Arg = TailCallArgs[i].Arg;
5112 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5113 int FI = TailCallArgs[i].FrameIdx;
5114 // Store relative to framepointer.
5115 MemOpChains.push_back(DAG.getStore(
5116 Chain, dl, Arg, FIN,
5117 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
5118 }
5119}
5120
5121/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5122/// the appropriate stack slot for the tail call optimized function call.
5123 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5124 SDValue OldRetAddr, SDValue OldFP,
5125 int SPDiff, const SDLoc &dl) {
5126 if (SPDiff) {
5127 // Calculate the new stack slot for the return address.
5128 MachineFunction &MF = DAG.getMachineFunction();
5129 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5130 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5131 int SlotSize = Subtarget.isPPC64() ? 8 : 4;
5132 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5133 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5134 NewRetAddrLoc, true);
5135 SDValue NewRetAddrFrIdx =
5136 DAG.getFrameIndex(NewRetAddr, Subtarget.getScalarIntVT());
5137 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5138 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5139 }
5140 return Chain;
5141}
5142
5143/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5144/// the position of the argument.
5145 static void CalculateTailCallArgDest(
5146 SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg,
5147 int SPDiff, unsigned ArgOffset,
5148 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5149 int Offset = ArgOffset + SPDiff;
5150 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5151 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5152 EVT VT = IsPPC64 ? MVT::i64 : MVT::i32;
5153 SDValue FIN = DAG.getFrameIndex(FI, VT);
5154 TailCallArgumentInfo Info;
5155 Info.Arg = Arg;
5156 Info.FrameIdxOp = FIN;
5157 Info.FrameIdx = FI;
5158 TailCallArguments.push_back(Info);
5159}
5160
5161 /// EmitTailCallLoadFPAndRetAddr - Emit load from frame pointer and return
5162 /// address stack slot. Returns the chain as result and the loaded frame
5163 /// pointers in LROpOut/FPOpOut. Used when tail calling.
5164SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5165 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5166 SDValue &FPOpOut, const SDLoc &dl) const {
5167 if (SPDiff) {
5168 // Load the LR and FP stack slot for later adjusting.
5169 LROpOut = getReturnAddrFrameIndex(DAG);
5170 LROpOut = DAG.getLoad(Subtarget.getScalarIntVT(), dl, Chain, LROpOut,
5171 MachinePointerInfo());
5172 Chain = SDValue(LROpOut.getNode(), 1);
5173 }
5174 return Chain;
5175}
5176
5177/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5178/// by "Src" to address "Dst" of size "Size". Alignment information is
5179/// specified by the specific parameter attribute. The copy will be passed as
5180/// a byval function parameter.
5181/// Sometimes what we are copying is the end of a larger object, the part that
5182/// does not fit in registers.
5183 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5184 SDValue Chain, ISD::ArgFlagsTy Flags,
5185 SelectionDAG &DAG, const SDLoc &dl) {
5186 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5187 return DAG.getMemcpy(
5188 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
5189 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
5190}
5191
5192/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5193/// tail calls.
5194 static void LowerMemOpCallTo(
5195 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5196 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5197 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5198 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5199 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5200 if (!isTailCall) {
5201 if (isVector) {
5202 SDValue StackPtr;
5203 if (isPPC64)
5204 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5205 else
5206 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5207 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5208 DAG.getConstant(ArgOffset, dl, PtrVT));
5209 }
5210 MemOpChains.push_back(
5211 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5212 // Calculate and remember argument location.
5213 } else
5214 CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5215 TailCallArguments);
5216}
5217
5218static void
5219 PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5220 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5221 SDValue FPOp,
5222 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5223 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5224 // might overwrite each other in case of tail call optimization.
5225 SmallVector<SDValue, 8> MemOpChains2;
5226 // Do not flag preceding copytoreg stuff together with the following stuff.
5227 InGlue = SDValue();
5228 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5229 MemOpChains2, dl);
5230 if (!MemOpChains2.empty())
5231 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5232
5233 // Store the return address to the appropriate stack slot.
5234 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5235
5236 // Emit callseq_end just before tailcall node.
5237 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5238 InGlue = Chain.getValue(1);
5239}
5240
5241// Is this global address that of a function that can be called by name? (as
5242// opposed to something that must hold a descriptor for an indirect call).
5243static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5244 if (GV) {
5245 if (GV->isThreadLocal())
5246 return false;
5247
5248 return GV->getValueType()->isFunctionTy();
5249 }
5250
5251 return false;
5252}
5253
5254SDValue PPCTargetLowering::LowerCallResult(
5255 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5256 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5257 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5258  SmallVector<CCValAssign, 16> RVLocs;
5259  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5260 *DAG.getContext());
5261
5262 CCRetInfo.AnalyzeCallResult(
5263 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5264               ? RetCC_PPC_Cold
5265               : RetCC_PPC);
5266
5267 // Copy all of the result registers out of their specified physreg.
5268 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5269 CCValAssign &VA = RVLocs[i];
5270 assert(VA.isRegLoc() && "Can only return in registers!");
5271
5272 SDValue Val;
5273
5274 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5275 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5276 InGlue);
5277 Chain = Lo.getValue(1);
5278 InGlue = Lo.getValue(2);
5279 VA = RVLocs[++i]; // skip ahead to next loc
5280 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5281 InGlue);
5282 Chain = Hi.getValue(1);
5283 InGlue = Hi.getValue(2);
5284 if (!Subtarget.isLittleEndian())
5285        std::swap(Lo, Hi);
5286 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5287 } else {
5288 Val = DAG.getCopyFromReg(Chain, dl,
5289 VA.getLocReg(), VA.getLocVT(), InGlue);
5290 Chain = Val.getValue(1);
5291 InGlue = Val.getValue(2);
5292 }
5293
5294 switch (VA.getLocInfo()) {
5295 default: llvm_unreachable("Unknown loc info!");
5296 case CCValAssign::Full: break;
5297 case CCValAssign::AExt:
5298 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5299 break;
5300 case CCValAssign::ZExt:
5301 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5302 DAG.getValueType(VA.getValVT()));
5303 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5304 break;
5305 case CCValAssign::SExt:
5306 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5307 DAG.getValueType(VA.getValVT()));
5308 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5309 break;
5310 }
5311
5312 InVals.push_back(Val);
5313 }
5314
5315 return Chain;
5316}
5317
5318static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5319 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5320 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5321 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5322
5323 // PatchPoint calls are not indirect.
5324 if (isPatchPoint)
5325 return false;
5326
5327  if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
5328    return false;
5329
5330  // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs cannot
5331  // because the immediate function pointer points to a descriptor instead of
5332 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5333 // pointer immediate points to the global entry point, while the BLA would
5334 // need to jump to the local entry point (see rL211174).
5335 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5336 isBLACompatibleAddress(Callee, DAG))
5337 return false;
5338
5339 return true;
5340}
5341
5342// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5343static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5344 return Subtarget.isAIXABI() ||
5345 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5346}
5347
5348static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5349                              const Function &Caller, const SDValue &Callee,
5350 const PPCSubtarget &Subtarget,
5351 const TargetMachine &TM,
5352 bool IsStrictFPCall = false) {
5353 if (CFlags.IsTailCall)
5354 return PPCISD::TC_RETURN;
5355
5356 unsigned RetOpc = 0;
5357 // This is a call through a function pointer.
5358 if (CFlags.IsIndirect) {
5359    // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5360 // indirect calls. The save of the caller's TOC pointer to the stack will be
5361 // inserted into the DAG as part of call lowering. The restore of the TOC
5362 // pointer is modeled by using a pseudo instruction for the call opcode that
5363 // represents the 2 instruction sequence of an indirect branch and link,
5364 // immediately followed by a load of the TOC pointer from the stack save
5365 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5366 // as it is not saved or used.
5367 if (Subtarget.usePointerGlueHelper())
5368 RetOpc = PPCISD::BL_LOAD_TOC;
5369 else
5370 RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5371 : PPCISD::BCTRL;
5372 } else if (Subtarget.isUsingPCRelativeCalls()) {
5373 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5374 RetOpc = PPCISD::CALL_NOTOC;
5375 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5376    // The ABIs that maintain a TOC pointer across calls need to have a nop
5377    // immediately following the call instruction if the caller and callee may
5378    // have different TOC bases. At link time if the linker determines the calls
5379    // may not share a TOC base, the call is redirected to a trampoline inserted
5380    // by the linker. The trampoline will (among other things) save the caller's
5381 // TOC pointer at an ABI designated offset in the linkage area and the
5382 // linker will rewrite the nop to be a load of the TOC pointer from the
5383 // linkage area into gpr2.
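    //
    // As a sketch, a potentially cross-TOC call site then looks like:
    //
    //   bl callee
    //   nop            # linker may rewrite to: ld r2, 24(r1)  (ELFv2)
    //
    // (The TOC save slot sits at 24(r1) under ELFv2 and 40(r1) under ELFv1.)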
5384 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5385 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5386 RetOpc =
5387 callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5388 } else
5389 RetOpc = PPCISD::CALL;
5390 if (IsStrictFPCall) {
5391 switch (RetOpc) {
5392 default:
5393 llvm_unreachable("Unknown call opcode");
5394 case PPCISD::BCTRL_LOAD_TOC:
5395 RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5396 break;
5397 case PPCISD::BCTRL:
5398 RetOpc = PPCISD::BCTRL_RM;
5399 break;
5400 case PPCISD::BL_LOAD_TOC:
5401 RetOpc = PPCISD::BL_LOAD_TOC_RM;
5402 break;
5403 case PPCISD::CALL_NOTOC:
5404 RetOpc = PPCISD::CALL_NOTOC_RM;
5405 break;
5406 case PPCISD::CALL:
5407 RetOpc = PPCISD::CALL_RM;
5408 break;
5409 case PPCISD::CALL_NOP:
5410 RetOpc = PPCISD::CALL_NOP_RM;
5411 break;
5412 }
5413 }
5414 return RetOpc;
5415}
5416
5417static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5418 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5419 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5420 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5421 return SDValue(Dest, 0);
5422
5423 // Returns true if the callee is local, and false otherwise.
5424 auto isLocalCallee = [&]() {
5425    const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5426    const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5427
5428 return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5429           !isa_and_nonnull<GlobalIFunc>(GV);
5430  };
5431
5432 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5433 // a static relocation model causes some versions of GNU LD (2.17.50, at
5434 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5435 // built with secure-PLT.
5436 bool UsePlt =
5437 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5438      Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5439
5440 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5441 const TargetMachine &TM = Subtarget.getTargetMachine();
5442    const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5443    auto *S =
5444 static_cast<MCSymbolXCOFF *>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5445
5446    MVT PtrVT = Subtarget.getScalarIntVT();
5447 return DAG.getMCSymbol(S, PtrVT);
5448 };
5449
5450 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5451 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5452 if (isFunctionGlobalAddress(GV)) {
5453 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5454
5455 if (Subtarget.isAIXABI()) {
5456 return getAIXFuncEntryPointSymbolSDNode(GV);
5457 }
5458 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5459 UsePlt ? PPCII::MO_PLT : 0);
5460 }
5461
5462  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5463    const char *SymName = S->getSymbol();
5464 if (Subtarget.isAIXABI()) {
5465 // If there exists a user-declared function whose name is the same as the
5466 // ExternalSymbol's, then we pick up the user-declared version.
5467      const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5468      if (const Function *F =
5469 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5470 return getAIXFuncEntryPointSymbolSDNode(F);
5471
5472 // On AIX, direct function calls reference the symbol for the function's
5473 // entry point, which is named by prepending a "." before the function's
5474 // C-linkage name. A Qualname is returned here because an external
5475 // function entry point is a csect with XTY_ER property.
5476 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5477 auto &Context = DAG.getMachineFunction().getContext();
5478 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5479 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5480            XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
5481        return Sec->getQualNameSymbol();
5482 };
5483
5484 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5485 }
5486 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5487 UsePlt ? PPCII::MO_PLT : 0);
5488 }
5489
5490 // No transformation needed.
5491 assert(Callee.getNode() && "What no callee?");
5492 return Callee;
5493}
5494
5495static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5496  assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5497         "Expected a CALLSEQ_START SDNode.");
5498
5499 // The last operand is the chain, except when the node has glue. If the node
5500  // has glue, then the last operand is the glue, and the chain is the
5501  // second-to-last operand.
5502 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5503 if (LastValue.getValueType() != MVT::Glue)
5504 return LastValue;
5505
5506 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5507}
5508
5509// Creates the node that moves a function's address into the count register
5510// to prepare for an indirect call instruction.
5511static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5512 SDValue &Glue, SDValue &Chain,
5513 const SDLoc &dl) {
5514 SDValue MTCTROps[] = {Chain, Callee, Glue};
5515 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5516 Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5517 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5518 // The glue is the second value produced.
5519 Glue = Chain.getValue(1);
5520}
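// The MTCTR node built above ultimately becomes the familiar two-instruction
// indirect-call sequence (a sketch; the source register is whatever the
// register allocator picks):
//
//   mtctr rN
//   bctrl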
5521
5522static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5523                                          SDValue &Glue, SDValue &Chain,
5524 SDValue CallSeqStart,
5525 const CallBase *CB, const SDLoc &dl,
5526 bool hasNest,
5527 const PPCSubtarget &Subtarget) {
5528 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5529 // entry point, but to the function descriptor (the function entry point
5530 // address is part of the function descriptor though).
5531 // The function descriptor is a three doubleword structure with the
5532 // following fields: function entry point, TOC base address and
5533 // environment pointer.
5534 // Thus for a call through a function pointer, the following actions need
5535 // to be performed:
5536 // 1. Save the TOC of the caller in the TOC save area of its stack
5537  //     frame (this is done in LowerCall_64SVR4()).
5538 // 2. Load the address of the function entry point from the function
5539 // descriptor.
5540 // 3. Load the TOC of the callee from the function descriptor into r2.
5541 // 4. Load the environment pointer from the function descriptor into
5542 // r11.
5543 // 5. Branch to the function entry point address.
5544 // 6. On return of the callee, the TOC of the caller needs to be
5545 // restored (this is done in FinishCall()).
5546 //
5547 // The loads are scheduled at the beginning of the call sequence, and the
5548 // register copies are flagged together to ensure that no other
5549 // operations can be scheduled in between. E.g. without flagging the
5550 // copies together, a TOC access in the caller could be scheduled between
5551 // the assignment of the callee TOC and the branch to the callee, which leads
5552 // to incorrect code.
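  //
  // For illustration, the descriptor a function pointer addresses here can be
  // pictured as the following struct; the 64-bit field offsets are what
  // descriptorTOCAnchorOffset() and descriptorEnvironmentPointerOffset()
  // return below:
  //
  //   struct FunctionDescriptor {
  //     void *EntryPoint; // offset 0
  //     void *TOCBase;    // offset 8, loaded into r2
  //     void *EnvPtr;     // offset 16, loaded into r11
  //   };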
5553
5554 // Start by loading the function address from the descriptor.
5555 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5556 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5557                      ? (MachineMemOperand::MODereferenceable |
5558                         MachineMemOperand::MOInvariant)
5559                      : MachineMemOperand::MONone;
5560
5561 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5562
5563 // Registers used in building the DAG.
5564 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5565 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5566
5567 // Offsets of descriptor members.
5568 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5569 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5570
5571 const MVT RegVT = Subtarget.getScalarIntVT();
5572 const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5573
5574  // One load for the function's entry point address.
5575 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5576 Alignment, MMOFlags);
5577
5578 // One for loading the TOC anchor for the module that contains the called
5579 // function.
5580 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5581 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5582 SDValue TOCPtr =
5583 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5584 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5585
5586 // One for loading the environment pointer.
5587 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5588 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5589 SDValue LoadEnvPtr =
5590 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5591 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5592
5593
5594 // Then copy the newly loaded TOC anchor to the TOC pointer.
5595 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5596 Chain = TOCVal.getValue(0);
5597 Glue = TOCVal.getValue(1);
5598
5599 // If the function call has an explicit 'nest' parameter, it takes the
5600 // place of the environment pointer.
5601 assert((!hasNest || !Subtarget.isAIXABI()) &&
5602 "Nest parameter is not supported on AIX.");
5603 if (!hasNest) {
5604 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5605 Chain = EnvVal.getValue(0);
5606 Glue = EnvVal.getValue(1);
5607 }
5608
5609 // The rest of the indirect call sequence is the same as the non-descriptor
5610 // DAG.
5611 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5612}
5613
5614static void prepareOutOfLineGlueCall(SelectionDAG &DAG, SDValue &Callee,
5615                                     SDValue &Glue, SDValue &Chain,
5616 SDValue CallSeqStart, const CallBase *CB,
5617 const SDLoc &dl, bool hasNest,
5618 const PPCSubtarget &Subtarget) {
5619 // On AIX there is a feature ("out of line glue code") which uses a special
5620 // trampoline function ._ptrgl to do the indirect call. If this option is
5621 // enabled we instead simply load the address of the descriptor into gpr11,
5622 // with the arguments in the 'normal' registers and branch to the ._ptrgl
5623 // stub.
5624 const MCRegister PtrGlueReg = Subtarget.getGlueCodeDescriptorRegister();
5625 SDValue MoveToPhysicalReg =
5626 DAG.getCopyToReg(Chain, dl, PtrGlueReg, Callee, Glue);
5627 Chain = MoveToPhysicalReg.getValue(0);
5628 Glue = MoveToPhysicalReg.getValue(1);
5629}
5630
5631static void
5632buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5633                  PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5634 SelectionDAG &DAG,
5635 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5636 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5637 const PPCSubtarget &Subtarget) {
5638 const bool IsPPC64 = Subtarget.isPPC64();
5639 // MVT for a general purpose register.
5640 const MVT RegVT = Subtarget.getScalarIntVT();
5641
5642 // First operand is always the chain.
5643 Ops.push_back(Chain);
5644
5645  // If it's a direct call, pass the callee as the second operand.
5646 if (!CFlags.IsIndirect)
5647 Ops.push_back(Callee);
5648 else if (Subtarget.usePointerGlueHelper()) {
5649 Ops.push_back(Callee);
5650 // Add the register used to pass the descriptor address.
5651 Ops.push_back(
5652 DAG.getRegister(Subtarget.getGlueCodeDescriptorRegister(), RegVT));
5653 } else {
5654 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5655
5656 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5657 // on the stack (this would have been done in `LowerCall_64SVR4` or
5658 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5659 // represents both the indirect branch and a load that restores the TOC
5660 // pointer from the linkage area. The operand for the TOC restore is an add
5661 // of the TOC save offset to the stack pointer. This must be the second
5662 // operand: after the chain input but before any other variadic arguments.
5663 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5664 // saved or used.
5665 if (isTOCSaveRestoreRequired(Subtarget)) {
5666 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5667
5668 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5669 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5670 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5671 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5672 Ops.push_back(AddTOC);
5673 }
5674
5675 // Add the register used for the environment pointer.
5676 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5677 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5678 RegVT));
5679
5680
5681 // Add CTR register as callee so a bctr can be emitted later.
5682 if (CFlags.IsTailCall)
5683 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5684 }
5685
5686  // If this is a tail call, add the stack pointer delta.
5687 if (CFlags.IsTailCall)
5688 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5689
5690 // Add argument registers to the end of the list so that they are known live
5691 // into the call.
5692 for (const auto &[Reg, N] : RegsToPass)
5693 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
5694
5695 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5696 // no way to mark dependencies as implicit here.
5697 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5698 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5699 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5700 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5701
5702 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5703 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5704 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5705
5706 // Add a register mask operand representing the call-preserved registers.
5707 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5708 const uint32_t *Mask =
5709 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5710 assert(Mask && "Missing call preserved mask for calling convention");
5711 Ops.push_back(DAG.getRegisterMask(Mask));
5712
5713 // If the glue is valid, it is the last operand.
5714 if (Glue.getNode())
5715 Ops.push_back(Glue);
5716}
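// Taken together, a TOC-based indirect call ends up with an operand list of
// roughly this shape (a sketch; bracketed entries depend on the flags above):
//
//   { Chain, [TOC-restore address], [env-pointer reg], [CTR], [SPDiff],
//     arg regs..., [TOC reg], [CR1EQ], reg mask, [Glue] }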
5717
5718SDValue PPCTargetLowering::FinishCall(
5719 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5720 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5721 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5722 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5723 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5724
5725 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5726 Subtarget.isAIXABI())
5727 setUsesTOCBasePtr(DAG);
5728
5729 unsigned CallOpc =
5730 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5731 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5732
5733 if (!CFlags.IsIndirect)
5734 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5735 else if (Subtarget.usesFunctionDescriptors()) {
5736 if (Subtarget.usePointerGlueHelper()) {
5737 prepareOutOfLineGlueCall(DAG, Callee, Glue, Chain, CallSeqStart, CB, dl,
5738 CFlags.HasNest, Subtarget);
5739 SDValue PtrGlueCallee =
5740 DAG.getExternalSymbol("_ptrgl", getPointerTy(DAG.getDataLayout()));
5741 Callee = transformCallee(PtrGlueCallee, DAG, dl, Subtarget);
5742 } else {
5743 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5744 dl, CFlags.HasNest, Subtarget);
5745 }
5746 } else {
5747 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5748 }
5749
5750 // Build the operand list for the call instruction.
5751  SmallVector<SDValue, 8> Ops;
5752  buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5753 SPDiff, Subtarget);
5754
5755 // Emit tail call.
5756 if (CFlags.IsTailCall) {
5757    // Indirect tail calls when using PC Relative calls do not have the same
5758    // constraints.
5759 assert(((Callee.getOpcode() == ISD::Register &&
5760 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5761 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5762 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5763 isa<ConstantSDNode>(Callee) ||
5764 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5765 "Expecting a global address, external symbol, absolute value, "
5766 "register or an indirect tail call when PC Relative calls are "
5767 "used.");
5768 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5769 assert(CallOpc == PPCISD::TC_RETURN &&
5770 "Unexpected call opcode for a tail call.");
5771    DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5772    SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5773 DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5774 return Ret;
5775 }
5776
5777 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5778 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5779 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5780 Glue = Chain.getValue(1);
5781
5782 // When performing tail call optimization the callee pops its arguments off
5783 // the stack. Account for this here so these bytes can be pushed back on in
5784 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5785 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5786                         getTargetMachine().Options.GuaranteedTailCallOpt)
5787                            ? NumBytes
5788 : 0;
5789
5790 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5791 Glue = Chain.getValue(1);
5792
5793 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5794 DAG, InVals);
5795}
5796
5797bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5798  CallingConv::ID CalleeCC = CB->getCallingConv();
5799 const Function *CallerFunc = CB->getCaller();
5800 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5801 const Function *CalleeFunc = CB->getCalledFunction();
5802 if (!CalleeFunc)
5803 return false;
5804 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5805
5806  SmallVector<ISD::OutputArg, 2> Outs;
5807  SmallVector<ISD::InputArg, 2> Ins;
5808
5809 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5810 CalleeFunc->getAttributes(), Outs, *this,
5811 CalleeFunc->getDataLayout());
5812
5813 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5814 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5815 false /*isCalleeExternalSymbol*/);
5816}
5817
5818bool PPCTargetLowering::isEligibleForTCO(
5819 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5820 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5821    const SmallVectorImpl<ISD::OutputArg> &Outs,
5822    const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5823 bool isCalleeExternalSymbol) const {
5824 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5825 return false;
5826
5827 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5828 return IsEligibleForTailCallOptimization_64SVR4(
5829 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5830 isCalleeExternalSymbol);
5831 else
5832 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5833 isVarArg, Ins);
5834}
5835
5836SDValue
5837PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5838 SmallVectorImpl<SDValue> &InVals) const {
5839 SelectionDAG &DAG = CLI.DAG;
5840 SDLoc &dl = CLI.DL;
5841  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5842  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5843  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5844  SDValue Chain = CLI.Chain;
5845 SDValue Callee = CLI.Callee;
5846 bool &isTailCall = CLI.IsTailCall;
5847 CallingConv::ID CallConv = CLI.CallConv;
5848 bool isVarArg = CLI.IsVarArg;
5849 bool isPatchPoint = CLI.IsPatchPoint;
5850 const CallBase *CB = CLI.CB;
5851
5852 if (isTailCall) {
5853    MachineFunction &MF = DAG.getMachineFunction();
5854    CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5855 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5856 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5857 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5858
5859 isTailCall =
5860 isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5861 &(MF.getFunction()), IsCalleeExternalSymbol);
5862 if (isTailCall) {
5863 ++NumTailCalls;
5864 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5865 ++NumSiblingCalls;
5866
5867 // PC Relative calls no longer guarantee that the callee is a Global
5868 // Address Node. The callee could be an indirect tail call in which
5869 // case the SDValue for the callee could be a load (to load the address
5870 // of a function pointer) or it may be a register copy (to move the
5871 // address of the callee from a function parameter into a virtual
5872      // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5873 assert((Subtarget.isUsingPCRelativeCalls() ||
5874 isa<GlobalAddressSDNode>(Callee)) &&
5875 "Callee should be an llvm::Function object.");
5876
5877 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5878 << "\nTCO callee: ");
5879 LLVM_DEBUG(Callee.dump());
5880 }
5881 }
5882
5883 if (!isTailCall && CB && CB->isMustTailCall())
5884 report_fatal_error("failed to perform tail call elimination on a call "
5885 "site marked musttail");
5886
5887 // When long calls (i.e. indirect calls) are always used, calls are always
5888 // made via function pointer. If we have a function name, first translate it
5889 // into a pointer.
5890 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5891 !isTailCall)
5892 Callee = LowerGlobalAddress(Callee, DAG);
5893
5894 CallFlags CFlags(
5895 CallConv, isTailCall, isVarArg, isPatchPoint,
5896 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5897 // hasNest
5898 Subtarget.is64BitELFABI() &&
5899 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5900 CLI.NoMerge);
5901
5902 if (Subtarget.isAIXABI())
5903 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5904 InVals, CB);
5905
5906 assert(Subtarget.isSVR4ABI());
5907 if (Subtarget.isPPC64())
5908 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5909 InVals, CB);
5910 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5911 InVals, CB);
5912}
5913
5914SDValue PPCTargetLowering::LowerCall_32SVR4(
5915 SDValue Chain, SDValue Callee, CallFlags CFlags,
5916    const SmallVectorImpl<ISD::OutputArg> &Outs,
5917    const SmallVectorImpl<SDValue> &OutVals,
5918 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5919    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5920    const CallBase *CB) const {
5921 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5922 // of the 32-bit SVR4 ABI stack frame layout.
5923
5924 const CallingConv::ID CallConv = CFlags.CallConv;
5925 const bool IsVarArg = CFlags.IsVarArg;
5926 const bool IsTailCall = CFlags.IsTailCall;
5927
5928 assert((CallConv == CallingConv::C ||
5929 CallConv == CallingConv::Cold ||
5930 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5931
5932 const Align PtrAlign(4);
5933
5934 MachineFunction &MF = DAG.getMachineFunction();
5935
5936  // Mark this function as potentially containing a function that contains a
5937  // tail call. As a consequence the frame pointer will be used for dynamic
5938  // allocas and for restoring the caller's stack pointer in this function's
5939  // epilog. This is done because the tail-called function might overwrite the
5940  // value in this function's (MF) stack pointer stack slot 0(SP).
5941 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5942 CallConv == CallingConv::Fast)
5943 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5944
5945 // Count how many bytes are to be pushed on the stack, including the linkage
5946 // area, parameter list area and the part of the local variable space which
5947 // contains copies of aggregates which are passed by value.
5948
5949 // Assign locations to all of the outgoing arguments.
5950  SmallVector<CCValAssign, 16> ArgLocs;
5951  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5952
5953 // Reserve space for the linkage area on the stack.
5954 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5955 PtrAlign);
5956
5957 if (IsVarArg) {
5958 // Handle fixed and variable vector arguments differently.
5959 // Fixed vector arguments go into registers as long as registers are
5960 // available. Variable vector arguments always go into memory.
5961 unsigned NumArgs = Outs.size();
5962
5963 for (unsigned i = 0; i != NumArgs; ++i) {
5964 MVT ArgVT = Outs[i].VT;
5965 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5966 bool Result;
5967
5968 if (!ArgFlags.isVarArg()) {
5969 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5970 Outs[i].OrigTy, CCInfo);
5971 } else {
5972        Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5973                                      ArgFlags, Outs[i].OrigTy, CCInfo);
5974 }
5975
5976 if (Result) {
5977#ifndef NDEBUG
5978 errs() << "Call operand #" << i << " has unhandled type "
5979 << ArgVT << "\n";
5980#endif
5981 llvm_unreachable(nullptr);
5982 }
5983 }
5984 } else {
5985 // All arguments are treated the same.
5986 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5987 }
5988
5989 // Assign locations to all of the outgoing aggregate by value arguments.
5990 SmallVector<CCValAssign, 16> ByValArgLocs;
5991 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5992
5993 // Reserve stack space for the allocations in CCInfo.
5994 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
5995
5996 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5997
5998  // Size of the linkage area, parameter list area and the part of the local
5999  // variable space where copies of aggregates which are passed by value are
6000  // stored.
6001 unsigned NumBytes = CCByValInfo.getStackSize();
6002
6003 // Calculate by how many bytes the stack has to be adjusted in case of tail
6004 // call optimization.
6005 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
6006
6007 // Adjust the stack pointer for the new arguments...
6008 // These operations are automatically eliminated by the prolog/epilog pass
6009 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6010 SDValue CallSeqStart = Chain;
6011
6012  // Load the return address and frame pointer so they can be moved somewhere
6013  // else later.
6014 SDValue LROp, FPOp;
6015 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6016
6017 // Set up a copy of the stack pointer for use loading and storing any
6018 // arguments that may not fit in the registers available for argument
6019 // passing.
6020 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6021
6023 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6024 SmallVector<SDValue, 8> MemOpChains;
6025
6026 bool seenFloatArg = false;
6027 // Walk the register/memloc assignments, inserting copies/loads.
6028 // i - Tracks the index into the list of registers allocated for the call
6029 // RealArgIdx - Tracks the index into the list of actual function arguments
6030 // j - Tracks the index into the list of byval arguments
6031 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
6032 i != e;
6033 ++i, ++RealArgIdx) {
6034 CCValAssign &VA = ArgLocs[i];
6035 SDValue Arg = OutVals[RealArgIdx];
6036 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
6037
6038 if (Flags.isByVal()) {
6039 // Argument is an aggregate which is passed by value, thus we need to
6040 // create a copy of it in the local variable space of the current stack
6041 // frame (which is the stack frame of the caller) and pass the address of
6042 // this copy to the callee.
6043 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
6044 CCValAssign &ByValVA = ByValArgLocs[j++];
6045 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6046
6047      // Memory reserved in the local variable space of the caller's stack frame.
6048 unsigned LocMemOffset = ByValVA.getLocMemOffset();
6049
6050 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6051 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6052 StackPtr, PtrOff);
6053
6054 // Create a copy of the argument in the local area of the current
6055 // stack frame.
6056 SDValue MemcpyCall =
6057 CreateCopyOfByValArgument(Arg, PtrOff,
6058 CallSeqStart.getNode()->getOperand(0),
6059 Flags, DAG, dl);
6060
6061 // This must go outside the CALLSEQ_START..END.
6062 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6063 SDLoc(MemcpyCall));
6064 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6065 NewCallSeqStart.getNode());
6066 Chain = CallSeqStart = NewCallSeqStart;
6067
6068 // Pass the address of the aggregate copy on the stack either in a
6069 // physical register or in the parameter list area of the current stack
6070 // frame to the callee.
6071 Arg = PtrOff;
6072 }
6073
6074 // When useCRBits() is true, there can be i1 arguments.
6075 // It is because getRegisterType(MVT::i1) => MVT::i1,
6076 // and for other integer types getRegisterType() => MVT::i32.
6077 // Extend i1 and ensure callee will get i32.
6078 if (Arg.getValueType() == MVT::i1)
6079 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6080 dl, MVT::i32, Arg);
6081
6082 if (VA.isRegLoc()) {
6083 seenFloatArg |= VA.getLocVT().isFloatingPoint();
6084 // Put argument in a physical register.
6085 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6086 bool IsLE = Subtarget.isLittleEndian();
6087 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6088 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6089 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6090 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6091 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6092 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6093 SVal.getValue(0)));
6094 } else
6095 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6096 } else {
6097 // Put argument in the parameter list area of the current stack frame.
6098 assert(VA.isMemLoc());
6099 unsigned LocMemOffset = VA.getLocMemOffset();
6100
6101 if (!IsTailCall) {
6102 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6103 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6104 StackPtr, PtrOff);
6105
6106 MemOpChains.push_back(
6107 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6108 } else {
6109 // Calculate and remember argument location.
6110 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6111 TailCallArguments);
6112 }
6113 }
6114 }
6115
6116 if (!MemOpChains.empty())
6117 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6118
6119 // Build a sequence of copy-to-reg nodes chained together with token chain
6120 // and flag operands which copy the outgoing args into the appropriate regs.
6121 SDValue InGlue;
6122 for (const auto &[Reg, N] : RegsToPass) {
6123 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6124 InGlue = Chain.getValue(1);
6125 }
6126
6127 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6128 // registers.
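  // (Under the 32-bit SVR4 ABI this bit is how a vararg callee such as printf
  // knows whether it must spill the FPR argument registers on entry.)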
6129 if (IsVarArg) {
6130 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6131 SDValue Ops[] = { Chain, InGlue };
6132
6133 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6134 VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6135
6136 InGlue = Chain.getValue(1);
6137 }
6138
6139 if (IsTailCall)
6140 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6141 TailCallArguments);
6142
6143 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6144 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6145}
6146
6147// Copy an argument into memory, being careful to do this outside the
6148// call sequence for the call to which the argument belongs.
6149SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6150 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6151 SelectionDAG &DAG, const SDLoc &dl) const {
6152 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6153 CallSeqStart.getNode()->getOperand(0),
6154 Flags, DAG, dl);
6155 // The MEMCPY must go outside the CALLSEQ_START..END.
6156 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6157 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6158 SDLoc(MemcpyCall));
6159 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6160 NewCallSeqStart.getNode());
6161 return NewCallSeqStart;
6162}
6163
6164SDValue PPCTargetLowering::LowerCall_64SVR4(
6165 SDValue Chain, SDValue Callee, CallFlags CFlags,
6167 const SmallVectorImpl<SDValue> &OutVals,
6168 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6170 const CallBase *CB) const {
6171 bool isELFv2ABI = Subtarget.isELFv2ABI();
6172 bool isLittleEndian = Subtarget.isLittleEndian();
6173 unsigned NumOps = Outs.size();
6174 bool IsSibCall = false;
6175 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6176
6177 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6178 unsigned PtrByteSize = 8;
6179
6180 MachineFunction &MF = DAG.getMachineFunction();
6181
6182 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6183 IsSibCall = true;
6184
6185  // Mark this function as potentially containing a function that contains a
6186  // tail call. As a consequence the frame pointer will be used for dynamic
6187  // allocas and for restoring the caller's stack pointer in this function's
6188  // epilog. This is done because the tail-called function might overwrite the
6189  // value in this function's (MF) stack pointer stack slot 0(SP).
6190 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6191 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6192
6193 assert(!(IsFastCall && CFlags.IsVarArg) &&
6194 "fastcc not supported on varargs functions");
6195
6196 // Count how many bytes are to be pushed on the stack, including the linkage
6197 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6198 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6199 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
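  // As a sketch, the linkage area at the bottom of the frame is laid out as:
  //
  //   ELFv1: 0(r1) backchain, 8(r1) CR, 16(r1) LR, 24/32(r1) reserved,
  //          40(r1) TOC save
  //   ELFv2: 0(r1) backchain, 8(r1) CR, 16(r1) LR, 24(r1) TOC save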
6200 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6201 unsigned NumBytes = LinkageSize;
6202 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6203
6204 static const MCPhysReg GPR[] = {
6205 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6206 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6207 };
6208 static const MCPhysReg VR[] = {
6209 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6210 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6211 };
6212
6213 const unsigned NumGPRs = std::size(GPR);
6214 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6215 const unsigned NumVRs = std::size(VR);
6216
6217 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6218 // can be passed to the callee in registers.
6219 // For the fast calling convention, there is another check below.
6220 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6221 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6222 if (!HasParameterArea) {
6223 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6224 unsigned AvailableFPRs = NumFPRs;
6225 unsigned AvailableVRs = NumVRs;
6226 unsigned NumBytesTmp = NumBytes;
6227 for (unsigned i = 0; i != NumOps; ++i) {
6228 if (Outs[i].Flags.isNest()) continue;
6229 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6230 PtrByteSize, LinkageSize, ParamAreaSize,
6231 NumBytesTmp, AvailableFPRs, AvailableVRs))
6232 HasParameterArea = true;
6233 }
6234 }
6235
6236 // When using the fast calling convention, we don't provide backing for
6237 // arguments that will be in registers.
6238 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6239
6240 // Avoid allocating parameter area for fastcc functions if all the arguments
6241 // can be passed in the registers.
6242 if (IsFastCall)
6243 HasParameterArea = false;
6244
6245 // Add up all the space actually used.
6246 for (unsigned i = 0; i != NumOps; ++i) {
6247 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6248 EVT ArgVT = Outs[i].VT;
6249 EVT OrigVT = Outs[i].ArgVT;
6250
6251 if (Flags.isNest())
6252 continue;
6253
6254 if (IsFastCall) {
6255 if (Flags.isByVal()) {
6256 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6257 if (NumGPRsUsed > NumGPRs)
6258 HasParameterArea = true;
6259 } else {
6260 switch (ArgVT.getSimpleVT().SimpleTy) {
6261 default: llvm_unreachable("Unexpected ValueType for argument!");
6262 case MVT::i1:
6263 case MVT::i32:
6264 case MVT::i64:
6265 if (++NumGPRsUsed <= NumGPRs)
6266 continue;
6267 break;
6268 case MVT::v4i32:
6269 case MVT::v8i16:
6270 case MVT::v16i8:
6271 case MVT::v2f64:
6272 case MVT::v2i64:
6273 case MVT::v1i128:
6274 case MVT::f128:
6275 if (++NumVRsUsed <= NumVRs)
6276 continue;
6277 break;
6278 case MVT::v4f32:
6279 if (++NumVRsUsed <= NumVRs)
6280 continue;
6281 break;
6282 case MVT::f32:
6283 case MVT::f64:
6284 if (++NumFPRsUsed <= NumFPRs)
6285 continue;
6286 break;
6287 }
6288 HasParameterArea = true;
6289 }
6290 }
6291
6292 /* Respect alignment of argument on the stack. */
6293    auto Alignment =
6294        CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6295    NumBytes = alignTo(NumBytes, Alignment);
6296
6297 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6298 if (Flags.isInConsecutiveRegsLast())
6299 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6300 }
6301
6302 unsigned NumBytesActuallyUsed = NumBytes;
6303
6304  // In the old ELFv1 ABI, the prolog code of the callee may store up to 8 GPR
6305  // argument registers to the stack, allowing va_start to index over them in
6306  // memory if it is varargs.
6307 // Because we cannot tell if this is needed on the caller side, we have to
6308 // conservatively assume that it is needed. As such, make sure we have at
6309 // least enough stack space for the caller to store the 8 GPRs.
6310 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6311 // really requires memory operands, e.g. a vararg function.
6312 if (HasParameterArea)
6313 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6314 else
6315 NumBytes = LinkageSize;
6316
6317 // Tail call needs the stack to be aligned.
6318 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6319 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6320
6321 int SPDiff = 0;
6322
6323 // Calculate by how many bytes the stack has to be adjusted in case of tail
6324 // call optimization.
6325 if (!IsSibCall)
6326 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6327
6328 // To protect arguments on the stack from being clobbered in a tail call,
6329 // force all the loads to happen before doing any other lowering.
6330 if (CFlags.IsTailCall)
6331 Chain = DAG.getStackArgumentTokenFactor(Chain);
6332
6333 // Adjust the stack pointer for the new arguments...
6334 // These operations are automatically eliminated by the prolog/epilog pass
6335 if (!IsSibCall)
6336 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6337 SDValue CallSeqStart = Chain;
6338
6339  // Load the return address and frame pointer so they can be moved somewhere
6340  // else later.
6341 SDValue LROp, FPOp;
6342 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6343
6344 // Set up a copy of the stack pointer for use loading and storing any
6345 // arguments that may not fit in the registers available for argument
6346 // passing.
6347 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6348
6349 // Figure out which arguments are going to go in registers, and which in
6350 // memory. Also, if this is a vararg function, floating point operations
6351 // must be stored to our stack, and loaded into integer regs as well, if
6352 // any integer regs are available for argument passing.
6353 unsigned ArgOffset = LinkageSize;
6354
6356 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6357
6358 SmallVector<SDValue, 8> MemOpChains;
6359 for (unsigned i = 0; i != NumOps; ++i) {
6360 SDValue Arg = OutVals[i];
6361 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6362 EVT ArgVT = Outs[i].VT;
6363 EVT OrigVT = Outs[i].ArgVT;
6364
6365 // PtrOff will be used to store the current argument to the stack if a
6366 // register cannot be found for it.
6367 SDValue PtrOff;
6368
6369 // We re-align the argument offset for each argument, except when using the
6370 // fast calling convention, when we need to make sure we do that only when
6371 // we'll actually use a stack slot.
6372 auto ComputePtrOff = [&]() {
6373 /* Respect alignment of argument on the stack. */
6374 auto Alignment =
6375 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6376 ArgOffset = alignTo(ArgOffset, Alignment);
6377
6378 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6379
6380 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6381 };
6382
6383 if (!IsFastCall) {
6384 ComputePtrOff();
6385
6386 /* Compute GPR index associated with argument offset. */
6387 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6388 GPR_idx = std::min(GPR_idx, NumGPRs);
6389 }
6390
6391 // Promote integers to 64-bit values.
6392 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6393 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6394 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6395 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6396 }
6397
6398 // FIXME memcpy is used way more than necessary. Correctness first.
6399 // Note: "by value" is code for passing a structure by value, not
6400 // basic types.
6401 if (Flags.isByVal()) {
6402 // Note: Size includes alignment padding, so
6403 // struct x { short a; char b; }
6404 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6405 // These are the proper values we need for right-justifying the
6406 // aggregate in a parameter register.
6407 unsigned Size = Flags.getByValSize();
6408
6409 // An empty aggregate parameter takes up no storage and no
6410 // registers.
6411 if (Size == 0)
6412 continue;
6413
6414 if (IsFastCall)
6415 ComputePtrOff();
6416
6417 // All aggregates smaller than 8 bytes must be passed right-justified.
6418 if (Size==1 || Size==2 || Size==4) {
6419 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6420 if (GPR_idx != NumGPRs) {
6421 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6422 MachinePointerInfo(), VT);
6423 MemOpChains.push_back(Load.getValue(1));
6424 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6425
6426 ArgOffset += PtrByteSize;
6427 continue;
6428 }
6429 }
6430
6431 if (GPR_idx == NumGPRs && Size < 8) {
6432 SDValue AddPtr = PtrOff;
6433 if (!isLittleEndian) {
6434 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6435 PtrOff.getValueType());
6436 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6437 }
6438 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6439 CallSeqStart,
6440 Flags, DAG, dl);
6441 ArgOffset += PtrByteSize;
6442 continue;
6443 }
6444 // Copy the object to parameter save area if it can not be entirely passed
6445 // by registers.
6446 // FIXME: we only need to copy the parts which need to be passed in
6447 // parameter save area. For the parts passed by registers, we don't need
6448 // to copy them to the stack although we need to allocate space for them
6449 // in parameter save area.
6450 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6451 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6452 CallSeqStart,
6453 Flags, DAG, dl);
6454
6455 // When a register is available, pass a small aggregate right-justified.
6456 if (Size < 8 && GPR_idx != NumGPRs) {
6457 // The easiest way to get this right-justified in a register
6458 // is to copy the structure into the rightmost portion of a
6459 // local variable slot, then load the whole slot into the
6460 // register.
6461 // FIXME: The memcpy seems to produce pretty awful code for
6462 // small aggregates, particularly for packed ones.
6463 // FIXME: It would be preferable to use the slot in the
6464 // parameter save area instead of a new local variable.
6465 SDValue AddPtr = PtrOff;
6466 if (!isLittleEndian) {
6467 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6468 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6469 }
6470 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6471 CallSeqStart,
6472 Flags, DAG, dl);
6473
6474 // Load the slot into the register.
6475 SDValue Load =
6476 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6477 MemOpChains.push_back(Load.getValue(1));
6478 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6479
6480 // Done with this argument.
6481 ArgOffset += PtrByteSize;
6482 continue;
6483 }
6484
6485 // For aggregates larger than PtrByteSize, copy the pieces of the
6486 // object that fit into registers from the parameter save area.
6487 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6488 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6489 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6490 if (GPR_idx != NumGPRs) {
6491 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6492 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6493 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6494 MachinePointerInfo(), ObjType);
6495
6496 MemOpChains.push_back(Load.getValue(1));
6497 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6498 ArgOffset += PtrByteSize;
6499 } else {
6500 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6501 break;
6502 }
6503 }
6504 continue;
6505 }
6506
6507 switch (Arg.getSimpleValueType().SimpleTy) {
6508 default: llvm_unreachable("Unexpected ValueType for argument!");
6509 case MVT::i1:
6510 case MVT::i32:
6511 case MVT::i64:
6512 if (Flags.isNest()) {
6513 // The 'nest' parameter, if any, is passed in R11.
6514 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6515 break;
6516 }
6517
6518 // These can be scalar arguments or elements of an integer array type
6519 // passed directly. Clang may use those instead of "byval" aggregate
6520 // types to avoid forcing arguments to memory unnecessarily.
6521 if (GPR_idx != NumGPRs) {
6522 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6523 } else {
6524 if (IsFastCall)
6525 ComputePtrOff();
6526
6527 assert(HasParameterArea &&
6528 "Parameter area must exist to pass an argument in memory.");
6529 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6530 true, CFlags.IsTailCall, false, MemOpChains,
6531 TailCallArguments, dl);
6532 if (IsFastCall)
6533 ArgOffset += PtrByteSize;
6534 }
6535 if (!IsFastCall)
6536 ArgOffset += PtrByteSize;
6537 break;
6538 case MVT::f32:
6539 case MVT::f64: {
6540 // These can be scalar arguments or elements of a float array type
6541      // passed directly. The latter are used to implement ELFv2 homogeneous
6542 // float aggregates.
6543
6544 // Named arguments go into FPRs first, and once they overflow, the
6545 // remaining arguments go into GPRs and then the parameter save area.
6546 // Unnamed arguments for vararg functions always go to GPRs and
6547 // then the parameter save area. For now, put all arguments to vararg
6548 // routines always in both locations (FPR *and* GPR or stack slot).
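      // For instance, in a vararg call like printf("%f\n", X), the double X
      // is passed below both in an FPR and in a GPR (or the save area), since
      // the callee's va_arg implementation reads the GPR/stack image.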
6549 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6550 bool NeededLoad = false;
6551
6552 // First load the argument into the next available FPR.
6553 if (FPR_idx != NumFPRs)
6554 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6555
6556 // Next, load the argument into GPR or stack slot if needed.
6557 if (!NeedGPROrStack)
6558 ;
6559 else if (GPR_idx != NumGPRs && !IsFastCall) {
6560 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6561 // once we support fp <-> gpr moves.
6562
6563 // In the non-vararg case, this can only ever happen in the
6564 // presence of f32 array types, since otherwise we never run
6565 // out of FPRs before running out of GPRs.
6566 SDValue ArgVal;
6567
6568 // Double values are always passed in a single GPR.
6569 if (Arg.getValueType() != MVT::f32) {
6570 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6571
6572 // Non-array float values are extended and passed in a GPR.
6573 } else if (!Flags.isInConsecutiveRegs()) {
6574 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6575 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6576
6577 // If we have an array of floats, we collect every odd element
6578 // together with its predecessor into one GPR.
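      // (Sketch: for a homogeneous aggregate like struct { float A, B; },
      // B reaches this point with ArgOffset % 8 == 4 and is paired with A
      // into a single 64-bit GPR.)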
6579 } else if (ArgOffset % PtrByteSize != 0) {
6580 SDValue Lo, Hi;
6581 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6582 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6583 if (!isLittleEndian)
6584 std::swap(Lo, Hi);
6585 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6586
6587 // The final element, if even, goes into the first half of a GPR.
6588 } else if (Flags.isInConsecutiveRegsLast()) {
6589 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6590 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6591 if (!isLittleEndian)
6592 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6593 DAG.getConstant(32, dl, MVT::i32));
6594
6595      // Non-final even elements are skipped; they will be handled together
6596      // with the subsequent argument on the next go-around.
6597 } else
6598 ArgVal = SDValue();
6599
6600 if (ArgVal.getNode())
6601 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6602 } else {
6603 if (IsFastCall)
6604 ComputePtrOff();
6605
6606 // Single-precision floating-point values are mapped to the
6607 // second (rightmost) word of the stack doubleword.
6608 if (Arg.getValueType() == MVT::f32 &&
6609 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6610 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6611 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6612 }
6613
6614 assert(HasParameterArea &&
6615 "Parameter area must exist to pass an argument in memory.");
6616 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6617 true, CFlags.IsTailCall, false, MemOpChains,
6618 TailCallArguments, dl);
6619
6620 NeededLoad = true;
6621 }
6622 // When passing an array of floats, the array occupies consecutive
6623 // space in the argument area; only round up to the next doubleword
6624 // at the end of the array. Otherwise, each float takes 8 bytes.
6625 if (!IsFastCall || NeededLoad) {
6626 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6627 Flags.isInConsecutiveRegs()) ? 4 : 8;
6628 if (Flags.isInConsecutiveRegsLast())
6629 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6630 }
6631 break;
6632 }
6633 case MVT::v4f32:
6634 case MVT::v4i32:
6635 case MVT::v8i16:
6636 case MVT::v16i8:
6637 case MVT::v2f64:
6638 case MVT::v2i64:
6639 case MVT::v1i128:
6640 case MVT::f128:
6641 // These can be scalar arguments or elements of a vector array type
6642      // passed directly. The latter are used to implement ELFv2 homogeneous
6643 // vector aggregates.
6644
6645 // For a varargs call, named arguments go into VRs or on the stack as
6646 // usual; unnamed arguments always go to the stack or the corresponding
6647 // GPRs when within range. For now, we always put the value in both
6648 // locations (or even all three).
6649 if (CFlags.IsVarArg) {
6650 assert(HasParameterArea &&
6651 "Parameter area must exist if we have a varargs call.");
6652 // We could elide this store in the case where the object fits
6653 // entirely in R registers. Maybe later.
6654 SDValue Store =
6655 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6656 MemOpChains.push_back(Store);
6657 if (VR_idx != NumVRs) {
6658 SDValue Load =
6659 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6660 MemOpChains.push_back(Load.getValue(1));
6661 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6662 }
6663 ArgOffset += 16;
6664 for (unsigned i=0; i<16; i+=PtrByteSize) {
6665 if (GPR_idx == NumGPRs)
6666 break;
6667 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6668 DAG.getConstant(i, dl, PtrVT));
6669 SDValue Load =
6670 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6671 MemOpChains.push_back(Load.getValue(1));
6672 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6673 }
6674 break;
6675 }
6676
6677 // Non-varargs Altivec params go into VRs or on the stack.
6678 if (VR_idx != NumVRs) {
6679 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6680 } else {
6681 if (IsFastCall)
6682 ComputePtrOff();
6683
6684 assert(HasParameterArea &&
6685 "Parameter area must exist to pass an argument in memory.");
6686 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6687 true, CFlags.IsTailCall, true, MemOpChains,
6688 TailCallArguments, dl);
6689 if (IsFastCall)
6690 ArgOffset += 16;
6691 }
6692
6693 if (!IsFastCall)
6694 ArgOffset += 16;
6695 break;
6696 }
6697 }
6698
6699 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6700 "mismatch in size of parameter area");
6701 (void)NumBytesActuallyUsed;
6702
6703 if (!MemOpChains.empty())
6704 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6705
6706 // Check if this is an indirect call (MTCTR/BCTRL).
6707 // See prepareDescriptorIndirectCall and buildCallOperands for more
6708 // information about calls through function pointers in the 64-bit SVR4 ABI.
6709 if (CFlags.IsIndirect) {
6710 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6711 // caller in the TOC save area.
6712 if (isTOCSaveRestoreRequired(Subtarget)) {
6713 assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6714 // Load r2 into a virtual register and store it to the TOC save area.
6715 setUsesTOCBasePtr(DAG);
6716 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6717 // TOC save area offset.
6718 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6719 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6720 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6721 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6722 MachinePointerInfo::getStack(
6723 DAG.getMachineFunction(), TOCSaveOffset));
6724 }
6725 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6726 // This does not mean the MTCTR instruction must use R12; it's easier
6727 // to model this as an extra parameter, so do that.
6728 if (isELFv2ABI && !CFlags.IsPatchPoint)
6729 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6730 }
6731
6732 // Build a sequence of copy-to-reg nodes chained together with token chain
6733 // and flag operands which copy the outgoing args into the appropriate regs.
6734 SDValue InGlue;
6735 for (const auto &[Reg, N] : RegsToPass) {
6736 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6737 InGlue = Chain.getValue(1);
6738 }
6739
6740 if (CFlags.IsTailCall && !IsSibCall)
6741 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6742 TailCallArguments);
6743
6744 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6745 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6746}
6747
6748// Returns true when the shadow of a general purpose argument register
6749// in the parameter save area is aligned to at least 'RequiredAlign'.
6750static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6751 assert(RequiredAlign.value() <= 16 &&
6752 "Required alignment greater than stack alignment.");
6753 switch (Reg) {
6754 default:
6755 report_fatal_error("called on invalid register.");
6756 case PPC::R5:
6757 case PPC::R9:
6758 case PPC::X3:
6759 case PPC::X5:
6760 case PPC::X7:
6761 case PPC::X9:
6762 // These registers are 16-byte aligned, which is the strictest alignment
6763 // we can support.
6764 return true;
6765 case PPC::R3:
6766 case PPC::R7:
6767 case PPC::X4:
6768 case PPC::X6:
6769 case PPC::X8:
6770 case PPC::X10:
6771 // The shadow of these registers in the PSA is 8 byte aligned.
6772 return RequiredAlign <= 8;
6773 case PPC::R4:
6774 case PPC::R6:
6775 case PPC::R8:
6776 case PPC::R10:
6777 return RequiredAlign <= 4;
6778 }
6779}
6780
6781static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6782 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6783 Type *OrigTy, CCState &State) {
6784 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6785 State.getMachineFunction().getSubtarget());
6786 const bool IsPPC64 = Subtarget.isPPC64();
6787 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6788 const Align PtrAlign(PtrSize);
6789 const Align StackAlign(16);
6790 const MVT RegVT = Subtarget.getScalarIntVT();
6791
6792 if (ValVT == MVT::f128)
6793 report_fatal_error("f128 is unimplemented on AIX.");
6794
6795 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6796 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6797 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6798 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6799 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6800 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6801
6802 static const MCPhysReg VR[] = {// Vector registers.
6803 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6804 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6805 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6806
6807 const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6808
6809 if (ArgFlags.isNest()) {
6810 MCRegister EnvReg = State.AllocateReg(IsPPC64 ? PPC::X11 : PPC::R11);
6811 if (!EnvReg)
6812 report_fatal_error("More than one nest argument.");
6813 State.addLoc(CCValAssign::getReg(ValNo, ValVT, EnvReg, RegVT, LocInfo));
6814 return false;
6815 }
6816
6817 if (ArgFlags.isByVal()) {
6818 const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6819 if (ByValAlign > StackAlign)
6820 report_fatal_error("Pass-by-value arguments with alignment greater than "
6821 "16 are not supported.");
6822
6823 const unsigned ByValSize = ArgFlags.getByValSize();
6824 const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6825
6826 // An empty aggregate parameter takes up no storage and no registers,
6827 // but needs a MemLoc for a stack slot for the formal arguments side.
6828 if (ByValSize == 0) {
6829 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6830 State.getStackSize(), RegVT, LocInfo));
6831 return false;
6832 }
6833
6834 // Shadow allocate any registers that are not properly aligned.
6835 unsigned NextReg = State.getFirstUnallocated(GPRs);
6836 while (NextReg != GPRs.size() &&
6837 !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
6838 // Shadow allocate the next register since its alignment is not strict enough.
6839 MCRegister Reg = State.AllocateReg(GPRs);
6840 // Allocate the stack space shadowed by said register.
6841 State.AllocateStack(PtrSize, PtrAlign);
6842 assert(Reg && "Allocating register unexpectedly failed.");
6843 (void)Reg;
6844 NextReg = State.getFirstUnallocated(GPRs);
6845 }
6846
6847 const unsigned StackSize = alignTo(ByValSize, ObjAlign);
6848 unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
6849 for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6850 if (MCRegister Reg = State.AllocateReg(GPRs))
6851 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6852 else {
6853 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6854 Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
6855 LocInfo));
6856 break;
6857 }
6858 }
6859 return false;
6860 }
6861
6862 // Arguments always reserve space in the parameter save area.
6863 switch (ValVT.SimpleTy) {
6864 default:
6865 report_fatal_error("Unhandled value type for argument.");
6866 case MVT::i64:
6867 // i64 arguments should have been split to i32 for PPC32.
6868 assert(IsPPC64 && "PPC32 should have split i64 values.");
6869 [[fallthrough]];
6870 case MVT::i1:
6871 case MVT::i32: {
6872 const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
6873 // AIX integer arguments are always passed in register width.
6874 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6875 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6876 : CCValAssign::LocInfo::ZExt;
6877 if (MCRegister Reg = State.AllocateReg(GPRs))
6878 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6879 else
6880 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6881
6882 return false;
6883 }
6884 case MVT::f32:
6885 case MVT::f64: {
6886 // The parameter save area (PSA) is reserved even if the float passes in an FPR.
6887 const unsigned StoreSize = LocVT.getStoreSize();
6888 // Floats are always 4-byte aligned in the PSA on AIX.
6889 // This includes f64 in 64-bit mode for ABI compatibility.
6890 const unsigned Offset =
6891 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6892 MCRegister FReg = State.AllocateReg(FPR);
6893 if (FReg)
6894 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6895
6896 // Reserve and initialize GPRs or initialize the PSA as required.
6897 for (unsigned I = 0; I < StoreSize; I += PtrSize) {
6898 if (MCRegister Reg = State.AllocateReg(GPRs)) {
6899 assert(FReg && "An FPR should be available when a GPR is reserved.");
6900 if (State.isVarArg()) {
6901 // Successfully reserved GPRs are only initialized for vararg calls.
6902 // Custom handling is required for:
6903 // f64 in PPC32 needs to be split into 2 GPRs.
6904 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6905 State.addLoc(
6906 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6907 }
6908 } else {
6909 // If there are insufficient GPRs, the PSA needs to be initialized.
6910 // Initialization occurs even if an FPR was initialized for
6911 // compatibility with the AIX XL compiler. The full memory for the
6912 // argument will be initialized even if a prior word is saved in GPR.
6913 // A custom memLoc is used when the argument also passes in FPR so
6914 // that the callee handling can skip over it easily.
6915 State.addLoc(
6916 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6917 LocInfo)
6918 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6919 break;
6920 }
6921 }
6922
6923 return false;
6924 }
6925 case MVT::v4f32:
6926 case MVT::v4i32:
6927 case MVT::v8i16:
6928 case MVT::v16i8:
6929 case MVT::v2i64:
6930 case MVT::v2f64:
6931 case MVT::v1i128: {
6932 const unsigned VecSize = 16;
6933 const Align VecAlign(VecSize);
6934
6935 if (!State.isVarArg()) {
6936 // If there are vector registers remaining we don't consume any stack
6937 // space.
6938 if (MCRegister VReg = State.AllocateReg(VR)) {
6939 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6940 return false;
6941 }
6942 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
6943 // might be allocated in the portion of the PSA that is shadowed by the
6944 // GPRs.
6945 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6946 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6947 return false;
6948 }
6949
6950 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
6951 // Burn any underaligned registers and their shadowed stack space until
6952 // we reach the required alignment.
6953 while (NextRegIndex != GPRs.size() &&
6954 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
6955 // Shadow allocate register and its stack shadow.
6956 MCRegister Reg = State.AllocateReg(GPRs);
6957 State.AllocateStack(PtrSize, PtrAlign);
6958 assert(Reg && "Allocating register unexpectedly failed.");
6959 (void)Reg;
6960 NextRegIndex = State.getFirstUnallocated(GPRs);
6961 }
6962
6963 // Vectors that are passed as fixed arguments are handled differently.
6964 // They are passed in VRs if any are available (unlike arguments passed
6965 // through ellipses) and shadow GPRs (unlike arguments to non-vararg
6966 // functions).
6967 if (!ArgFlags.isVarArg()) {
6968 if (MCRegister VReg = State.AllocateReg(VR)) {
6969 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6970 // Shadow allocate GPRs and stack space even though we pass in a VR.
6971 for (unsigned I = 0; I != VecSize; I += PtrSize)
6972 State.AllocateReg(GPRs);
6973 State.AllocateStack(VecSize, VecAlign);
6974 return false;
6975 }
6976 // No vector registers remain so pass on the stack.
6977 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6978 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6979 return false;
6980 }
6981
6982 // If all GPRs are consumed, then we pass the argument fully on the stack.
6983 if (NextRegIndex == GPRs.size()) {
6984 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6985 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6986 return false;
6987 }
6988
6989 // Corner case for 32-bit codegen. We have 2 registers to pass the first
6990 // half of the argument, and then need to pass the remaining half on the
6991 // stack.
6992 if (GPRs[NextRegIndex] == PPC::R9) {
6993 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6994 State.addLoc(
6995 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6996
6997 const MCRegister FirstReg = State.AllocateReg(PPC::R9);
6998 const MCRegister SecondReg = State.AllocateReg(PPC::R10);
6999 assert(FirstReg && SecondReg &&
7000 "Allocating R9 or R10 unexpectedly failed.");
7001 State.addLoc(
7002 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
7003 State.addLoc(
7004 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
7005 return false;
7006 }
7007
7008 // We have enough GPRs to fully pass the vector argument, and we have
7009 // already consumed any underaligned registers. Start with the custom
7010 // MemLoc and then the custom RegLocs.
7011 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7012 State.addLoc(
7013 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7014 for (unsigned I = 0; I != VecSize; I += PtrSize) {
7015 const MCRegister Reg = State.AllocateReg(GPRs);
7016 assert(Reg && "Failed to allocate register for vararg vector argument");
7017 State.addLoc(
7018 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7019 }
7020 return false;
7021 }
7022 }
7023 return true;
7024}
7025
7026 // So far, this function is only used by LowerFormalArguments_AIX().
7027 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7028 bool IsPPC64,
7029 bool HasP8Vector,
7030 bool HasVSX) {
7031 assert((IsPPC64 || SVT != MVT::i64) &&
7032 "i64 should have been split for 32-bit codegen.");
7033
7034 switch (SVT) {
7035 default:
7036 report_fatal_error("Unexpected value type for formal argument");
7037 case MVT::i1:
7038 case MVT::i32:
7039 case MVT::i64:
7040 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7041 case MVT::f32:
7042 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
7043 case MVT::f64:
7044 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7045 case MVT::v4f32:
7046 case MVT::v4i32:
7047 case MVT::v8i16:
7048 case MVT::v16i8:
7049 case MVT::v2i64:
7050 case MVT::v2f64:
7051 case MVT::v1i128:
7052 return &PPC::VRRCRegClass;
7053 }
7054}
7055
7056 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7057 SelectionDAG &DAG, SDValue ArgValue,
7058 MVT LocVT, const SDLoc &dl) {
7059 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7060 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7061
7062 if (Flags.isSExt())
7063 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7064 DAG.getValueType(ValVT));
7065 else if (Flags.isZExt())
7066 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7067 DAG.getValueType(ValVT));
7068
7069 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7070}
7071
7072static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7073 const unsigned LASize = FL->getLinkageSize();
7074
7075 if (PPC::GPRCRegClass.contains(Reg)) {
7076 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7077 "Reg must be a valid argument register!");
7078 return LASize + 4 * (Reg - PPC::R3);
7079 }
7080
7081 if (PPC::G8RCRegClass.contains(Reg)) {
7082 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7083 "Reg must be a valid argument register!");
7084 return LASize + 8 * (Reg - PPC::X3);
7085 }
7086
7087 llvm_unreachable("Only general purpose registers expected.");
7088}
7089
7090// AIX ABI Stack Frame Layout:
7091//
7092// Low Memory +--------------------------------------------+
7093// SP +---> | Back chain | ---+
7094// | +--------------------------------------------+ |
7095// | | Saved Condition Register | |
7096// | +--------------------------------------------+ |
7097// | | Saved Linkage Register | |
7098// | +--------------------------------------------+ | Linkage Area
7099// | | Reserved for compilers | |
7100// | +--------------------------------------------+ |
7101// | | Reserved for binders | |
7102// | +--------------------------------------------+ |
7103// | | Saved TOC pointer | ---+
7104// | +--------------------------------------------+
7105// | | Parameter save area |
7106// | +--------------------------------------------+
7107// | | Alloca space |
7108// | +--------------------------------------------+
7109// | | Local variable space |
7110// | +--------------------------------------------+
7111// | | Float/int conversion temporary |
7112// | +--------------------------------------------+
7113// | | Save area for AltiVec registers |
7114// | +--------------------------------------------+
7115// | | AltiVec alignment padding |
7116// | +--------------------------------------------+
7117// | | Save area for VRSAVE register |
7118// | +--------------------------------------------+
7119// | | Save area for General Purpose registers |
7120// | +--------------------------------------------+
7121// | | Save area for Floating Point registers |
7122// | +--------------------------------------------+
7123// +---- | Back chain |
7124// High Memory +--------------------------------------------+
7125//
7126// Specifications:
7127// AIX 7.2 Assembler Language Reference
7128// Subroutine linkage convention
7129
7130SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7131 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7132 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7133 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7134
7135 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7136 CallConv == CallingConv::Fast) &&
7137 "Unexpected calling convention!");
7138
7139 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7140 report_fatal_error("Tail call support is unimplemented on AIX.");
7141
7142 if (useSoftFloat())
7143 report_fatal_error("Soft float support is unimplemented on AIX.");
7144
7145 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7146
7147 const bool IsPPC64 = Subtarget.isPPC64();
7148 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7149
7150 // Assign locations to all of the incoming arguments.
7151 SmallVector<CCValAssign, 16> ArgLocs;
7152 MachineFunction &MF = DAG.getMachineFunction();
7153 MachineFrameInfo &MFI = MF.getFrameInfo();
7154 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7155 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7156
7157 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7158 // Reserve space for the linkage area on the stack.
7159 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7160 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7161 uint64_t SaveStackPos = CCInfo.getStackSize();
7162 bool SaveParams = MF.getFunction().hasFnAttribute("save-reg-params");
7163 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7164
7166
7167 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7168 CCValAssign &VA = ArgLocs[I++];
7169 MVT LocVT = VA.getLocVT();
7170 MVT ValVT = VA.getValVT();
7171 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7172
7173 EVT ArgVT = Ins[VA.getValNo()].ArgVT;
7174 bool ArgSignExt = Ins[VA.getValNo()].Flags.isSExt();
7175 // For compatibility with the AIX XL compiler, the float args in the
7176 // parameter save area are initialized even if the argument is available
7177 // in register. The caller is required to initialize both the register
7178 // and memory, however, the callee can choose to expect it in either.
7179 // The memloc is dismissed here because the argument is retrieved from
7180 // the register.
7181 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7182 continue;
7183
7184 if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
7185 const TargetRegisterClass *RegClass = getRegClassForSVT(
7186 LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX());
7187 // On PPC64, the debugger assumes extended 8-byte values are stored from GPRs.
7188 MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
7189 const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass);
7190 SDValue Parm = DAG.getCopyFromReg(Chain, dl, VReg, SaveVT);
7191 int FI = MFI.CreateFixedObject(SaveVT.getStoreSize(), SaveStackPos, true);
7192 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7193 SDValue StoreReg = DAG.getStore(Chain, dl, Parm, FIN,
7194 MachinePointerInfo(), Align(PtrByteSize));
7195 SaveStackPos = alignTo(SaveStackPos + SaveVT.getStoreSize(), PtrByteSize);
7196 MemOps.push_back(StoreReg);
7197 }
7198
7199 if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
7200 unsigned StoreSize =
7201 Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
7202 SaveStackPos = alignTo(SaveStackPos + StoreSize, PtrByteSize);
7203 }
7204
7205 auto HandleMemLoc = [&]() {
7206 const unsigned LocSize = LocVT.getStoreSize();
7207 const unsigned ValSize = ValVT.getStoreSize();
7208 assert((ValSize <= LocSize) &&
7209 "Object size is larger than size of MemLoc");
7210 int CurArgOffset = VA.getLocMemOffset();
7211 // Objects are right-justified because AIX is big-endian.
7212 if (LocSize > ValSize)
7213 CurArgOffset += LocSize - ValSize;
7214 // Potential tail calls could cause overwriting of argument stack slots.
7215 const bool IsImmutable =
7216 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7217 (CallConv == CallingConv::Fast));
7218 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7219 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7220 SDValue ArgValue =
7221 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7222
7223 // While the ABI specifies the argument type is (sign or zero) extended
7224 // out to register width, not all code is compliant. We truncate and
7225 // re-extend to be more forgiving of these callers when the argument type
7226 // is smaller than register width.
7227 if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
7228 ValVT.isInteger() &&
7229 ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
7230 // It is possible to have either real integer values
7231 // or integers that were not originally integers.
7232 // In the latter case, these could have come from structs,
7233 // and these integers would not have an extend on the parameter.
7234 // Since these types of integers do not have an extend specified
7235 // in the first place, the type of extend that we do should not matter.
7236 EVT TruncatedArgVT = ArgVT.isSimple() && ArgVT.getSimpleVT() == MVT::i1
7237 ? MVT::i8
7238 : ArgVT;
7239 SDValue ArgValueTrunc =
7240 DAG.getNode(ISD::TRUNCATE, dl, TruncatedArgVT, ArgValue);
7241 SDValue ArgValueExt =
7242 ArgSignExt ? DAG.getSExtOrTrunc(ArgValueTrunc, dl, ValVT)
7243 : DAG.getZExtOrTrunc(ArgValueTrunc, dl, ValVT);
7244 InVals.push_back(ArgValueExt);
7245 } else {
7246 InVals.push_back(ArgValue);
7247 }
7248 };
7249
7250 // Vector arguments to VaArg functions are passed both on the stack, and
7251 // in any available GPRs. Load the value from the stack and add the GPRs
7252 // as live ins.
7253 if (VA.isMemLoc() && VA.needsCustom()) {
7254 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7255 assert(isVarArg && "Only use custom memloc for vararg.");
7256 // Remember the ValNo of the custom MemLoc so we can compare it to the
7257 // ValNo of the matching custom RegLocs.
7258 const unsigned OriginalValNo = VA.getValNo();
7259 (void)OriginalValNo;
7260
7261 auto HandleCustomVecRegLoc = [&]() {
7262 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7263 "Missing custom RegLoc.");
7264 VA = ArgLocs[I++];
7265 assert(VA.getValVT().isVector() &&
7266 "Unexpected Val type for custom RegLoc.");
7267 assert(VA.getValNo() == OriginalValNo &&
7268 "ValNo mismatch between custom MemLoc and RegLoc.");
7269 MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
7270 MF.addLiveIn(VA.getLocReg(),
7271 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7272 Subtarget.hasVSX()));
7273 };
7274
7275 HandleMemLoc();
7276 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7277 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7278 // R10.
7279 HandleCustomVecRegLoc();
7280 HandleCustomVecRegLoc();
7281
7282 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7283 // we passed the vector in R5, R6, R7 and R8.
7284 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7285 assert(!IsPPC64 &&
7286 "Only 2 custom RegLocs expected for 64-bit codegen.");
7287 HandleCustomVecRegLoc();
7288 HandleCustomVecRegLoc();
7289 }
7290
7291 continue;
7292 }
7293
7294 if (VA.isRegLoc()) {
7295 if (VA.getValVT().isScalarInteger())
7296 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7297 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7298 switch (VA.getValVT().SimpleTy) {
7299 default:
7300 report_fatal_error("Unhandled value type for argument.");
7301 case MVT::f32:
7302 FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
7303 break;
7304 case MVT::f64:
7305 FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
7306 break;
7307 }
7308 } else if (VA.getValVT().isVector()) {
7309 switch (VA.getValVT().SimpleTy) {
7310 default:
7311 report_fatal_error("Unhandled value type for argument.");
7312 case MVT::v16i8:
7313 FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
7314 break;
7315 case MVT::v8i16:
7316 FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
7317 break;
7318 case MVT::v4i32:
7319 case MVT::v2i64:
7320 case MVT::v1i128:
7321 FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
7322 break;
7323 case MVT::v4f32:
7324 case MVT::v2f64:
7325 FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
7326 break;
7327 }
7328 }
7329 }
7330
7331 if (Flags.isByVal() && VA.isMemLoc()) {
7332 const unsigned Size =
7333 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7334 PtrByteSize);
7335 const int FI = MF.getFrameInfo().CreateFixedObject(
7336 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7337 /* IsAliased */ true);
7338 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7339 InVals.push_back(FIN);
7340
7341 continue;
7342 }
7343
7344 if (Flags.isByVal()) {
7345 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7346
7347 const MCPhysReg ArgReg = VA.getLocReg();
7348 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7349
7350 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7351 const int FI = MF.getFrameInfo().CreateFixedObject(
7352 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7353 /* IsAliased */ true);
7354 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7355 InVals.push_back(FIN);
7356
7357 // Add live ins for all the RegLocs for the same ByVal.
7358 const TargetRegisterClass *RegClass =
7359 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7360
7361 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7362 unsigned Offset) {
7363 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7364 // Since the callers side has left justified the aggregate in the
7365 // register, we can simply store the entire register into the stack
7366 // slot.
7367 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7368 // The store to the fixed-stack object is needed because accessing a
7369 // field of the ByVal will use a gep and load. Ideally we will optimize
7370 // to extracting the value from the register directly, and elide the
7371 // stores when the argument's address is not taken, but that will need to
7372 // be future work.
7373 SDValue Store = DAG.getStore(
7374 CopyFrom.getValue(1), dl, CopyFrom,
7375 DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)),
7376 MachinePointerInfo::getFixedStack(MF, FI, Offset));
7377
7378 MemOps.push_back(Store);
7379 };
7380
7381 unsigned Offset = 0;
7382 HandleRegLoc(VA.getLocReg(), Offset);
7383 Offset += PtrByteSize;
7384 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7385 Offset += PtrByteSize) {
7386 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7387 "RegLocs should be for ByVal argument.");
7388
7389 const CCValAssign RL = ArgLocs[I++];
7390 HandleRegLoc(RL.getLocReg(), Offset);
7392 }
7393
7394 if (Offset != StackSize) {
7395 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7396 "Expected MemLoc for remaining bytes.");
7397 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7398 // Consume the MemLoc. The InVal has already been emitted, so nothing
7399 // more needs to be done.
7400 ++I;
7401 }
7402
7403 continue;
7404 }
7405
7406 if (VA.isRegLoc() && !VA.needsCustom()) {
7407 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7408 Register VReg =
7409 MF.addLiveIn(VA.getLocReg(),
7410 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7411 Subtarget.hasVSX()));
7412 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7413 if (ValVT.isScalarInteger() &&
7414 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7415 ArgValue =
7416 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7417 }
7418 InVals.push_back(ArgValue);
7419 continue;
7420 }
7421 if (VA.isMemLoc()) {
7422 HandleMemLoc();
7423 continue;
7424 }
7425 }
7426
7427 // On AIX a minimum of 8 words is saved to the parameter save area.
7428 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7429 // Area that is at least reserved in the caller of this function.
7430 unsigned CallerReservedArea = std::max<unsigned>(
7431 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7432
7433 // Set the size that is at least reserved in caller of this function. Tail
7434 // call optimized function's reserved stack space needs to be aligned so
7435 // that taking the difference between two stack areas will result in an
7436 // aligned stack.
7437 CallerReservedArea =
7438 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7439 FuncInfo->setMinReservedArea(CallerReservedArea);
7440
7441 if (isVarArg) {
7442 int VAListIndex = 0;
7443 // If any of the optional arguments are passed in registers, then the fixed
7444 // stack object we spill into is not immutable. Create a fixed stack object
7445 // that overlaps the remainder of the parameter save area.
7446 if (CCInfo.getStackSize() < (LinkageSize + MinParameterSaveArea)) {
7447 unsigned FixedStackSize =
7448 LinkageSize + MinParameterSaveArea - CCInfo.getStackSize();
7449 VAListIndex =
7450 MFI.CreateFixedObject(FixedStackSize, CCInfo.getStackSize(),
7451 /* IsImmutable */ false, /* IsAliased */ true);
7452 } else {
7453 // All the arguments passed through ellipses are on the stack. Create a
7454 // dummy fixed stack object the same size as a pointer since we don't
7455 // know the actual size.
7456 VAListIndex =
7457 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(),
7458 /* IsImmutable */ true, /* IsAliased */ true);
7459 }
7460
7461 FuncInfo->setVarArgsFrameIndex(VAListIndex);
7462 SDValue FIN = DAG.getFrameIndex(VAListIndex, PtrVT);
7463
7464 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7465 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7466
7467 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7468 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7469 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7470
7471 // The fixed integer arguments of a variadic function are stored to the
7472 // VarArgsFrameIndex on the stack so that they may be loaded by
7473 // dereferencing the result of va_next.
7474 for (unsigned
7475 GPRIndex = (CCInfo.getStackSize() - LinkageSize) / PtrByteSize,
7476 Offset = 0;
7477 GPRIndex < NumGPArgRegs; ++GPRIndex, Offset += PtrByteSize) {
7478
7479 const Register VReg =
7480 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7481 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7482
7483 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7484 MachinePointerInfo MPI =
7485 MachinePointerInfo::getFixedStack(MF, VAListIndex, Offset);
7486 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MPI);
7487 MemOps.push_back(Store);
7488 // Increment the address for the next argument to store.
7489 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7490 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7491 }
7492 }
7493
7494 if (!MemOps.empty())
7495 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7496
7497 return Chain;
7498}
7499
7500SDValue PPCTargetLowering::LowerCall_AIX(
7501 SDValue Chain, SDValue Callee, CallFlags CFlags,
7502 const SmallVectorImpl<ISD::OutputArg> &Outs,
7503 const SmallVectorImpl<SDValue> &OutVals,
7504 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7505 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7506 const CallBase *CB) const {
7507 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7508 // AIX ABI stack frame layout.
7509
7510 assert((CFlags.CallConv == CallingConv::C ||
7511 CFlags.CallConv == CallingConv::Cold ||
7512 CFlags.CallConv == CallingConv::Fast) &&
7513 "Unexpected calling convention!");
7514
7515 if (CFlags.IsPatchPoint)
7516 report_fatal_error("This call type is unimplemented on AIX.");
7517
7518 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7519
7520 MachineFunction &MF = DAG.getMachineFunction();
7521 SmallVector<CCValAssign, 16> ArgLocs;
7522 CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7523 *DAG.getContext());
7524
7525 // Reserve space for the linkage save area (LSA) on the stack.
7526 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7527 // [SP][CR][LR][2 x reserved][TOC].
7528 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7529 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7530 const bool IsPPC64 = Subtarget.isPPC64();
7531 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7532 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7533 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7534 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7535
7536 // The prolog code of the callee may store up to 8 GPR argument registers to
7537 // the stack, allowing va_start to index over them in memory if the callee
7538 // is variadic.
7539 // Because we cannot tell if this is needed on the caller side, we have to
7540 // conservatively assume that it is needed. As such, make sure we have at
7541 // least enough stack space for the caller to store the 8 GPRs.
7542 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7543 const unsigned NumBytes = std::max<unsigned>(
7544 LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
7545
7546 // Adjust the stack pointer for the new arguments...
7547 // These operations are automatically eliminated by the prolog/epilog pass.
7548 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7549 SDValue CallSeqStart = Chain;
7550
7551 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7552 SmallVector<SDValue, 8> MemOpChains;
7553
7554 // Set up a copy of the stack pointer for loading and storing any
7555 // arguments that may not fit in the registers available for argument
7556 // passing.
7557 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7558 : DAG.getRegister(PPC::R1, MVT::i32);
7559
7560 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7561 const unsigned ValNo = ArgLocs[I].getValNo();
7562 SDValue Arg = OutVals[ValNo];
7563 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7564
7565 if (Flags.isByVal()) {
7566 const unsigned ByValSize = Flags.getByValSize();
7567
7568 // Nothing to do for zero-sized ByVals on the caller side.
7569 if (!ByValSize) {
7570 ++I;
7571 continue;
7572 }
7573
7574 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7575 return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7576 (LoadOffset != 0)
7577 ? DAG.getObjectPtrOffset(
7578 dl, Arg, TypeSize::getFixed(LoadOffset))
7579 : Arg,
7580 MachinePointerInfo(), VT);
7581 };
7582
7583 unsigned LoadOffset = 0;
7584
7585 // Initialize registers, which are fully occupied by the by-val argument.
7586 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7587 SDValue Load = GetLoad(PtrVT, LoadOffset);
7588 MemOpChains.push_back(Load.getValue(1));
7589 LoadOffset += PtrByteSize;
7590 const CCValAssign &ByValVA = ArgLocs[I++];
7591 assert(ByValVA.getValNo() == ValNo &&
7592 "Unexpected location for pass-by-value argument.");
7593 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7594 }
7595
7596 if (LoadOffset == ByValSize)
7597 continue;
7598
7599 // There must be one more loc to handle the remainder.
7600 assert(ArgLocs[I].getValNo() == ValNo &&
7601 "Expected additional location for by-value argument.");
7602
7603 if (ArgLocs[I].isMemLoc()) {
7604 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7605 const CCValAssign &ByValVA = ArgLocs[I++];
7606 ISD::ArgFlagsTy MemcpyFlags = Flags;
7607 // Only memcpy the bytes that don't pass in register.
7608 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7609 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7610 (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7611 dl, Arg, TypeSize::getFixed(LoadOffset))
7612 : Arg,
7613 DAG.getObjectPtrOffset(
7614 dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
7615 CallSeqStart, MemcpyFlags, DAG, dl);
7616 continue;
7617 }
7618
7619 // Initialize the final register residue.
7620 // Any residue that occupies the final by-val arg register must be
7621 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7622 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7623 // 2 and 1 byte loads.
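// Editor's note: a worked example (illustrative only). For that 7-byte
// by-val argument on a 64-bit target, the residue loop below uses
// llvm::bit_floor to peel off 4-, 2- and 1-byte loads and left-justifies
// each one:
//
//   i32 load at offset 0, shifted left by 64 - 32 = 32 bits
//   i16 load at offset 4, shifted left by 64 - 48 = 16 bits
//   i8  load at offset 6, shifted left by 64 - 56 =  8 bits
//
// OR-ing the shifted loads reassembles the 7 bytes in the high-order bytes
// of the register, as the AIX ABI expects.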
7624 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7625 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7626 "Unexpected register residue for by-value argument.");
7627 SDValue ResidueVal;
7628 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7629 const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7630 const MVT VT =
7631 N == 1 ? MVT::i8
7632 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7633 SDValue Load = GetLoad(VT, LoadOffset);
7634 MemOpChains.push_back(Load.getValue(1));
7635 LoadOffset += N;
7636 Bytes += N;
7637
7638 // By-val arguments are passed left-justified in register.
7639 // Every load here needs to be shifted, otherwise a full register load
7640 // should have been used.
7641 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7642 "Unexpected load emitted during handling of pass-by-value "
7643 "argument.");
7644 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7645 EVT ShiftAmountTy =
7646 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7647 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7648 SDValue ShiftedLoad =
7649 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7650 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7651 ShiftedLoad)
7652 : ShiftedLoad;
7653 }
7654
7655 const CCValAssign &ByValVA = ArgLocs[I++];
7656 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7657 continue;
7658 }
7659
7660 CCValAssign &VA = ArgLocs[I++];
7661 const MVT LocVT = VA.getLocVT();
7662 const MVT ValVT = VA.getValVT();
7663
7664 switch (VA.getLocInfo()) {
7665 default:
7666 report_fatal_error("Unexpected argument extension type.");
7667 case CCValAssign::Full:
7668 break;
7669 case CCValAssign::ZExt:
7670 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7671 break;
7672 case CCValAssign::SExt:
7673 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7674 break;
7675 }
7676
7677 if (VA.isRegLoc() && !VA.needsCustom()) {
7678 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7679 continue;
7680 }
7681
7682 // Vector arguments passed to VarArg functions need custom handling when
7683 // they are passed (at least partially) in GPRs.
7684 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7685 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7686 // Store value to its stack slot.
7687 SDValue PtrOff =
7688 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7689 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7690 SDValue Store =
7691 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7692 MemOpChains.push_back(Store);
7693 const unsigned OriginalValNo = VA.getValNo();
7694 // Then load the GPRs from the stack
7695 unsigned LoadOffset = 0;
7696 auto HandleCustomVecRegLoc = [&]() {
7697 assert(I != E && "Unexpected end of CCvalAssigns.");
7698 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7699 "Expected custom RegLoc.");
7700 CCValAssign RegVA = ArgLocs[I++];
7701 assert(RegVA.getValNo() == OriginalValNo &&
7702 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7703 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7704 DAG.getConstant(LoadOffset, dl, PtrVT));
7705 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7706 MemOpChains.push_back(Load.getValue(1));
7707 RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7708 LoadOffset += PtrByteSize;
7709 };
7710
7711 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7712 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7713 // R10.
7714 HandleCustomVecRegLoc();
7715 HandleCustomVecRegLoc();
7716
7717 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7718 ArgLocs[I].getValNo() == OriginalValNo) {
7719 assert(!IsPPC64 &&
7720 "Only 2 custom RegLocs expected for 64-bit codegen.");
7721 HandleCustomVecRegLoc();
7722 HandleCustomVecRegLoc();
7723 }
7724
7725 continue;
7726 }
7727
7728 if (VA.isMemLoc()) {
7729 SDValue PtrOff =
7730 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7731 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7732 MemOpChains.push_back(
7733 DAG.getStore(Chain, dl, Arg, PtrOff,
7735 Subtarget.getFrameLowering()->getStackAlign()));
7736
7737 continue;
7738 }
7739
7740 if (!ValVT.isFloatingPoint())
7742 "Unexpected register handling for calling convention.");
7743
7744 // Custom handling is used for GPR initializations for vararg float
7745 // arguments.
7746 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7747 LocVT.isInteger() &&
7748 "Custom register handling only expected for VarArg.");
7749
7750 SDValue ArgAsInt =
7751 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7752
7753 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7754 // f32 in 32-bit GPR
7755 // f64 in 64-bit GPR
7756 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7757 else if (Arg.getValueType().getFixedSizeInBits() <
7758 LocVT.getFixedSizeInBits())
7759 // f32 in 64-bit GPR.
7760 RegsToPass.push_back(std::make_pair(
7761 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7762 else {
7763 // f64 in two 32-bit GPRs
7764 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7765 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7766 "Unexpected custom register for argument!");
7767 CCValAssign &GPR1 = VA;
7768 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7769 DAG.getConstant(32, dl, MVT::i8));
7770 RegsToPass.push_back(std::make_pair(
7771 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7772
7773 if (I != E) {
7774 // If only 1 GPR was available, there will only be one custom GPR and
7775 // the argument will also pass in memory.
7776 CCValAssign &PeekArg = ArgLocs[I];
7777 if (PeekArg.isRegLoc() && PeekArg.getValNo() == VA.getValNo()) {
7778 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7779 CCValAssign &GPR2 = ArgLocs[I++];
7780 RegsToPass.push_back(std::make_pair(
7781 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7782 }
7783 }
7784 }
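// Editor's sketch (illustrative only): splitting a vararg f64 such as
// 3.141592653589793 (bit pattern 0x400921FB54442D18) across two 32-bit GPRs
// yields
//
//   GPR1 = 0x400921FB   // ArgAsInt >> 32, the most significant word
//   GPR2 = 0x54442D18   // truncate(ArgAsInt), the least significant word
//
// which matches the big-endian memory image of the f64 in its PSA slot.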
7785 }
7786
7787 if (!MemOpChains.empty())
7788 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7789
7790 // For indirect calls, we need to save the TOC base to the stack for
7791 // restoration after the call.
7792 if (CFlags.IsIndirect && !Subtarget.usePointerGlueHelper()) {
7793 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7794 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7795 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7796 const MVT PtrVT = Subtarget.getScalarIntVT();
7797 const unsigned TOCSaveOffset =
7798 Subtarget.getFrameLowering()->getTOCSaveOffset();
7799
7800 setUsesTOCBasePtr(DAG);
7801 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7802 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7803 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7804 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7805 Chain = DAG.getStore(
7806 Val.getValue(1), dl, Val, AddPtr,
7807 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7808 }
7809
7810 // Build a sequence of copy-to-reg nodes chained together with token chain
7811 // and flag operands which copy the outgoing args into the appropriate regs.
7812 SDValue InGlue;
7813 for (auto Reg : RegsToPass) {
7814 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7815 InGlue = Chain.getValue(1);
7816 }
7817
7818 const int SPDiff = 0;
7819 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7820 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7821}
7822
7823bool
7824PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7825 MachineFunction &MF, bool isVarArg,
7826 const SmallVectorImpl<ISD::OutputArg> &Outs,
7827 LLVMContext &Context,
7828 const Type *RetTy) const {
7829 SmallVector<CCValAssign, 16> RVLocs;
7830 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7831 return CCInfo.CheckReturn(
7832 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7833 ? RetCC_PPC_Cold
7834 : RetCC_PPC);
7835}
7836
7837SDValue
7838PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7839 bool isVarArg,
7840 const SmallVectorImpl<ISD::OutputArg> &Outs,
7841 const SmallVectorImpl<SDValue> &OutVals,
7842 const SDLoc &dl, SelectionDAG &DAG) const {
7843 SmallVector<CCValAssign, 16> RVLocs;
7844 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7845 *DAG.getContext());
7846 CCInfo.AnalyzeReturn(Outs,
7847 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7848 ? RetCC_PPC_Cold
7849 : RetCC_PPC);
7850
7851 SDValue Glue;
7852 SmallVector<SDValue, 4> RetOps(1, Chain);
7853
7854 // Copy the result values into the output registers.
7855 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7856 CCValAssign &VA = RVLocs[i];
7857 assert(VA.isRegLoc() && "Can only return in registers!");
7858
7859 SDValue Arg = OutVals[RealResIdx];
7860
7861 switch (VA.getLocInfo()) {
7862 default: llvm_unreachable("Unknown loc info!");
7863 case CCValAssign::Full: break;
7864 case CCValAssign::AExt:
7865 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7866 break;
7867 case CCValAssign::ZExt:
7868 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7869 break;
7870 case CCValAssign::SExt:
7871 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7872 break;
7873 }
7874 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7875 bool isLittleEndian = Subtarget.isLittleEndian();
7876 // Legalize ret f64 -> ret 2 x i32.
7877 SDValue SVal =
7878 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7879 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7880 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7881 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7882 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7883 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7884 Glue = Chain.getValue(1);
7885 VA = RVLocs[++i]; // skip ahead to next loc
7886 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7887 } else
7888 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7889 Glue = Chain.getValue(1);
7890 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7891 }
7892
7893 RetOps[0] = Chain; // Update chain.
7894
7895 // Add the glue if we have it.
7896 if (Glue.getNode())
7897 RetOps.push_back(Glue);
7898
7899 return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
7900}
7901
7902SDValue
7903PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7904 SelectionDAG &DAG) const {
7905 SDLoc dl(Op);
7906
7907 // Get the correct type for integers.
7908 EVT IntVT = Op.getValueType();
7909
7910 // Get the inputs.
7911 SDValue Chain = Op.getOperand(0);
7912 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7913 // Build a DYNAREAOFFSET node.
7914 SDValue Ops[2] = {Chain, FPSIdx};
7915 SDVTList VTs = DAG.getVTList(IntVT);
7916 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7917}
7918
7919SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7920 SelectionDAG &DAG) const {
7921 // When we pop the dynamic allocation we need to restore the SP link.
7922 SDLoc dl(Op);
7923
7924 // Get the correct type for pointers.
7925 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7926
7927 // Construct the stack pointer operand.
7928 bool isPPC64 = Subtarget.isPPC64();
7929 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7930 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7931
7932 // Get the operands for the STACKRESTORE.
7933 SDValue Chain = Op.getOperand(0);
7934 SDValue SaveSP = Op.getOperand(1);
7935
7936 // Load the old link SP.
7937 SDValue LoadLinkSP =
7938 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7939
7940 // Restore the stack pointer.
7941 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7942
7943 // Store the old link SP.
7944 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7945}
7946
7947SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7948 MachineFunction &MF = DAG.getMachineFunction();
7949 bool isPPC64 = Subtarget.isPPC64();
7950 EVT PtrVT = getPointerTy(MF.getDataLayout());
7951
7952 // Get the current return address save index. This is where the link
7953 // register value is spilled when it needs a stack home.
7954 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7955 int RASI = FI->getReturnAddrSaveIndex();
7956
7957 // If the return address save index hasn't been defined yet.
7958 if (!RASI) {
7959 // Find out the fixed offset of the return address save area.
7960 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7961 // Allocate the frame index for the return address save area.
7962 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7963 // Save the result.
7964 FI->setReturnAddrSaveIndex(RASI);
7965 }
7966 return DAG.getFrameIndex(RASI, PtrVT);
7967}
7968
7969SDValue
7970PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7971 MachineFunction &MF = DAG.getMachineFunction();
7972 bool isPPC64 = Subtarget.isPPC64();
7973 EVT PtrVT = getPointerTy(MF.getDataLayout());
7974
7975 // Get the current frame pointer save index. The users of this index will be
7976 // primarily DYNALLOC instructions.
7977 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7978 int FPSI = FI->getFramePointerSaveIndex();
7979
7980 // If the frame pointer save index hasn't been defined yet.
7981 if (!FPSI) {
7982 // Find out the fixed offset of the frame pointer save area.
7983 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7984 // Allocate the frame index for the frame pointer save area.
7985 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7986 // Save the result.
7987 FI->setFramePointerSaveIndex(FPSI);
7988 }
7989 return DAG.getFrameIndex(FPSI, PtrVT);
7990}
7991
7992SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7993 SelectionDAG &DAG) const {
7994 MachineFunction &MF = DAG.getMachineFunction();
7995 // Get the inputs.
7996 SDValue Chain = Op.getOperand(0);
7997 SDValue Size = Op.getOperand(1);
7998 SDLoc dl(Op);
7999
8000 // Get the correct type for pointers.
8001 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8002 // Negate the size.
8003 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
8004 DAG.getConstant(0, dl, PtrVT), Size);
8005 // Construct a node for the frame pointer save index.
8006 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
8007 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
8008 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
8009 if (hasInlineStackProbe(MF))
8010 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
8011 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
8012}
8013
8014SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
8015 SelectionDAG &DAG) const {
8016 MachineFunction &MF = DAG.getMachineFunction();
8017
8018 bool isPPC64 = Subtarget.isPPC64();
8019 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8020
8021 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
8022 return DAG.getFrameIndex(FI, PtrVT);
8023}
8024
8025SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
8026 SelectionDAG &DAG) const {
8027 SDLoc DL(Op);
8028 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
8029 DAG.getVTList(MVT::i32, MVT::Other),
8030 Op.getOperand(0), Op.getOperand(1));
8031}
8032
8033SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
8034 SelectionDAG &DAG) const {
8035 SDLoc DL(Op);
8036 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
8037 Op.getOperand(0), Op.getOperand(1));
8038}
8039
8040SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
8041 if (Op.getValueType().isVector())
8042 return LowerVectorLoad(Op, DAG);
8043
8044 assert(Op.getValueType() == MVT::i1 &&
8045 "Custom lowering only for i1 loads");
8046
8047 // First, extend-load the 8 bits to pointer width, then truncate to 1 bit.
8048
8049 SDLoc dl(Op);
8050 LoadSDNode *LD = cast<LoadSDNode>(Op);
8051
8052 SDValue Chain = LD->getChain();
8053 SDValue BasePtr = LD->getBasePtr();
8054 MachineMemOperand *MMO = LD->getMemOperand();
8055
8056 SDValue NewLD =
8057 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
8058 BasePtr, MVT::i8, MMO);
8059 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
8060
8061 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8062 return DAG.getMergeValues(Ops, dl);
8063}
8064
8065SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8066 if (Op.getOperand(1).getValueType().isVector())
8067 return LowerVectorStore(Op, DAG);
8068
8069 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8070 "Custom lowering only for i1 stores");
8071
8072 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8073
8074 SDLoc dl(Op);
8075 StoreSDNode *ST = cast<StoreSDNode>(Op);
8076
8077 SDValue Chain = ST->getChain();
8078 SDValue BasePtr = ST->getBasePtr();
8079 SDValue Value = ST->getValue();
8080 MachineMemOperand *MMO = ST->getMemOperand();
8081
8082 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32,
8083 Value);
8084 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
8085}
8086
8087// FIXME: Remove this once the ANDI glue bug is fixed:
8088SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8089 assert(Op.getValueType() == MVT::i1 &&
8090 "Custom lowering only for i1 results");
8091
8092 SDLoc DL(Op);
8093 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8094}
8095
8096SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8097 SelectionDAG &DAG) const {
8098
8099 // Implements a vector truncate that fits in a vector register as a shuffle.
8100 // We want to legalize vector truncates down to where the source fits in
8101 // a vector register (and target is therefore smaller than vector register
8102 // size). At that point legalization will try to custom lower the sub-legal
8103 // result and get here - where we can contain the truncate as a single target
8104 // operation.
8105
8106 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8107 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8108 //
8109 // We will implement it for big-endian ordering as this (where x denotes
8110 // undefined):
8111 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8112 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8113 //
8114 // The same operation in little-endian ordering will be:
8115 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8116 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
8117
8118 EVT TrgVT = Op.getValueType();
8119 assert(TrgVT.isVector() && "Vector type expected.");
8120 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8121 EVT EltVT = TrgVT.getVectorElementType();
8122 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8123 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8124 !llvm::has_single_bit<uint32_t>(EltVT.getSizeInBits()))
8125 return SDValue();
8126
8127 SDValue N1 = Op.getOperand(0);
8128 EVT SrcVT = N1.getValueType();
8129 unsigned SrcSize = SrcVT.getSizeInBits();
8130 if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8131 !llvm::has_single_bit<uint32_t>(
8132 SrcVT.getScalarSizeInBits()))
8133 return SDValue();
8134 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8135 return SDValue();
8136
8137 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8138 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8139
8140 SDLoc DL(Op);
8141 SDValue Op1, Op2;
8142 if (SrcSize == 256) {
8143 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8144 EVT SplitVT =
8145 SrcVT.getHalfNumVectorElementsVT(*DAG.getContext());
8146 unsigned SplitNumElts = SplitVT.getVectorNumElements();
8147 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8148 DAG.getConstant(0, DL, VecIdxTy));
8149 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8150 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8151 }
8152 else {
8153 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8154 Op2 = DAG.getUNDEF(WideVT);
8155 }
8156
8157 // First list the elements we want to keep.
8158 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8159 SmallVector<int, 16> ShuffV;
8160 if (Subtarget.isLittleEndian())
8161 for (unsigned i = 0; i < TrgNumElts; ++i)
8162 ShuffV.push_back(i * SizeMult);
8163 else
8164 for (unsigned i = 1; i <= TrgNumElts; ++i)
8165 ShuffV.push_back(i * SizeMult - 1);
8166
8167 // Populate the remaining elements with undefs.
8168 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8169 // ShuffV.push_back(i + WideNumElts);
8170 ShuffV.push_back(WideNumElts + 1);
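// Worked example (a sketch): for the <2 x i16> -> <2 x i8> case above,
// SizeMult = 32 / 16 = 2 and WideNumElts = 16, so the mask is
// <0, 2, 17, 17, ...> on little-endian (LSBs first) and <1, 3, 17, 17, ...>
// on big-endian; indices of 17 select a don't-care lane from Op2.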
8171
8172 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8173 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8174 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8175}
8176
8177 /// LowerSELECT_CC - Lower floating point select_cc's into an fsel instruction
8178 /// when possible.
8179SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8180 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8181 EVT ResVT = Op.getValueType();
8182 EVT CmpVT = Op.getOperand(0).getValueType();
8183 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8184 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
8185 SDLoc dl(Op);
8186
8187 // Without power9-vector, we don't have a native instruction for f128 comparison.
8188 // The following transformation to a libcall is needed for setcc:
8189 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8190 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8191 SDValue Z = DAG.getSetCC(
8192 dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8193 LHS, RHS, CC);
8194 SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8195 return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8196 }
8197
8198 // Not FP, or using SPE? Not a fsel.
8199 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8200 Subtarget.hasSPE())
8201 return Op;
8202
8203 SDNodeFlags Flags = Op.getNode()->getFlags();
8204
8205 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8206 // presence of infinities.
8207 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8208 switch (CC) {
8209 default:
8210 break;
8211 case ISD::SETOGT:
8212 case ISD::SETGT:
8213 return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8214 case ISD::SETOLT:
8215 case ISD::SETLT:
8216 return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8217 }
8218 }
8219
8220 // We might be able to do better than this under some circumstances, but in
8221 // general, fsel-based lowering of select is a finite-math-only optimization.
8222 // For more information, see section F.3 of the 2.06 ISA specification.
8223 // With ISA 3.0, the result may also be f128, which fsel cannot produce.
8224 if (!Flags.hasNoInfs() || !Flags.hasNoNaNs() || ResVT == MVT::f128)
8225 return Op;
8226
8227 // If the RHS of the comparison is a 0.0, we don't need to do the
8228 // subtraction at all.
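// For example (a sketch): select_cc(x, 0.0, tv, fv, SETGE) maps directly to
// FSEL(x, tv, fv), and SETLT simply swaps the arms to give FSEL(x, fv, tv);
// the general case further below must first materialize Cmp = LHS - RHS.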
8229 SDValue Sel1;
8230 if (isFloatingPointZero(RHS))
8231 switch (CC) {
8232 default: break; // SETUO etc aren't handled by fsel.
8233 case ISD::SETNE:
8234 std::swap(TV, FV);
8235 [[fallthrough]];
8236 case ISD::SETEQ:
8237 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8238 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8239 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8240 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8241 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8242 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8243 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8244 case ISD::SETULT:
8245 case ISD::SETLT:
8246 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8247 [[fallthrough]];
8248 case ISD::SETOGE:
8249 case ISD::SETGE:
8250 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8251 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8252 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8253 case ISD::SETUGT:
8254 case ISD::SETGT:
8255 std::swap(TV, FV); // fsel is natively setge, swap operands for setgt
8256 [[fallthrough]];
8257 case ISD::SETOLE:
8258 case ISD::SETLE:
8259 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8260 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8261 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8262 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8263 }
8264
8265 SDValue Cmp;
8266 switch (CC) {
8267 default: break; // SETUO etc aren't handled by fsel.
8268 case ISD::SETNE:
8269 std::swap(TV, FV);
8270 [[fallthrough]];
8271 case ISD::SETEQ:
8272 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8273 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8274 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8275 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8276 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8277 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8278 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8279 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8280 case ISD::SETULT:
8281 case ISD::SETLT:
8282 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8283 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8284 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8285 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8286 case ISD::SETOGE:
8287 case ISD::SETGE:
8288 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8289 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8290 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8291 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8292 case ISD::SETUGT:
8293 case ISD::SETGT:
8294 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8295 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8296 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8297 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8298 case ISD::SETOLE:
8299 case ISD::SETLE:
8300 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8301 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8302 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8303 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8304 }
8305 return Op;
8306}
8307
8308static unsigned getPPCStrictOpcode(unsigned Opc) {
8309 switch (Opc) {
8310 default:
8311 llvm_unreachable("No strict version of this opcode!");
8312 case PPCISD::FCTIDZ:
8313 return PPCISD::STRICT_FCTIDZ;
8314 case PPCISD::FCTIWZ:
8315 return PPCISD::STRICT_FCTIWZ;
8316 case PPCISD::FCTIDUZ:
8317 return PPCISD::STRICT_FCTIDUZ;
8318 case PPCISD::FCTIWUZ:
8319 return PPCISD::STRICT_FCTIWUZ;
8320 case PPCISD::FCFID:
8321 return PPCISD::STRICT_FCFID;
8322 case PPCISD::FCFIDU:
8323 return PPCISD::STRICT_FCFIDU;
8324 case PPCISD::FCFIDS:
8325 return PPCISD::STRICT_FCFIDS;
8326 case PPCISD::FCFIDUS:
8327 return PPCISD::STRICT_FCFIDUS;
8328 }
8329}
8330
8331 static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8332 const PPCSubtarget &Subtarget) {
8333 SDLoc dl(Op);
8334 bool IsStrict = Op->isStrictFPOpcode();
8335 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8336 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8337
8338 // TODO: Any other flags to propagate?
8339 SDNodeFlags Flags;
8340 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8341
8342 // For strict nodes, source is the second operand.
8343 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8344 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8345 MVT DestTy = Op.getSimpleValueType();
8346 assert(Src.getValueType().isFloatingPoint() &&
8347 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8348 DestTy == MVT::i64) &&
8349 "Invalid FP_TO_INT types");
8350 if (Src.getValueType() == MVT::f32) {
8351 if (IsStrict) {
8352 Src =
8353 DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8354 DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8355 Chain = Src.getValue(1);
8356 } else
8357 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8358 }
8359 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8360 DestTy = Subtarget.getScalarIntVT();
8361 unsigned Opc = ISD::DELETED_NODE;
8362 switch (DestTy.SimpleTy) {
8363 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8364 case MVT::i32:
8365 Opc = IsSigned ? PPCISD::FCTIWZ
8366 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8367 break;
8368 case MVT::i64:
8369 assert((IsSigned || Subtarget.hasFPCVT()) &&
8370 "i64 FP_TO_UINT is supported only with FPCVT");
8371 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8372 }
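// E.g. (a sketch): fptosi f64 -> i32 selects FCTIWZ; fptoui f64 -> i32
// selects FCTIWUZ when FPCVT is available, and otherwise falls back to
// FCTIDZ, with the narrowing to 32 bits handled by the callers' store/load.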
8373 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8374 SDValue Conv;
8375 if (IsStrict) {
8376 Opc = getPPCStrictOpcode(Opc);
8377 Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8378 Flags);
8379 } else {
8380 Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8381 }
8382 return Conv;
8383}
8384
8385void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8386 SelectionDAG &DAG,
8387 const SDLoc &dl) const {
8388 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8389 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8390 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8391 bool IsStrict = Op->isStrictFPOpcode();
8392
8393 // Convert the FP value to an int value through memory.
8394 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8395 (IsSigned || Subtarget.hasFPCVT());
8396 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8397 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8398 MachinePointerInfo MPI =
8399 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8400
8401 // Emit a store to the stack slot.
8402 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8403 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8404 if (i32Stack) {
8405 MachineFunction &MF = DAG.getMachineFunction();
8406 Alignment = Align(4);
8407 MachineMemOperand *MMO =
8408 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8409 SDValue Ops[] = { Chain, Tmp, FIPtr };
8410 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8411 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8412 } else
8413 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
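// Taken together (an illustrative sketch), the f64 -> i32 path with STFIWX
// emits roughly:
//   fctiwz f0, f1        ; Tmp, the conversion done in an FPR
//   stfiwx f0, 0, rSlot  ; store the integer word to the stack temporary
// after which the caller loads the GPR result back from RLI.Ptr.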
8414
8415 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8416 // add in a bias on big endian.
8417 if (Op.getValueType() == MVT::i32 && !i32Stack &&
8418 !Subtarget.isLittleEndian()) {
8419 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8420 DAG.getConstant(4, dl, FIPtr.getValueType()));
8421 MPI = MPI.getWithOffset(4);
8422 }
8423
8424 RLI.Chain = Chain;
8425 RLI.Ptr = FIPtr;
8426 RLI.MPI = MPI;
8427 RLI.Alignment = Alignment;
8428}
8429
8430/// Custom lowers floating point to integer conversions to use
8431/// the direct move instructions available in ISA 2.07 to avoid the
8432/// need for load/store combinations.
8433SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8434 SelectionDAG &DAG,
8435 const SDLoc &dl) const {
8436 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8437 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8438 if (Op->isStrictFPOpcode())
8439 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8440 else
8441 return Mov;
8442}
8443
8444SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8445 const SDLoc &dl) const {
8446 bool IsStrict = Op->isStrictFPOpcode();
8447 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8448 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8449 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8450 EVT SrcVT = Src.getValueType();
8451 EVT DstVT = Op.getValueType();
8452
8453 // FP to INT conversions are legal for f128.
8454 if (SrcVT == MVT::f128)
8455 return Subtarget.hasP9Vector() ? Op : SDValue();
8456
8457 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8458 // PPC (the libcall is not available).
8459 if (SrcVT == MVT::ppcf128) {
8460 if (DstVT == MVT::i32) {
8461 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8462 // set other fast-math flags to FP operations in both strict and
8463 // non-strict cases. (FP_TO_SINT, FSUB)
8464 SDNodeFlags Flags;
8465 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8466
8467 if (IsSigned) {
8468 SDValue Lo, Hi;
8469 std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8470
8471 // Add the two halves of the long double in round-to-zero mode, and use
8472 // a smaller FP_TO_SINT.
8473 if (IsStrict) {
8474 SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8475 DAG.getVTList(MVT::f64, MVT::Other),
8476 {Op.getOperand(0), Lo, Hi}, Flags);
8477 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8478 DAG.getVTList(MVT::i32, MVT::Other),
8479 {Res.getValue(1), Res}, Flags);
8480 } else {
8481 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8482 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8483 }
8484 } else {
8485 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8486 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8487 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8488 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8489 if (IsStrict) {
8490 // Sel = Src < 0x80000000
8491 // FltOfs = select Sel, 0.0, 0x80000000
8492 // IntOfs = select Sel, 0, 0x80000000
8493 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8494 SDValue Chain = Op.getOperand(0);
8495 EVT SetCCVT =
8496 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8497 EVT DstSetCCVT =
8498 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8499 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8500 Chain, true);
8501 Chain = Sel.getValue(1);
8502
8503 SDValue FltOfs = DAG.getSelect(
8504 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8505 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8506
8507 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8508 DAG.getVTList(SrcVT, MVT::Other),
8509 {Chain, Src, FltOfs}, Flags);
8510 Chain = Val.getValue(1);
8511 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8512 DAG.getVTList(DstVT, MVT::Other),
8513 {Chain, Val}, Flags);
8514 Chain = SInt.getValue(1);
8515 SDValue IntOfs = DAG.getSelect(
8516 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8517 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8518 return DAG.getMergeValues({Result, Chain}, dl);
8519 } else {
8520 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8521 // FIXME: generated code sucks.
8522 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8523 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8524 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8525 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8526 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8527 }
8528 }
8529 }
8530
8531 return SDValue();
8532 }
8533
8534 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8535 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8536
8537 ReuseLoadInfo RLI;
8538 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8539
8540 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8541 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8542}
8543
8544// We're trying to insert a regular store, S, and then a load, L. If the
8545// incoming value, O, is a load, we might just be able to have our load use the
8546// address used by O. However, we don't know if anything else will store to
8547// that address before we can load from it. To prevent this situation, we need
8548// to insert our load, L, into the chain as a peer of O. To do this, we give L
8549// the same chain operand as O, we create a token factor from the chain results
8550// of O and L, and we replace all uses of O's chain result with that token
8551// factor (this last part is handled by makeEquivalentMemoryOrdering).
8552bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8553 ReuseLoadInfo &RLI,
8554 SelectionDAG &DAG,
8555 ISD::LoadExtType ET) const {
8556 // Conservatively skip reusing for constrained FP nodes.
8557 if (Op->isStrictFPOpcode())
8558 return false;
8559
8560 SDLoc dl(Op);
8561 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8562 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8563 if (ET == ISD::NON_EXTLOAD &&
8564 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8565 isOperationLegalOrCustom(Op.getOpcode(),
8566 Op.getOperand(0).getValueType())) {
8567
8568 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8569 return true;
8570 }
8571
8572 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8573 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8574 LD->isNonTemporal())
8575 return false;
8576 if (LD->getMemoryVT() != MemVT)
8577 return false;
8578
8579 // If the result of the load is an illegal type, then we can't build a
8580 // valid chain for reuse since the legalised loads and the token factor node
8581 // that ties the legalised loads together use a different output chain than
8582 // the illegal load.
8583 if (!isTypeLegal(LD->getValueType(0)))
8584 return false;
8585
8586 RLI.Ptr = LD->getBasePtr();
8587 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8588 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8589 "Non-pre-inc AM on PPC?");
8590 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8591 LD->getOffset());
8592 }
8593
8594 RLI.Chain = LD->getChain();
8595 RLI.MPI = LD->getPointerInfo();
8596 RLI.IsDereferenceable = LD->isDereferenceable();
8597 RLI.IsInvariant = LD->isInvariant();
8598 RLI.Alignment = LD->getAlign();
8599 RLI.AAInfo = LD->getAAInfo();
8600 RLI.Ranges = LD->getRanges();
8601
8602 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8603 return true;
8604}
8605
8606 /// Analyze the profitability of a direct move:
8607 /// prefer a float load over an int load plus direct move
8608 /// when the loaded integer value has no integer uses.
8609bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8610 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8611 if (Origin->getOpcode() != ISD::LOAD)
8612 return true;
8613
8614 // If there is no LXSIBZX/LXSIHZX, like Power8,
8615 // prefer direct move if the memory size is 1 or 2 bytes.
8616 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8617 if (!Subtarget.hasP9Vector() &&
8618 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8619 return true;
8620
8621 for (SDUse &Use : Origin->uses()) {
8622
8623 // Only look at the users of the loaded value.
8624 if (Use.getResNo() != 0)
8625 continue;
8626
8627 SDNode *User = Use.getUser();
8628 if (User->getOpcode() != ISD::SINT_TO_FP &&
8629 User->getOpcode() != ISD::UINT_TO_FP &&
8630 User->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8631 User->getOpcode() != ISD::STRICT_UINT_TO_FP)
8632 return true;
8633 }
8634
8635 return false;
8636}
8637
8638 static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8639 const PPCSubtarget &Subtarget,
8640 SDValue Chain = SDValue()) {
8641 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8642 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8643 SDLoc dl(Op);
8644
8645 // TODO: Any other flags to propagate?
8646 SDNodeFlags Flags;
8647 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8648
8649 // If we have FCFIDS, then use it when converting to single-precision.
8650 // Otherwise, convert to double-precision and then round.
8651 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8652 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8653 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8654 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8655 if (Op->isStrictFPOpcode()) {
8656 if (!Chain)
8657 Chain = Op.getOperand(0);
8658 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8659 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8660 } else
8661 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8662}
8663
8664/// Custom lowers integer to floating point conversions to use
8665/// the direct move instructions available in ISA 2.07 to avoid the
8666/// need for load/store combinations.
8667SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8668 SelectionDAG &DAG,
8669 const SDLoc &dl) const {
8670 assert((Op.getValueType() == MVT::f32 ||
8671 Op.getValueType() == MVT::f64) &&
8672 "Invalid floating point type as target of conversion");
8673 assert(Subtarget.hasFPCVT() &&
8674 "Int to FP conversions with direct moves require FPCVT");
8675 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8676 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8677 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8678 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8679 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8680 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8681 return convertIntToFP(Op, Mov, DAG, Subtarget);
8682}
8683
8684static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8685
8686 EVT VecVT = Vec.getValueType();
8687 assert(VecVT.isVector() && "Expected a vector type.");
8688 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8689
8690 EVT EltVT = VecVT.getVectorElementType();
8691 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8692 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8693
8694 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8695 SmallVector<SDValue, 16> Ops(NumConcat);
8696 Ops[0] = Vec;
8697 SDValue UndefVec = DAG.getUNDEF(VecVT);
8698 for (unsigned i = 1; i < NumConcat; ++i)
8699 Ops[i] = UndefVec;
8700
8701 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8702}
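// E.g. (a sketch): widening a 64-bit v4i16 input computes
// NumConcat = 8 / 4 = 2 and concatenates the vector with one undef v4i16 to
// form a full 128-bit v8i16.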
8703
8704SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8705 const SDLoc &dl) const {
8706 bool IsStrict = Op->isStrictFPOpcode();
8707 unsigned Opc = Op.getOpcode();
8708 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8711 "Unexpected conversion type");
8712 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8713 "Supports conversions to v2f64/v4f32 only.");
8714
8715 // TODO: Any other flags to propagate?
8716 SDNodeFlags Flags;
8717 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8718
8719 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8720 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8721
8722 SDValue Wide = widenVec(DAG, Src, dl);
8723 EVT WideVT = Wide.getValueType();
8724 unsigned WideNumElts = WideVT.getVectorNumElements();
8725 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8726
8727 SmallVector<int, 16> ShuffV;
8728 for (unsigned i = 0; i < WideNumElts; ++i)
8729 ShuffV.push_back(i + WideNumElts);
8730
8731 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8732 int SaveElts = FourEltRes ? 4 : 2;
8733 if (Subtarget.isLittleEndian())
8734 for (int i = 0; i < SaveElts; i++)
8735 ShuffV[i * Stride] = i;
8736 else
8737 for (int i = 1; i <= SaveElts; i++)
8738 ShuffV[i * Stride - 1] = i - 1;
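// Worked example (a sketch): for a v4i16 source converted to v4f32, the
// widened source is v8i16 (WideNumElts = 8, Stride = 2), and the
// little-endian mask becomes <0, 9, 1, 11, 2, 13, 3, 15>: each source lane
// is paired with a lane of ShuffleSrc2 (zeros for unsigned, undef for
// signed), forming the i32-sized elements consumed by the bitcast below.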
8739
8740 SDValue ShuffleSrc2 =
8741 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8742 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8743
8744 SDValue Extend;
8745 if (SignedConv) {
8746 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8747 EVT ExtVT = Src.getValueType();
8748 if (Subtarget.hasP9Altivec())
8749 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8750 IntermediateVT.getVectorNumElements());
8751
8752 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8753 DAG.getValueType(ExtVT));
8754 } else
8755 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8756
8757 if (IsStrict)
8758 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8759 {Op.getOperand(0), Extend}, Flags);
8760
8761 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8762}
8763
8764SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8765 SelectionDAG &DAG) const {
8766 SDLoc dl(Op);
8767 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8768 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8769 bool IsStrict = Op->isStrictFPOpcode();
8770 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8771 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8772
8773 // TODO: Any other flags to propagate?
8774 SDNodeFlags Flags;
8775 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8776
8777 EVT InVT = Src.getValueType();
8778 EVT OutVT = Op.getValueType();
8779 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8780 isOperationCustom(Op.getOpcode(), InVT))
8781 return LowerINT_TO_FPVector(Op, DAG, dl);
8782
8783 // Conversions to f128 are legal.
8784 if (Op.getValueType() == MVT::f128)
8785 return Subtarget.hasP9Vector() ? Op : SDValue();
8786
8787 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8788 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8789 return SDValue();
8790
8791 if (Src.getValueType() == MVT::i1) {
8792 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8793 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8794 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8795 if (IsStrict)
8796 return DAG.getMergeValues({Sel, Chain}, dl);
8797 else
8798 return Sel;
8799 }
8800
8801 // If we have direct moves, we can do the entire conversion in registers and
8802 // skip the store/load; however, without FPCVT we can't do most conversions.
8803 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8804 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8805 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8806
8807 assert((IsSigned || Subtarget.hasFPCVT()) &&
8808 "UINT_TO_FP is supported only with FPCVT");
8809
8810 if (Src.getValueType() == MVT::i64) {
8811 SDValue SINT = Src;
8812 // When converting to single-precision, we actually need to convert
8813 // to double-precision first and then round to single-precision.
8814 // To avoid double-rounding effects during that operation, we have
8815 // to prepare the input operand. Bits that might be truncated when
8816 // converting to double-precision are replaced by a bit that won't
8817 // be lost at this stage, but is below the single-precision rounding
8818 // position.
8819 //
8820 // However, if afn is in effect, accept double
8821 // rounding to avoid the extra overhead.
8822 // FIXME: Currently INT_TO_FP can't support fast math flags because
8823 // of nneg flag, thus Op->getFlags().hasApproximateFuncs() is always
8824 // false.
8825 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT() &&
8826 !Op->getFlags().hasApproximateFuncs()) {
8827
8828 // Twiddle input to make sure the low 11 bits are zero. (If this
8829 // is the case, we are guaranteed the value will fit into the 53 bit
8830 // mantissa of an IEEE double-precision value without rounding.)
8831 // If any of those low 11 bits were not zero originally, make sure
8832 // bit 12 (value 2048) is set instead, so that the final rounding
8833 // to single-precision gets the correct result.
8834 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8835 SINT, DAG.getConstant(2047, dl, MVT::i64));
8836 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8837 Round, DAG.getConstant(2047, dl, MVT::i64));
8838 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8839 Round = DAG.getNode(ISD::AND, dl, MVT::i64, Round,
8840 DAG.getSignedConstant(-2048, dl, MVT::i64));
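// Worked example (a sketch): for SINT = 0x1001 the low 11 bits are nonzero,
// so (SINT & 2047) + 2047 = 0x800 carries into bit 11; OR-ing with SINT and
// masking with -2048 yields 0x1800: the sticky low bits are folded into
// bit 11 (value 2048), which survives the exact i64 -> f64 conversion.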
8841
8842 // However, we cannot use that value unconditionally: if the magnitude
8843 // of the input value is small, the bit-twiddling we did above might
8844 // end up visibly changing the output. Fortunately, in that case, we
8845 // don't need to twiddle bits since the original input will convert
8846 // exactly to double-precision floating-point already. Therefore,
8847 // construct a conditional to use the original value if the top 11
8848 // bits are all sign-bit copies, and use the rounded value computed
8849 // above otherwise.
8850 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8851 SINT, DAG.getConstant(53, dl, MVT::i32));
8852 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8853 Cond, DAG.getConstant(1, dl, MVT::i64));
8854 Cond = DAG.getSetCC(
8855 dl,
8856 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8857 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8858
8859 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8860 }
8861
8862 ReuseLoadInfo RLI;
8863 SDValue Bits;
8864
8865 MachineFunction &MF = DAG.getMachineFunction();
8866 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8867 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8868 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8869 if (RLI.ResChain)
8870 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8871 } else if (Subtarget.hasLFIWAX() &&
8872 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8873 MachineMemOperand *MMO =
8874 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8875 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8876 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8877 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8878 DAG.getVTList(MVT::f64, MVT::Other),
8879 Ops, MVT::i32, MMO);
8880 if (RLI.ResChain)
8881 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8882 } else if (Subtarget.hasFPCVT() &&
8883 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8884 MachineMemOperand *MMO =
8885 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8886 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8887 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8888 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8889 DAG.getVTList(MVT::f64, MVT::Other),
8890 Ops, MVT::i32, MMO);
8891 if (RLI.ResChain)
8892 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8893 } else if (((Subtarget.hasLFIWAX() &&
8894 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8895 (Subtarget.hasFPCVT() &&
8896 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8897 SINT.getOperand(0).getValueType() == MVT::i32) {
8898 MachineFrameInfo &MFI = MF.getFrameInfo();
8899 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8900
8901 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8902 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8903
8904 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8905 MachinePointerInfo::getFixedStack(
8906 DAG.getMachineFunction(), FrameIdx));
8907 Chain = Store;
8908
8909 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8910 "Expected an i32 store");
8911
8912 RLI.Ptr = FIdx;
8913 RLI.Chain = Chain;
8914 RLI.MPI =
8915 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8916 RLI.Alignment = Align(4);
8917
8918 MachineMemOperand *MMO =
8919 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8920 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8921 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8922 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8923 PPCISD::LFIWZX : PPCISD::LFIWAX,
8924 dl, DAG.getVTList(MVT::f64, MVT::Other),
8925 Ops, MVT::i32, MMO);
8926 Chain = Bits.getValue(1);
8927 } else
8928 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8929
8930 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8931 if (IsStrict)
8932 Chain = FP.getValue(1);
8933
8934 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8935 if (IsStrict)
8936 FP = DAG.getNode(
8937 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
8938 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)},
8939 Flags);
8940 else
8941 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8942 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8943 }
8944 return FP;
8945 }
8946
8947 assert(Src.getValueType() == MVT::i32 &&
8948 "Unhandled INT_TO_FP type in custom expander!");
8949 // Since we only generate this in 64-bit mode, we can take advantage of
8950 // 64-bit registers. In particular, sign extend the input value into the
8951 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8952 // then lfd it and fcfid it.
8953 MachineFunction &MF = DAG.getMachineFunction();
8954 MachineFrameInfo &MFI = MF.getFrameInfo();
8955 EVT PtrVT = getPointerTy(MF.getDataLayout());
8956
8957 SDValue Ld;
8958 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8959 ReuseLoadInfo RLI;
8960 bool ReusingLoad;
8961 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8962 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8963 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8964
8965 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8966 MachinePointerInfo::getFixedStack(
8967 DAG.getMachineFunction(), FrameIdx));
8968 Chain = Store;
8969
8970 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8971 "Expected an i32 store");
8972
8973 RLI.Ptr = FIdx;
8974 RLI.Chain = Chain;
8975 RLI.MPI =
8976 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8977 RLI.Alignment = Align(4);
8978 }
8979
8980 MachineMemOperand *MMO =
8981 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8982 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8983 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8984 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8985 DAG.getVTList(MVT::f64, MVT::Other), Ops,
8986 MVT::i32, MMO);
8987 Chain = Ld.getValue(1);
8988 if (ReusingLoad && RLI.ResChain) {
8989 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Ld.getValue(1));
8990 }
8991 } else {
8992 assert(Subtarget.isPPC64() &&
8993 "i32->FP without LFIWAX supported only on PPC64");
8994
8995 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8996 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8997
8998 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8999
9000 // STD the extended value into the stack slot.
9001 SDValue Store = DAG.getStore(
9002 Chain, dl, Ext64, FIdx,
9003 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9004 Chain = Store;
9005
9006 // Load the value as a double.
9007 Ld = DAG.getLoad(
9008 MVT::f64, dl, Chain, FIdx,
9009 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9010 Chain = Ld.getValue(1);
9011 }
9012
9013 // FCFID it and return it.
9014 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
9015 if (IsStrict)
9016 Chain = FP.getValue(1);
9017 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9018 if (IsStrict)
9019 FP = DAG.getNode(
9020 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
9021 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}, Flags);
9022 else
9023 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9024 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9025 }
9026 return FP;
9027}
9028
9029SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
9030 SelectionDAG &DAG) const {
9031 SDLoc Dl(Op);
9032 MachineFunction &MF = DAG.getMachineFunction();
9033 EVT PtrVT = getPointerTy(MF.getDataLayout());
9034 SDValue Chain = Op.getOperand(0);
9035
9036 // If the requested mode is constant, just use the simpler mtfsb/mffscrni.
9037 if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
9038 uint64_t Mode = CVal->getZExtValue();
9039 assert(Mode < 4 && "Unsupported rounding mode!");
9040 unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
9041 if (Subtarget.isISA3_0())
9042 return SDValue(
9043 DAG.getMachineNode(
9044 PPC::MFFSCRNI, Dl, {MVT::f64, MVT::Other},
9045 {DAG.getConstant(InternalRnd, Dl, MVT::i32, true), Chain}),
9046 1);
9047 SDNode *SetHi = DAG.getMachineNode(
9048 (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9049 {DAG.getConstant(30, Dl, MVT::i32, true), Chain});
9050 SDNode *SetLo = DAG.getMachineNode(
9051 (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9052 {DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
9053 return SDValue(SetLo, 0);
9054 }
9055
9056 // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
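// Concretely: 0 (toward zero) -> 1, 1 (to nearest) -> 0, 2 (+inf) -> 2,
// 3 (-inf) -> 3; only the two lowest modes trade places between the LLVM
// and Power encodings.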
9057 SDValue One = DAG.getConstant(1, Dl, MVT::i32);
9058 SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
9059 DAG.getConstant(3, Dl, MVT::i32));
9060 SDValue DstFlag = DAG.getNode(
9061 ISD::XOR, Dl, MVT::i32, SrcFlag,
9062 DAG.getNode(ISD::AND, Dl, MVT::i32,
9063 DAG.getNOT(Dl,
9064 DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
9065 MVT::i32),
9066 One));
9067 // For Power9, there's the faster mffscrn, so we don't need to read the FPSCR first.
9068 SDValue MFFS;
9069 if (!Subtarget.isISA3_0()) {
9070 MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
9071 Chain = MFFS.getValue(1);
9072 }
9073 SDValue NewFPSCR;
9074 if (Subtarget.isPPC64()) {
9075 if (Subtarget.isISA3_0()) {
9076 NewFPSCR = DAG.getAnyExtOrTrunc(DstFlag, Dl, MVT::i64);
9077 } else {
9078 // Set the last two bits (rounding mode) of bitcasted FPSCR.
9079 SDNode *InsertRN = DAG.getMachineNode(
9080 PPC::RLDIMI, Dl, MVT::i64,
9081 {DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
9082 DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag),
9083 DAG.getTargetConstant(0, Dl, MVT::i32),
9084 DAG.getTargetConstant(62, Dl, MVT::i32)});
9085 NewFPSCR = SDValue(InsertRN, 0);
9086 }
9087 NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
9088 } else {
9089 // In 32-bit mode, store f64, load and update the lower half.
9090 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9091 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9092 SDValue Addr = Subtarget.isLittleEndian()
9093 ? StackSlot
9094 : DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
9095 DAG.getConstant(4, Dl, PtrVT));
9096 if (Subtarget.isISA3_0()) {
9097 Chain = DAG.getStore(Chain, Dl, DstFlag, Addr, MachinePointerInfo());
9098 } else {
9099 Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
9100 SDValue Tmp =
9101 DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
9102 Chain = Tmp.getValue(1);
9103 Tmp = SDValue(DAG.getMachineNode(
9104 PPC::RLWIMI, Dl, MVT::i32,
9105 {Tmp, DstFlag, DAG.getTargetConstant(0, Dl, MVT::i32),
9106 DAG.getTargetConstant(30, Dl, MVT::i32),
9107 DAG.getTargetConstant(31, Dl, MVT::i32)}),
9108 0);
9109 Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
9110 }
9111 NewFPSCR =
9112 DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
9113 Chain = NewFPSCR.getValue(1);
9114 }
9115 if (Subtarget.isISA3_0())
9116 return SDValue(DAG.getMachineNode(PPC::MFFSCRN, Dl, {MVT::f64, MVT::Other},
9117 {NewFPSCR, Chain}),
9118 1);
9119 SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
9120 SDNode *MTFSF = DAG.getMachineNode(
9121 PPC::MTFSF, Dl, MVT::Other,
9122 {DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain});
9123 return SDValue(MTFSF, 0);
9124}
9125
9126SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9127 SelectionDAG &DAG) const {
9128 SDLoc dl(Op);
9129 /*
9130 The rounding mode is in bits 30:31 of the FPSCR, and has the following
9131 settings:
9132 00 Round to nearest
9133 01 Round to 0
9134 10 Round to +inf
9135 11 Round to -inf
9136
9137 GET_ROUNDING, on the other hand, expects the following:
9138 -1 Undefined
9139 0 Round to 0
9140 1 Round to nearest
9141 2 Round to +inf
9142 3 Round to -inf
9143
9144 To perform the conversion, we do:
9145 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
9146 */
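// Spelled out (a sketch): FPSCR 00 (nearest) -> 1, 01 (toward zero) -> 0,
// 10 (+inf) -> 2, 11 (-inf) -> 3, matching the GET_ROUNDING table above.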
9147
9148 MachineFunction &MF = DAG.getMachineFunction();
9149 EVT VT = Op.getValueType();
9150 EVT PtrVT = getPointerTy(MF.getDataLayout());
9151
9152 // Save FP Control Word to register
9153 SDValue Chain = Op.getOperand(0);
9154 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
9155 Chain = MFFS.getValue(1);
9156
9157 SDValue CWD;
9158 if (isTypeLegal(MVT::i64)) {
9159 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
9160 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
9161 } else {
9162 // Save FP register to stack slot
9163 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9164 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9165 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
9166
9167 // Load FP Control Word from low 32 bits of stack slot.
9169 "Stack slot adjustment is valid only on big endian subtargets!");
9170 SDValue Four = DAG.getConstant(4, dl, PtrVT);
9171 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
9172 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
9173 Chain = CWD.getValue(1);
9174 }
9175
9176 // Transform as necessary
9177 SDValue CWD1 =
9178 DAG.getNode(ISD::AND, dl, MVT::i32,
9179 CWD, DAG.getConstant(3, dl, MVT::i32));
9180 SDValue CWD2 =
9181 DAG.getNode(ISD::SRL, dl, MVT::i32,
9182 DAG.getNode(ISD::AND, dl, MVT::i32,
9183 DAG.getNode(ISD::XOR, dl, MVT::i32,
9184 CWD, DAG.getConstant(3, dl, MVT::i32)),
9185 DAG.getConstant(3, dl, MVT::i32)),
9186 DAG.getConstant(1, dl, MVT::i32));
9187
9188 SDValue RetVal =
9189 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
9190
9191 RetVal =
9192 DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
9193 dl, VT, RetVal);
9194
9195 return DAG.getMergeValues({RetVal, Chain}, dl);
9196}
9197
9198SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9199 EVT VT = Op.getValueType();
9200 uint64_t BitWidth = VT.getSizeInBits();
9201 SDLoc dl(Op);
9202 assert(Op.getNumOperands() == 3 &&
9203 VT == Op.getOperand(1).getValueType() &&
9204 "Unexpected SHL!");
9205
9206 // Expand into a bunch of logical ops. Note that these ops
9207 // depend on the PPC behavior for oversized shift amounts.
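// E.g. (a sketch) with BitWidth = 32 and Amt = 40: Hi << 40 and
// Lo >> (32 - 40) both produce 0 because oversized amounts shift out
// entirely on PPC, leaving OutHi = Lo << 8, exactly the high word of the
// full 64-bit shift.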
9208 SDValue Lo = Op.getOperand(0);
9209 SDValue Hi = Op.getOperand(1);
9210 SDValue Amt = Op.getOperand(2);
9211 EVT AmtVT = Amt.getValueType();
9212
9213 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9214 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9215 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9216 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9217 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9218 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9219 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9220 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9221 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9222 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9223 SDValue OutOps[] = { OutLo, OutHi };
9224 return DAG.getMergeValues(OutOps, dl);
9225}
9226
9227SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9228 EVT VT = Op.getValueType();
9229 SDLoc dl(Op);
9230 uint64_t BitWidth = VT.getSizeInBits();
9231 assert(Op.getNumOperands() == 3 &&
9232 VT == Op.getOperand(1).getValueType() &&
9233 "Unexpected SRL!");
9234
9235 // Expand into a bunch of logical ops. Note that these ops
9236 // depend on the PPC behavior for oversized shift amounts.
9237 SDValue Lo = Op.getOperand(0);
9238 SDValue Hi = Op.getOperand(1);
9239 SDValue Amt = Op.getOperand(2);
9240 EVT AmtVT = Amt.getValueType();
9241
9242 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9243 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9244 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9245 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9246 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9247 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9248 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9249 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9250 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9251 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9252 SDValue OutOps[] = { OutLo, OutHi };
9253 return DAG.getMergeValues(OutOps, dl);
9254}
9255
9256SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9257 SDLoc dl(Op);
9258 EVT VT = Op.getValueType();
9259 uint64_t BitWidth = VT.getSizeInBits();
9260 assert(Op.getNumOperands() == 3 &&
9261 VT == Op.getOperand(1).getValueType() &&
9262 "Unexpected SRA!");
9263
9264 // Expand into a bunch of logical ops, followed by a select_cc.
9265 SDValue Lo = Op.getOperand(0);
9266 SDValue Hi = Op.getOperand(1);
9267 SDValue Amt = Op.getOperand(2);
9268 EVT AmtVT = Amt.getValueType();
9269
9270 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9271 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9272 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9273 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9274 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9275 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9276 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9277 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9278 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9279 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9280 Tmp4, Tmp6, ISD::SETLE);
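// The select_cc handles the boundary (a sketch): for Amt <= BitWidth,
// Tmp5 = Amt - BitWidth is <= 0 and the OR of the partial shifts (Tmp4) is
// the correct low word; for larger amounts the low word must instead be
// Hi >> (Amt - BitWidth) with sign fill (Tmp6).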
9281 SDValue OutOps[] = { OutLo, OutHi };
9282 return DAG.getMergeValues(OutOps, dl);
9283}
9284
9285SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9286 SelectionDAG &DAG) const {
9287 SDLoc dl(Op);
9288 EVT VT = Op.getValueType();
9289 unsigned BitWidth = VT.getSizeInBits();
9290
9291 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9292 SDValue X = Op.getOperand(0);
9293 SDValue Y = Op.getOperand(1);
9294 SDValue Z = Op.getOperand(2);
9295 EVT AmtVT = Z.getValueType();
9296
9297 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9298 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9299 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9300 // on PowerPC shift by BW being well defined.
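// E.g. (a sketch): fshl with Z = 0 gives SubZ = BW, and Y >> BW is 0 on
// PPC, so the result collapses to X; no special zero-amount guard is
// needed, unlike in the generic expansion.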
9301 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9302 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9303 SDValue SubZ =
9304 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9305 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9306 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9307 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9308}
9309
9310//===----------------------------------------------------------------------===//
9311// Vector related lowering.
9312//
9313
9314/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9315/// element size of SplatSize. Cast the result to VT.
9316static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9317 SelectionDAG &DAG, const SDLoc &dl) {
9318 static const MVT VTys[] = { // canonical VT to use for each size.
9319 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9320 };
9321
9322 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9323
9324 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9325 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9326 SplatSize = 1;
9327 Val = 0xFF;
9328 }
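// E.g. (a sketch): Val = 0xFFFFFFFF with SplatSize = 4 is rewritten to
// Val = 0xFF with SplatSize = 1, so every all-ones splat is emitted
// uniformly as a byte splat (vspltisb -1).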
9329
9330 EVT CanonicalVT = VTys[SplatSize-1];
9331
9332 // Build a canonical splat for this value.
9333 // Explicitly truncate APInt here, as this API is used with a mix of
9334 // signed and unsigned values.
9335 return DAG.getBitcast(
9336 ReqVT,
9337 DAG.getConstant(APInt(64, Val).trunc(SplatSize * 8), dl, CanonicalVT));
9338}
9339
9340/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9341/// specified intrinsic ID.
9343 const SDLoc &dl, EVT DestVT = MVT::Other) {
9344 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9345 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9346 DAG.getConstant(IID, dl, MVT::i32), Op);
9347}
9348
9349/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9350/// specified intrinsic ID.
9352 SelectionDAG &DAG, const SDLoc &dl,
9353 EVT DestVT = MVT::Other) {
9354 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9355 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9356 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9357}
9358
9359/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9360/// specified intrinsic ID.
9361static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9362 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9363 EVT DestVT = MVT::Other) {
9364 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9365 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9366 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9367}
9368
9369/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9370/// amount. The result has the specified value type.
9371static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9372 SelectionDAG &DAG, const SDLoc &dl) {
9373 // Force LHS/RHS to be the right type.
9374 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9375 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9376
9377 int Ops[16];
9378 for (unsigned i = 0; i != 16; ++i)
9379 Ops[i] = i + Amt;
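// E.g. (a sketch): Amt = 4 yields the byte mask <4, 5, ..., 19>, i.e. the
// last 12 bytes of LHS followed by the first 4 bytes of RHS, matching
// vsldoi vD, vA, vB, 4.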
9380 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9381 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9382}
9383
9384/// Do we have an efficient pattern in a .td file for this node?
9385///
9386/// \param V - pointer to the BuildVectorSDNode being matched
9387/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9388///
9389/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9390/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9391/// the opposite is true (expansion is beneficial) are:
9392/// - The node builds a vector out of integers that are not 32 or 64-bits
9393/// - The node builds a vector out of constants
9394/// - The node is a "load-and-splat"
9395/// In all other cases, we will choose to keep the BUILD_VECTOR.
9396 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9397 bool HasDirectMove,
9398 bool HasP8Vector) {
9399 EVT VecVT = V->getValueType(0);
9400 bool RightType = VecVT == MVT::v2f64 ||
9401 (HasP8Vector && VecVT == MVT::v4f32) ||
9402 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9403 if (!RightType)
9404 return false;
9405
9406 bool IsSplat = true;
9407 bool IsLoad = false;
9408 SDValue Op0 = V->getOperand(0);
9409
9410 // This function is called in a block that confirms the node is not a constant
9411 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9412 // different constants.
9413 if (V->isConstant())
9414 return false;
9415 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9416 if (V->getOperand(i).isUndef())
9417 return false;
9418 // We want to expand nodes that represent load-and-splat even if the
9419 // loaded value is a floating point truncation or conversion to int.
9420 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9421 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9422 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9423 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9424 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9425 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9426 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9427 IsLoad = true;
9428 // If the operands are different or the input is not a load and has more
9429 // uses than just this BV node, then it isn't a splat.
9430 if (V->getOperand(i) != Op0 ||
9431 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9432 IsSplat = false;
9433 }
9434 return !(IsSplat && IsLoad);
9435}
9436
9437// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9438SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9439
9440 SDLoc dl(Op);
9441 SDValue Op0 = Op->getOperand(0);
9442
9443 if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9444 (Op.getValueType() != MVT::f128))
9445 return SDValue();
9446
9447 SDValue Lo = Op0.getOperand(0);
9448 SDValue Hi = Op0.getOperand(1);
9449 if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
9450 return SDValue();
9451
9452 if (!Subtarget.isLittleEndian())
9453 std::swap(Lo, Hi);
9454
9455 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
9456}
9457
9458static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9459 const SDValue *InputLoad = &Op;
9460 while (InputLoad->getOpcode() == ISD::BITCAST)
9461 InputLoad = &InputLoad->getOperand(0);
9462 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9463 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9464 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9465 InputLoad = &InputLoad->getOperand(0);
9466 }
9467 if (InputLoad->getOpcode() != ISD::LOAD)
9468 return nullptr;
9469 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9470 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9471}
9472
9473// Convert the argument APFloat to a single precision APFloat if there is no
9474// loss in information during the conversion to single precision APFloat and the
9475// resulting number is not a denormal number. Return true if successful.
9476 bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9477 APFloat APFloatToConvert = ArgAPFloat;
9478 bool LosesInfo = true;
9480 &LosesInfo);
9481 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9482 if (Success)
9483 ArgAPFloat = APFloatToConvert;
9484 return Success;
9485}
9486
9487// Bitcast the argument APInt to a double and convert it to a single precision
9488// APFloat, bitcast the APFloat to an APInt and assign it to the original
9489// argument if there is no loss in information during the conversion from
9490// double to single precision APFloat and the resulting number is not a denormal
9491// number. Return true if successful.
9492 bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9493 double DpValue = ArgAPInt.bitsToDouble();
9494 APFloat APFloatDp(DpValue);
9495 bool Success = convertToNonDenormSingle(APFloatDp);
9496 if (Success)
9497 ArgAPInt = APFloatDp.bitcastToAPInt();
9498 return Success;
9499}
9500
9501 // Nondestructive check for convertToNonDenormSingle.
9502 bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9503 // Only convert if it loses info, since XXSPLTIDP should
9504 // handle the other case.
9505 APFloat APFloatToConvert = ArgAPFloat;
9506 bool LosesInfo = true;
9508 &LosesInfo);
9509
9510 return (!LosesInfo && !APFloatToConvert.isDenormal());
9511}
9512
9513static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9514 unsigned &Opcode) {
9515 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9516 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9517 return false;
9518
9519 EVT Ty = Op->getValueType(0);
9520 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9521 // as we cannot handle extending loads for these types.
9522 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9523 ISD::isNON_EXTLoad(InputNode))
9524 return true;
9525
9526 EVT MemVT = InputNode->getMemoryVT();
9527 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9528 // memory VT is the same vector element VT type.
9529 // The loads feeding into the v8i16 and v16i8 types will be extending because
9530 // scalar i8/i16 are not legal types.
9531 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9532 (MemVT == Ty.getVectorElementType()))
9533 return true;
9534
9535 if (Ty == MVT::v2i64) {
9536 // Check the extend type, when the input type is i32, and the output vector
9537 // type is v2i64.
9538 if (MemVT == MVT::i32) {
9539 if (ISD::isZEXTLoad(InputNode))
9540 Opcode = PPCISD::ZEXT_LD_SPLAT;
9541 if (ISD::isSEXTLoad(InputNode))
9542 Opcode = PPCISD::SEXT_LD_SPLAT;
9543 }
9544 return true;
9545 }
9546 return false;
9547}
9548
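// Match a BUILD_VECTOR whose bytes are all 0x00 or 0xFF and compute the
// per-byte mask for MTVSRBMI. E.g. (a sketch): on little-endian, the v16i8
// constant <0xFF, 0x00, 0xFF, 0x00, ...> yields BitMask = 0x5555, one mask
// bit per all-ones byte.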
9549 static bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN,
9550 bool IsLittleEndian) {
9551 assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector");
9552
9553 BitMask.clearAllBits();
9554 EVT VT = BVN.getValueType(0);
9555 unsigned VTSize = VT.getSizeInBits();
9556 APInt ConstValue(VTSize, 0);
9557
9558 unsigned EltWidth = VT.getScalarSizeInBits();
9559
9560 unsigned BitPos = 0;
9561 for (auto OpVal : BVN.op_values()) {
9562 auto *CN = dyn_cast<ConstantSDNode>(OpVal);
9563
9564 if (!CN)
9565 return false;
9566 // The elements in a vector register are ordered in reverse byte order
9567 // between little-endian and big-endian modes.
9568 ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth),
9569 IsLittleEndian ? BitPos : VTSize - EltWidth - BitPos);
9570 BitPos += EltWidth;
9571 }
9572
9573 for (unsigned J = 0; J < 16; ++J) {
9574 APInt ExtractValue = ConstValue.extractBits(8, J * 8);
9575 if (ExtractValue != 0x00 && ExtractValue != 0xFF)
9576 return false;
9577 if (ExtractValue == 0xFF)
9578 BitMask.setBit(J);
9579 }
9580 return true;
9581}
9582
9583// If this is a case we can't handle, return null and let the default
9584// expansion code take care of it. If we CAN select this case, and if it
9585// selects to a single instruction, return Op. Otherwise, if we can codegen
9586// this case more efficiently than a constant pool load, lower it to the
9587// sequence of ops that should be used.
9588SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9589 SelectionDAG &DAG) const {
9590 SDLoc dl(Op);
9591 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9592 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9593
9594 if (Subtarget.hasP10Vector()) {
9595 APInt BitMask(32, 0);
9596 // If the value of the vector is all zeros or all ones,
9597 // we do not convert it to MTVSRBMI.
9598 // The xxleqv instruction sets a vector with all ones.
9599 // The xxlxor instruction sets a vector with all zeros.
9600 if (isValidMtVsrBmi(BitMask, *BVN, Subtarget.isLittleEndian()) &&
9601 BitMask != 0 && BitMask != 0xffff) {
9602 SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
9603 MachineSDNode *MSDNode =
9604 DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant);
9605 SDValue SDV = SDValue(MSDNode, 0);
9606 EVT DVT = BVN->getValueType(0);
9607 EVT SVT = SDV.getValueType();
9608 if (SVT != DVT) {
9609 SDV = DAG.getNode(ISD::BITCAST, dl, DVT, SDV);
9610 }
9611 return SDV;
9612 }
9613 // Recognize build vector patterns to emit VSX vector instructions
9614 // instead of loading value from memory.
9615 if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG))
9616 return VecPat;
9617 }
9618 // Check if this is a splat of a constant value.
9619 APInt APSplatBits, APSplatUndef;
9620 unsigned SplatBitSize;
9621 bool HasAnyUndefs;
9622 bool BVNIsConstantSplat =
9623 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9624 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9625
9626 // If it is a splat of a double, check if we can shrink it to a 32 bit
9627 // non-denormal float which when converted back to double gives us the same
9628 // double. This is to exploit the XXSPLTIDP instruction.
9629 // If we lose precision, we use XXSPLTI32DX.
9630 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9631 Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9632 // Check the type first to short-circuit so we don't modify APSplatBits if
9633 // this block isn't executed.
9634 if ((Op->getValueType(0) == MVT::v2f64) &&
9635 convertToNonDenormSingle(APSplatBits)) {
9636 SDValue SplatNode = DAG.getNode(
9637 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9638 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9639 return DAG.getBitcast(Op.getValueType(), SplatNode);
9640 } else {
9641 // We may lose precision, so we have to use XXSPLTI32DX.
9642
9643 uint32_t Hi = Hi_32(APSplatBits.getZExtValue());
9644 uint32_t Lo = Lo_32(APSplatBits.getZExtValue());
9645 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9646
9647 if (!Hi || !Lo)
9648      // If either 32-bit half is 0, generate XXLXOR to materialize the zero.
9649 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9650
9651 if (Hi)
9652 SplatNode = DAG.getNode(
9653 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9654 DAG.getTargetConstant(0, dl, MVT::i32),
9655 DAG.getTargetConstant(Hi, dl, MVT::i32));
9656
9657 if (Lo)
9658 SplatNode =
9659 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9660 DAG.getTargetConstant(1, dl, MVT::i32),
9661 DAG.getTargetConstant(Lo, dl, MVT::i32));
9662
9663 return DAG.getBitcast(Op.getValueType(), SplatNode);
9664 }
9665 }
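  // Worked example for the two paths above: a splat of 1.0 (double bit
  // pattern 0x3FF0000000000000) converts losslessly to the single-precision
  // 0x3F800000 and takes the XXSPLTI_SP_TO_DP path, while a splat of pi
  // (0x400921FB54442D18) is inexact in single precision and is built with two
  // XXSPLTI32DX nodes writing Hi = 0x400921FB and Lo = 0x54442D18.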
9666
9667 bool IsSplat64 = false;
9668 uint64_t SplatBits = 0;
9669 int32_t SextVal = 0;
9670 if (BVNIsConstantSplat && SplatBitSize <= 64) {
9671 SplatBits = APSplatBits.getZExtValue();
9672 if (SplatBitSize <= 32) {
9673 SextVal = SignExtend32(SplatBits, SplatBitSize);
9674 } else if (SplatBitSize == 64 && Subtarget.hasP8Altivec()) {
9675 int64_t Splat64Val = static_cast<int64_t>(SplatBits);
9676 bool P9Vector = Subtarget.hasP9Vector();
9677 int32_t Hi = P9Vector ? 127 : 15;
9678 int32_t Lo = P9Vector ? -128 : -16;
9679 IsSplat64 = Splat64Val >= Lo && Splat64Val <= Hi;
9680 SextVal = static_cast<int32_t>(SplatBits);
9681 }
9682 }
9683
9684 if (!BVNIsConstantSplat || (SplatBitSize > 32 && !IsSplat64)) {
9685 unsigned NewOpcode = PPCISD::LD_SPLAT;
9686
9687 // Handle load-and-splat patterns as we have instructions that will do this
9688 // in one go.
9689 if (DAG.isSplatValue(Op, true) &&
9690 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9691 const SDValue *InputLoad = &Op.getOperand(0);
9692 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9693
9694 // If the input load is an extending load, it will be an i32 -> i64
9695 // extending load and isValidSplatLoad() will update NewOpcode.
9696 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9697 unsigned ElementSize =
9698 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9699
9700 assert(((ElementSize == 2 * MemorySize)
9701 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9702 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9703 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9704 "Unmatched element size and opcode!\n");
9705
9706      // To check that this load has a single user, we check for vector width
9707      // (128 bits) / ElementSize uses, since each operand of the BUILD_VECTOR
9708      // is a separate use of the value.
9709 unsigned NumUsesOfInputLD = 128 / ElementSize;
9710 for (SDValue BVInOp : Op->ops())
9711 if (BVInOp.isUndef())
9712 NumUsesOfInputLD--;
9713
9714      // Exclude some cases where LD_SPLAT is worse than scalar_to_vector.
9715      // The cases below should also arise for "lfiwzx/lfiwax + LE target +
9716      // index 1", "lxvrhx + BE target + index 7" and "lxvrbx + BE target +
9717      // index 15", but isValidSplatLoad() currently only returns true when
9718      // the splatted data comes from index 0, so we do not run into trouble
9719      // for those cases.
9720 //
9721 // case 1 - lfiwzx/lfiwax
9722 // 1.1: load result is i32 and is sign/zero extend to i64;
9723 // 1.2: build a v2i64 vector type with above loaded value;
9724 // 1.3: the vector has only one value at index 0, others are all undef;
9725 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9726 if (NumUsesOfInputLD == 1 &&
9727 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9728 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9729 Subtarget.hasLFIWAX()))
9730 return SDValue();
9731
9732 // case 2 - lxvr[hb]x
9733 // 2.1: load result is at most i16;
9734 // 2.2: build a vector with above loaded value;
9735 // 2.3: the vector has only one value at index 0, others are all undef;
9736 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9737 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9738 Subtarget.isISA3_1() && ElementSize <= 16)
9739 return SDValue();
9740
9741 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9742 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9743 Subtarget.hasVSX()) {
9744 SDValue Ops[] = {
9745 LD->getChain(), // Chain
9746 LD->getBasePtr(), // Ptr
9747 DAG.getValueType(Op.getValueType()) // VT
9748 };
9749 SDValue LdSplt = DAG.getMemIntrinsicNode(
9750 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9751 LD->getMemoryVT(), LD->getMemOperand());
9752 // Replace all uses of the output chain of the original load with the
9753 // output chain of the new load.
9754 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9755 LdSplt.getValue(1));
9756 return LdSplt;
9757 }
9758 }
9759
9760    // In 64-bit mode, BUILD_VECTOR nodes that are not constant splats of up
9761    // to 32 bits can be lowered to VSX instructions under certain conditions.
9762 // Without VSX, there is no pattern more efficient than expanding the node.
9763 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9764 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9765 Subtarget.hasP8Vector()))
9766 return Op;
9767 return SDValue();
9768 }
9769
9770 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9771 unsigned SplatSize = SplatBitSize / 8;
9772
9773 // First, handle single instruction cases.
9774
9775 // All zeros?
9776 if (SplatBits == 0) {
9777 // Canonicalize all zero vectors to be v4i32.
9778 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9779 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9780 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9781 }
9782 return Op;
9783 }
9784
9785 // We have XXSPLTIW for constant splats four bytes wide.
9786 // Given vector length is a multiple of 4, 2-byte splats can be replaced
9787 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9788 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9789 // turned into a 4-byte splat of 0xABABABAB.
9790 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9791 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9792 Op.getValueType(), DAG, dl);
9793
9794 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9795 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9796 dl);
9797
9798 // We have XXSPLTIB for constant splats one byte wide.
9799 if (Subtarget.hasP9Vector() && SplatSize == 1)
9800 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9801 dl);
9802
9803 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9804 // Use VSPLTIW/VUPKLSW for v2i64 in range [-16,15].
9805 if (SextVal >= -16 && SextVal <= 15) {
9806 // SplatSize may be 1, 2, 4, or 8. Use size 4 instead of 8 for the splat to
9807 // generate a splat word with extend for size 8.
9808 unsigned UseSize = SplatSize == 8 ? 4 : SplatSize;
9809 SDValue Res =
9810 getCanonicalConstSplat(SextVal, UseSize, Op.getValueType(), DAG, dl);
9811 if (SplatSize != 8)
9812 return Res;
9813 SDValue IntrinsicOp =
9814 BuildIntrinsicOp(Intrinsic::ppc_altivec_vupklsw,
9815 DAG.getBitcast(MVT::v4i32, Res), DAG, dl, MVT::v2i64);
9816 return DAG.getBitcast(Op.getValueType(), IntrinsicOp);
9817 }
9818
9819 // Two instruction sequences.
9820
9821 if (Subtarget.hasP9Vector() && SextVal >= -128 && SextVal <= 127) {
9822 SDValue C = DAG.getConstant((unsigned char)SextVal, dl, MVT::i32);
9823    SmallVector<SDValue, 16> Ops(16, C);
9824    SDValue BV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
9825 unsigned IID;
9826 EVT VT;
9827 switch (SplatSize) {
9828 default:
9829 llvm_unreachable("Unexpected type for vector constant.");
9830 case 2:
9831 IID = Intrinsic::ppc_altivec_vupklsb;
9832 VT = MVT::v8i16;
9833 break;
9834 case 4:
9835 IID = Intrinsic::ppc_altivec_vextsb2w;
9836 VT = MVT::v4i32;
9837 break;
9838 case 8:
9839 IID = Intrinsic::ppc_altivec_vextsb2d;
9840 VT = MVT::v2i64;
9841 break;
9842 }
9843 SDValue Extend = BuildIntrinsicOp(IID, BV, DAG, dl, VT);
9844 return DAG.getBitcast(Op->getValueType(0), Extend);
9845 }
9846 assert(!IsSplat64 && "Unhandled 64-bit splat pattern");
9847
9848 // If this value is in the range [-32,30] and is even, use:
9849 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9850 // If this value is in the range [17,31] and is odd, use:
9851 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9852 // If this value is in the range [-31,-17] and is odd, use:
9853 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9854 // Note the last two are three-instruction sequences.
9855 if (SextVal >= -32 && SextVal <= 31) {
9856 // To avoid having these optimizations undone by constant folding,
9857 // we convert to a pseudo that will be expanded later into one of
9858 // the above forms.
9859 SDValue Elt = DAG.getSignedConstant(SextVal, dl, MVT::i32);
9860 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9861 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9862 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9863 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9864 if (VT == Op.getValueType())
9865 return RetVal;
9866 else
9867 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9868 }
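  // For example: a splat of 30 becomes VSPLTI(15) + VSPLTI(15); a splat of 27
  // becomes VSPLTI(11) - VSPLTI(-16), since 11 - (-16) == 27; and a splat of
  // -27 becomes VSPLTI(-11) + VSPLTI(-16).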
9869
9870 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9871 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9872 // for fneg/fabs.
9873 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9874    // Materialize a splat of -1 using vspltisw -1:
9875 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9876
9877 // Make the VSLW intrinsic, computing 0x8000_0000.
9878 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9879 OnesV, DAG, dl);
9880
9881 // xor by OnesV to invert it.
9882 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9883 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9884 }
9885
9886 // Check to see if this is a wide variety of vsplti*, binop self cases.
9887 static const signed char SplatCsts[] = {
9888 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9889 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9890 };
9891
9892 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9893 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9894    // cases which are ambiguous (e.g. the formation of 0x8000_0000).
9895 int i = SplatCsts[idx];
9896
9897 // Figure out what shift amount will be used by altivec if shifted by i in
9898 // this splat size.
9899 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9900
9901 // vsplti + shl self.
9902 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9903 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9904 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9905 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9906 Intrinsic::ppc_altivec_vslw
9907 };
9908 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9909 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9910 }
9911
9912 // vsplti + srl self.
9913 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9914 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9915 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9916 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9917 Intrinsic::ppc_altivec_vsrw
9918 };
9919 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9920 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9921 }
9922
9923 // vsplti + rol self.
9924 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9925 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9926 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9927 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9928 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9929 Intrinsic::ppc_altivec_vrlw
9930 };
9931 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9932 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9933 }
9934
9935 // t = vsplti c, result = vsldoi t, t, 1
9936 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9937 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9938 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9939 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9940 }
9941 // t = vsplti c, result = vsldoi t, t, 2
9942 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9943 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9944 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9945 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9946 }
9947 // t = vsplti c, result = vsldoi t, t, 3
9948 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9949 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9950 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9951 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9952 }
9953 }
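  // Illustrative case for the patterns above: a v16i8 splat of 0x08 matches
  // "vsplti + shl self" with i == 2, since (2 << (2 & 7)) == 8; it is emitted
  // as "vspltisb 2" followed by a vslb of the result with itself.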
9954
9955 return SDValue();
9956}
9957
9958/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9959/// the specified operations to build the shuffle.
9960static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9961                                      SDValue RHS, SelectionDAG &DAG,
9962 const SDLoc &dl) {
9963 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9964 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9965 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9966
9967 enum {
9968 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9969 OP_VMRGHW,
9970 OP_VMRGLW,
9971 OP_VSPLTISW0,
9972 OP_VSPLTISW1,
9973 OP_VSPLTISW2,
9974 OP_VSPLTISW3,
9975 OP_VSLDOI4,
9976 OP_VSLDOI8,
9977 OP_VSLDOI12
9978 };
9979
9980 if (OpNum == OP_COPY) {
9981 if (LHSID == (1*9+2)*9+3) return LHS;
9982 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9983 return RHS;
9984 }
9985
9986 SDValue OpLHS, OpRHS;
9987 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9988 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9989
9990 int ShufIdxs[16];
9991 switch (OpNum) {
9992 default: llvm_unreachable("Unknown i32 permute!");
9993 case OP_VMRGHW:
9994 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9995 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9996 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9997 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9998 break;
9999 case OP_VMRGLW:
10000 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
10001 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
10002 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
10003 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
10004 break;
10005 case OP_VSPLTISW0:
10006 for (unsigned i = 0; i != 16; ++i)
10007 ShufIdxs[i] = (i&3)+0;
10008 break;
10009 case OP_VSPLTISW1:
10010 for (unsigned i = 0; i != 16; ++i)
10011 ShufIdxs[i] = (i&3)+4;
10012 break;
10013 case OP_VSPLTISW2:
10014 for (unsigned i = 0; i != 16; ++i)
10015 ShufIdxs[i] = (i&3)+8;
10016 break;
10017 case OP_VSPLTISW3:
10018 for (unsigned i = 0; i != 16; ++i)
10019 ShufIdxs[i] = (i&3)+12;
10020 break;
10021 case OP_VSLDOI4:
10022 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
10023 case OP_VSLDOI8:
10024 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
10025 case OP_VSLDOI12:
10026 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
10027 }
10028 EVT VT = OpLHS.getValueType();
10029 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
10030 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
10031 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
10032 return DAG.getNode(ISD::BITCAST, dl, VT, T);
10033}
10034
10035/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
10036/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
10037/// SDValue.
10038SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
10039 SelectionDAG &DAG) const {
10040 const unsigned BytesInVector = 16;
10041 bool IsLE = Subtarget.isLittleEndian();
10042 SDLoc dl(N);
10043 SDValue V1 = N->getOperand(0);
10044 SDValue V2 = N->getOperand(1);
10045 unsigned ShiftElts = 0, InsertAtByte = 0;
10046 bool Swap = false;
10047
10048 // Shifts required to get the byte we want at element 7.
10049 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
10050 0, 15, 14, 13, 12, 11, 10, 9};
10051 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
10052 1, 2, 3, 4, 5, 6, 7, 8};
10053
10054 ArrayRef<int> Mask = N->getMask();
10055 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
10056
10057 // For each mask element, find out if we're just inserting something
10058 // from V2 into V1 or vice versa.
10059 // Possible permutations inserting an element from V2 into V1:
10060 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10061 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10062 // ...
10063 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
10064 // Inserting from V1 into V2 will be similar, except mask range will be
10065 // [16,31].
10066
10067 bool FoundCandidate = false;
10068 // If both vector operands for the shuffle are the same vector, the mask
10069 // will contain only elements from the first one and the second one will be
10070 // undef.
10071 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
10072  // Go through the mask of bytes to find an element that's being moved from
10073  // one vector to the other.
10074 for (unsigned i = 0; i < BytesInVector; ++i) {
10075 unsigned CurrentElement = Mask[i];
10076 // If 2nd operand is undefined, we should only look for element 7 in the
10077 // Mask.
10078 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
10079 continue;
10080
10081 bool OtherElementsInOrder = true;
10082 // Examine the other elements in the Mask to see if they're in original
10083 // order.
10084 for (unsigned j = 0; j < BytesInVector; ++j) {
10085 if (j == i)
10086 continue;
10087      // If CurrentElement is from V1 [0,15], then we expect the rest of the
10088      // Mask to be from V2 [16,31] and vice versa, unless the 2nd operand is
10089      // undefined, in which case we assume we're picking from the 1st operand.
10090 int MaskOffset =
10091 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
10092 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
10093 OtherElementsInOrder = false;
10094 break;
10095 }
10096 }
10097 // If other elements are in original order, we record the number of shifts
10098 // we need to get the element we want into element 7. Also record which byte
10099 // in the vector we should insert into.
10100 if (OtherElementsInOrder) {
10101 // If 2nd operand is undefined, we assume no shifts and no swapping.
10102 if (V2.isUndef()) {
10103 ShiftElts = 0;
10104 Swap = false;
10105 } else {
10106        // CurrentElement may come from V2 ([16,31]); only its low 4 bits are needed to index the shift tables.
10107 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
10108 : BigEndianShifts[CurrentElement & 0xF];
10109 Swap = CurrentElement < BytesInVector;
10110 }
10111 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
10112 FoundCandidate = true;
10113 break;
10114 }
10115 }
10116
10117 if (!FoundCandidate)
10118 return SDValue();
10119
10120 // Candidate found, construct the proper SDAG sequence with VINSERTB,
10121 // optionally with VECSHL if shift is required.
10122 if (Swap)
10123 std::swap(V1, V2);
10124 if (V2.isUndef())
10125 V2 = V1;
10126 if (ShiftElts) {
10127 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10128 DAG.getConstant(ShiftElts, dl, MVT::i32));
10129 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
10130 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10131 }
10132 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
10133 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10134}
10135
10136/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
10137/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
10138/// SDValue.
10139SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
10140 SelectionDAG &DAG) const {
10141 const unsigned NumHalfWords = 8;
10142 const unsigned BytesInVector = NumHalfWords * 2;
10143 // Check that the shuffle is on half-words.
10144 if (!isNByteElemShuffleMask(N, 2, 1))
10145 return SDValue();
10146
10147 bool IsLE = Subtarget.isLittleEndian();
10148 SDLoc dl(N);
10149 SDValue V1 = N->getOperand(0);
10150 SDValue V2 = N->getOperand(1);
10151 unsigned ShiftElts = 0, InsertAtByte = 0;
10152 bool Swap = false;
10153
10154 // Shifts required to get the half-word we want at element 3.
10155 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
10156 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
10157
10158 uint32_t Mask = 0;
10159 uint32_t OriginalOrderLow = 0x1234567;
10160 uint32_t OriginalOrderHigh = 0x89ABCDEF;
10161 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
10162 // 32-bit space, only need 4-bit nibbles per element.
10163 for (unsigned i = 0; i < NumHalfWords; ++i) {
10164 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10165 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
10166 }
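  // For example, the identity half-word shuffle packs to Mask == 0x01234567
  // (OriginalOrderLow), and a shuffle taking every half-word from V2 packs to
  // 0x89ABCDEF (OriginalOrderHigh).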
10167
10168 // For each mask element, find out if we're just inserting something
10169 // from V2 into V1 or vice versa. Possible permutations inserting an element
10170 // from V2 into V1:
10171 // X, 1, 2, 3, 4, 5, 6, 7
10172 // 0, X, 2, 3, 4, 5, 6, 7
10173 // 0, 1, X, 3, 4, 5, 6, 7
10174 // 0, 1, 2, X, 4, 5, 6, 7
10175 // 0, 1, 2, 3, X, 5, 6, 7
10176 // 0, 1, 2, 3, 4, X, 6, 7
10177 // 0, 1, 2, 3, 4, 5, X, 7
10178 // 0, 1, 2, 3, 4, 5, 6, X
10179 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
10180
10181 bool FoundCandidate = false;
10182 // Go through the mask of half-words to find an element that's being moved
10183 // from one vector to the other.
10184 for (unsigned i = 0; i < NumHalfWords; ++i) {
10185 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10186 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
10187 uint32_t MaskOtherElts = ~(0xF << MaskShift);
10188 uint32_t TargetOrder = 0x0;
10189
10190 // If both vector operands for the shuffle are the same vector, the mask
10191 // will contain only elements from the first one and the second one will be
10192 // undef.
10193 if (V2.isUndef()) {
10194 ShiftElts = 0;
10195 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
10196 TargetOrder = OriginalOrderLow;
10197 Swap = false;
10198 // Skip if not the correct element or mask of other elements don't equal
10199 // to our expected order.
10200 if (MaskOneElt == VINSERTHSrcElem &&
10201 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10202 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10203 FoundCandidate = true;
10204 break;
10205 }
10206 } else { // If both operands are defined.
10207 // Target order is [8,15] if the current mask is between [0,7].
10208 TargetOrder =
10209 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
10210 // Skip if mask of other elements don't equal our expected order.
10211 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10212 // We only need the last 3 bits for the number of shifts.
10213 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
10214 : BigEndianShifts[MaskOneElt & 0x7];
10215 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10216 Swap = MaskOneElt < NumHalfWords;
10217 FoundCandidate = true;
10218 break;
10219 }
10220 }
10221 }
10222
10223 if (!FoundCandidate)
10224 return SDValue();
10225
10226 // Candidate found, construct the proper SDAG sequence with VINSERTH,
10227 // optionally with VECSHL if shift is required.
10228 if (Swap)
10229 std::swap(V1, V2);
10230 if (V2.isUndef())
10231 V2 = V1;
10232 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10233 if (ShiftElts) {
10234 // Double ShiftElts because we're left shifting on v16i8 type.
10235 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10236 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
10237 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
10238 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10239 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10240 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10241 }
10242 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
10243 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10244 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10245 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10246}
10247
10248/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10249/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10250/// return the default SDValue.
10251SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10252 SelectionDAG &DAG) const {
10253 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10254 // to v16i8. Peek through the bitcasts to get the actual operands.
10255  SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
10256  SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
10257
10258 auto ShuffleMask = SVN->getMask();
10259 SDValue VecShuffle(SVN, 0);
10260 SDLoc DL(SVN);
10261
10262 // Check that we have a four byte shuffle.
10263 if (!isNByteElemShuffleMask(SVN, 4, 1))
10264 return SDValue();
10265
10266 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
10267 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10268 std::swap(LHS, RHS);
10269    VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
10270    ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
10271 if (!CommutedSV)
10272 return SDValue();
10273 ShuffleMask = CommutedSV->getMask();
10274 }
10275
10276 // Ensure that the RHS is a vector of constants.
10277 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10278 if (!BVN)
10279 return SDValue();
10280
10281 // Check if RHS is a splat of 4-bytes (or smaller).
10282 APInt APSplatValue, APSplatUndef;
10283 unsigned SplatBitSize;
10284 bool HasAnyUndefs;
10285 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10286 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10287 SplatBitSize > 32)
10288 return SDValue();
10289
10290 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10291 // The instruction splats a constant C into two words of the source vector
10292 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10293 // Thus we check that the shuffle mask is the equivalent of
10294 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10295 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10296 // within each word are consecutive, so we only need to check the first byte.
10297 SDValue Index;
10298 bool IsLE = Subtarget.isLittleEndian();
10299 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10300 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10301 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10302 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
10303 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10304 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10305 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10306 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10307 else
10308 return SDValue();
10309
10310 // If the splat is narrower than 32-bits, we need to get the 32-bit value
10311 // for XXSPLTI32DX.
10312 unsigned SplatVal = APSplatValue.getZExtValue();
10313 for (; SplatBitSize < 32; SplatBitSize <<= 1)
10314 SplatVal |= (SplatVal << SplatBitSize);
10315
10316 SDValue SplatNode = DAG.getNode(
10317 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10318 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10319 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10320}
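// Example of the pattern matched above: the word-level shuffle <0, C, 2, C>,
// where C is a constant-splat word from the RHS, lowers to
// XXSPLTI32DX(LHS, IsLE ? 0 : 1, C); a narrower splat such as the byte 0xAB
// is first widened to the 32-bit immediate 0xABABABAB.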
10321
10322/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10323/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10324/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10325/// i.e (or (shl x, C1), (srl x, 128-C1)).
10326SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10327 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10328 assert(Op.getValueType() == MVT::v1i128 &&
10329 "Only set v1i128 as custom, other type shouldn't reach here!");
10330 SDLoc dl(Op);
10331 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10332 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10333 unsigned SHLAmt = N1.getConstantOperandVal(0);
10334 if (SHLAmt % 8 == 0) {
10335 std::array<int, 16> Mask;
10336 std::iota(Mask.begin(), Mask.end(), 0);
10337 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
10338 if (SDValue Shuffle =
10339 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10340 DAG.getUNDEF(MVT::v16i8), Mask))
10341 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10342 }
10343 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10344 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10345 DAG.getConstant(SHLAmt, dl, MVT::i32));
10346 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10347 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10348 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10349 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10350}
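// For instance: ROTL(v1i128, 16) becomes the byte shuffle <2,3,...,15,0,1>,
// while ROTL(v1i128, 5) is not a whole-byte rotation and falls back to
// (or (shl x, 5), (srl x, 123)).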
10351
10352/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10353/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10354/// return the code it can be lowered into. Worst case, it can always be
10355/// lowered into a vperm.
10356SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10357 SelectionDAG &DAG) const {
10358 SDLoc dl(Op);
10359 SDValue V1 = Op.getOperand(0);
10360 SDValue V2 = Op.getOperand(1);
10361 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10362
10363 // Any nodes that were combined in the target-independent combiner prior
10364 // to vector legalization will not be sent to the target combine. Try to
10365 // combine it here.
10366 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10367 if (!isa<ShuffleVectorSDNode>(NewShuffle))
10368 return NewShuffle;
10369 Op = NewShuffle;
10370    SVOp = cast<ShuffleVectorSDNode>(Op);
10371    V1 = Op.getOperand(0);
10372 V2 = Op.getOperand(1);
10373 }
10374 EVT VT = Op.getValueType();
10375 bool isLittleEndian = Subtarget.isLittleEndian();
10376
10377 unsigned ShiftElts, InsertAtByte;
10378 bool Swap = false;
10379
10380 // If this is a load-and-splat, we can do that with a single instruction
10381 // in some cases. However if the load has multiple uses, we don't want to
10382 // combine it because that will just produce multiple loads.
10383 bool IsPermutedLoad = false;
10384 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10385 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10386 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10387 InputLoad->hasOneUse()) {
10388 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10389 int SplatIdx =
10390 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10391
10392 // The splat index for permuted loads will be in the left half of the vector
10393 // which is strictly wider than the loaded value by 8 bytes. So we need to
10394 // adjust the splat index to point to the correct address in memory.
10395 if (IsPermutedLoad) {
10396 assert((isLittleEndian || IsFourByte) &&
10397 "Unexpected size for permuted load on big endian target");
10398 SplatIdx += IsFourByte ? 2 : 1;
10399 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10400 "Splat of a value outside of the loaded memory");
10401 }
10402
10403 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10404 // For 4-byte load-and-splat, we need Power9.
10405 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10406 uint64_t Offset = 0;
10407 if (IsFourByte)
10408 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10409 else
10410 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10411
10412 // If the width of the load is the same as the width of the splat,
10413 // loading with an offset would load the wrong memory.
10414 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10415 Offset = 0;
10416
10417 SDValue BasePtr = LD->getBasePtr();
10418 if (Offset != 0)
10419        BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
10420                              BasePtr, DAG.getIntPtrConstant(Offset, dl));
10421 SDValue Ops[] = {
10422 LD->getChain(), // Chain
10423 BasePtr, // BasePtr
10424 DAG.getValueType(Op.getValueType()) // VT
10425 };
10426 SDVTList VTL =
10427 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10428 SDValue LdSplt =
10429 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10430 Ops, LD->getMemoryVT(), LD->getMemOperand());
10431 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10432 if (LdSplt.getValueType() != SVOp->getValueType(0))
10433 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10434 return LdSplt;
10435 }
10436 }
10437
10438 // All v2i64 and v2f64 shuffles are legal
10439 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10440 return Op;
10441
10442 if (Subtarget.hasP9Vector() &&
10443 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10444 isLittleEndian)) {
10445 if (V2.isUndef())
10446 V2 = V1;
10447 else if (Swap)
10448 std::swap(V1, V2);
10449 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10450 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10451 if (ShiftElts) {
10452 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10453 DAG.getConstant(ShiftElts, dl, MVT::i32));
10454 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10455 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10456 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10457 }
10458 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10459 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10460 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10461 }
10462
10463 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10464 SDValue SplatInsertNode;
10465 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10466 return SplatInsertNode;
10467 }
10468
10469 if (Subtarget.hasP9Altivec()) {
10470 SDValue NewISDNode;
10471 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10472 return NewISDNode;
10473
10474 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10475 return NewISDNode;
10476 }
10477
10478 if (Subtarget.hasVSX() &&
10479 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10480 if (Swap)
10481 std::swap(V1, V2);
10482 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10483 SDValue Conv2 =
10484 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10485
10486 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10487 DAG.getConstant(ShiftElts, dl, MVT::i32));
10488 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10489 }
10490
10491 if (Subtarget.hasVSX() &&
10492 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10493 if (Swap)
10494 std::swap(V1, V2);
10495 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10496 SDValue Conv2 =
10497 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10498
10499 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10500 DAG.getConstant(ShiftElts, dl, MVT::i32));
10501 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10502 }
10503
10504 if (Subtarget.hasP9Vector()) {
10505 if (PPC::isXXBRHShuffleMask(SVOp)) {
10506 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10507 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10508 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10509 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10510 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10511 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10512 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10513 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10514 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10515 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10516 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10517 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10518 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10519 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10520 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10521 }
10522 }
10523
10524 if (Subtarget.hasVSX()) {
10525 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10526 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10527
10528 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10529 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10530 DAG.getConstant(SplatIdx, dl, MVT::i32));
10531 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10532 }
10533
10534 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10535 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10536 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10537 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10538 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10539 }
10540 }
10541
10542 // Cases that are handled by instructions that take permute immediates
10543 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10544 // selected by the instruction selector.
10545 if (V2.isUndef()) {
10546 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10547 PPC::isSplatShuffleMask(SVOp, 2) ||
10548 PPC::isSplatShuffleMask(SVOp, 4) ||
10549 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10550 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10551 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10552 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10553 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10554 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10555 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10556 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10557 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10558 (Subtarget.hasP8Altivec() && (
10559 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10560 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10561 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10562 return Op;
10563 }
10564 }
10565
10566 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10567 // and produce a fixed permutation. If any of these match, do not lower to
10568 // VPERM.
10569 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10570 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10571 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10572 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10573 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10574 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10575 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10576 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10577 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10578 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10579 (Subtarget.hasP8Altivec() && (
10580 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10581 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10582 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10583 return Op;
10584
10585 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10586 // perfect shuffle table to emit an optimal matching sequence.
10587 ArrayRef<int> PermMask = SVOp->getMask();
10588
10589 if (!DisablePerfectShuffle && !isLittleEndian) {
10590 unsigned PFIndexes[4];
10591 bool isFourElementShuffle = true;
10592 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10593 ++i) { // Element number
10594 unsigned EltNo = 8; // Start out undef.
10595 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10596 if (PermMask[i * 4 + j] < 0)
10597 continue; // Undef, ignore it.
10598
10599 unsigned ByteSource = PermMask[i * 4 + j];
10600 if ((ByteSource & 3) != j) {
10601 isFourElementShuffle = false;
10602 break;
10603 }
10604
10605 if (EltNo == 8) {
10606 EltNo = ByteSource / 4;
10607 } else if (EltNo != ByteSource / 4) {
10608 isFourElementShuffle = false;
10609 break;
10610 }
10611 }
10612 PFIndexes[i] = EltNo;
10613 }
10614
10615 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10616    // perfect shuffle table to determine if it is cost effective to do this as
10617 // discrete instructions, or whether we should use a vperm.
10618 // For now, we skip this for little endian until such time as we have a
10619 // little-endian perfect shuffle table.
10620 if (isFourElementShuffle) {
10621 // Compute the index in the perfect shuffle table.
10622 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10623 PFIndexes[2] * 9 + PFIndexes[3];
10624
10625 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10626 unsigned Cost = (PFEntry >> 30);
10627
10628 // Determining when to avoid vperm is tricky. Many things affect the cost
10629 // of vperm, particularly how many times the perm mask needs to be
10630 // computed. For example, if the perm mask can be hoisted out of a loop or
10631 // is already used (perhaps because there are multiple permutes with the
10632 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10633 // permute mask out of the loop requires an extra register.
10634 //
10635 // As a compromise, we only emit discrete instructions if the shuffle can
10636 // be generated in 3 or fewer operations. When we have loop information
10637 // available, if this block is within a loop, we should avoid using vperm
10638 // for 3-operation perms and use a constant pool load instead.
10639 if (Cost < 3)
10640 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10641 }
10642 }
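  // Index arithmetic example: a word shuffle selecting elements <0,5,2,7>
  // yields PFIndexes = {0,5,2,7} and PFTableIndex = 0*729 + 5*81 + 2*9 + 7 =
  // 430 (base-9 digits, where a digit of 8 means the element is undef).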
10643
10644 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10645 // vector that will get spilled to the constant pool.
10646 if (V2.isUndef()) V2 = V1;
10647
10648 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10649}
10650
10651SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10652 ArrayRef<int> PermMask, EVT VT,
10653 SDValue V1, SDValue V2) const {
10654 unsigned Opcode = PPCISD::VPERM;
10655 EVT ValType = V1.getValueType();
10656 SDLoc dl(Op);
10657 bool NeedSwap = false;
10658 bool isLittleEndian = Subtarget.isLittleEndian();
10659 bool isPPC64 = Subtarget.isPPC64();
10660
10661 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10662 (V1->hasOneUse() || V2->hasOneUse())) {
10663    LLVM_DEBUG(dbgs() << "At least one of the two input vectors is dead - "
10664                         "using XXPERM instead\n");
10665 Opcode = PPCISD::XXPERM;
10666
10667 // The second input to XXPERM is also an output so if the second input has
10668 // multiple uses then copying is necessary, as a result we want the
10669 // single-use operand to be used as the second input to prevent copying.
10670 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10671 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10672 std::swap(V1, V2);
10673 NeedSwap = !NeedSwap;
10674 }
10675 }
10676
10677 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10678 // that it is in input element units, not in bytes. Convert now.
10679
10680 // For little endian, the order of the input vectors is reversed, and
10681 // the permutation mask is complemented with respect to 31. This is
10682 // necessary to produce proper semantics with the big-endian-based vperm
10683 // instruction.
10684 EVT EltVT = V1.getValueType().getVectorElementType();
10685 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10686
10687 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10688 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10689
10690 /*
10691    Vectors will be appended like so: [ V1 | V2 ]
10692 XXSWAPD on V1:
10693 [ A | B | C | D ] -> [ C | D | A | B ]
10694 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10695 i.e. index of A, B += 8, and index of C, D -= 8.
10696 XXSWAPD on V2:
10697 [ E | F | G | H ] -> [ G | H | E | F ]
10698 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10699 i.e. index of E, F += 8, index of G, H -= 8
10700 Swap V1 and V2:
10701 [ V1 | V2 ] -> [ V2 | V1 ]
10702 0-15 16-31 0-15 16-31
10703 i.e. index of V1 += 16, index of V2 -= 16
10704 */
10705
10706 SmallVector<SDValue, 16> ResultMask;
10707 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10708 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10709
10710 if (V1HasXXSWAPD) {
10711 if (SrcElt < 8)
10712 SrcElt += 8;
10713 else if (SrcElt < 16)
10714 SrcElt -= 8;
10715 }
10716 if (V2HasXXSWAPD) {
10717 if (SrcElt > 23)
10718 SrcElt -= 8;
10719 else if (SrcElt > 15)
10720 SrcElt += 8;
10721 }
10722 if (NeedSwap) {
10723 if (SrcElt < 16)
10724 SrcElt += 16;
10725 else
10726 SrcElt -= 16;
10727 }
10728 for (unsigned j = 0; j != BytesPerElement; ++j)
10729 if (isLittleEndian)
10730 ResultMask.push_back(
10731 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10732 else
10733 ResultMask.push_back(
10734 DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10735 }
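  // Byte-index example: with 4-byte elements, PermMask[i] == 5 contributes
  // control bytes 20,21,22,23 on big-endian targets, and 31-20..31-23 ==
  // 11,10,9,8 on little-endian (V1 and V2 are swapped further below for LE).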
10736
10737 if (V1HasXXSWAPD) {
10738 dl = SDLoc(V1->getOperand(0));
10739 V1 = V1->getOperand(0)->getOperand(1);
10740 }
10741 if (V2HasXXSWAPD) {
10742 dl = SDLoc(V2->getOperand(0));
10743 V2 = V2->getOperand(0)->getOperand(1);
10744 }
10745
10746 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10747 if (ValType != MVT::v2f64)
10748 V1 = DAG.getBitcast(MVT::v2f64, V1);
10749 if (V2.getValueType() != MVT::v2f64)
10750 V2 = DAG.getBitcast(MVT::v2f64, V2);
10751 }
10752
10753 ShufflesHandledWithVPERM++;
10754 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10755 LLVM_DEBUG({
10756 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10757 if (Opcode == PPCISD::XXPERM) {
10758 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10759 } else {
10760 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10761 }
10762 SVOp->dump();
10763 dbgs() << "With the following permute control vector:\n";
10764 VPermMask.dump();
10765 });
10766
10767 if (Opcode == PPCISD::XXPERM)
10768 VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10769
10770  // The permute control vector was already adjusted for endianness above;
10771  // for LE only the operand order still needs to be swapped.
10772 if (isLittleEndian)
10773 std::swap(V1, V2);
10774
10775 SDValue VPERMNode =
10776 DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10777
10778 VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10779 return VPERMNode;
10780}
10781
10782/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10783/// vector comparison. If it is, return true and fill in Opc/isDot with
10784/// information about the intrinsic.
10785static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10786 bool &isDot, const PPCSubtarget &Subtarget) {
10787 unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10788 CompareOpc = -1;
10789 isDot = false;
10790 switch (IntrinsicID) {
10791 default:
10792 return false;
10793 // Comparison predicates.
10794 case Intrinsic::ppc_altivec_vcmpbfp_p:
10795 CompareOpc = 966;
10796 isDot = true;
10797 break;
10798 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10799 CompareOpc = 198;
10800 isDot = true;
10801 break;
10802 case Intrinsic::ppc_altivec_vcmpequb_p:
10803 CompareOpc = 6;
10804 isDot = true;
10805 break;
10806 case Intrinsic::ppc_altivec_vcmpequh_p:
10807 CompareOpc = 70;
10808 isDot = true;
10809 break;
10810 case Intrinsic::ppc_altivec_vcmpequw_p:
10811 CompareOpc = 134;
10812 isDot = true;
10813 break;
10814 case Intrinsic::ppc_altivec_vcmpequd_p:
10815 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10816 CompareOpc = 199;
10817 isDot = true;
10818 } else
10819 return false;
10820 break;
10821 case Intrinsic::ppc_altivec_vcmpneb_p:
10822 case Intrinsic::ppc_altivec_vcmpneh_p:
10823 case Intrinsic::ppc_altivec_vcmpnew_p:
10824 case Intrinsic::ppc_altivec_vcmpnezb_p:
10825 case Intrinsic::ppc_altivec_vcmpnezh_p:
10826 case Intrinsic::ppc_altivec_vcmpnezw_p:
10827 if (Subtarget.hasP9Altivec()) {
10828 switch (IntrinsicID) {
10829 default:
10830 llvm_unreachable("Unknown comparison intrinsic.");
10831 case Intrinsic::ppc_altivec_vcmpneb_p:
10832 CompareOpc = 7;
10833 break;
10834 case Intrinsic::ppc_altivec_vcmpneh_p:
10835 CompareOpc = 71;
10836 break;
10837 case Intrinsic::ppc_altivec_vcmpnew_p:
10838 CompareOpc = 135;
10839 break;
10840 case Intrinsic::ppc_altivec_vcmpnezb_p:
10841 CompareOpc = 263;
10842 break;
10843 case Intrinsic::ppc_altivec_vcmpnezh_p:
10844 CompareOpc = 327;
10845 break;
10846 case Intrinsic::ppc_altivec_vcmpnezw_p:
10847 CompareOpc = 391;
10848 break;
10849 }
10850 isDot = true;
10851 } else
10852 return false;
10853 break;
10854 case Intrinsic::ppc_altivec_vcmpgefp_p:
10855 CompareOpc = 454;
10856 isDot = true;
10857 break;
10858 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10859 CompareOpc = 710;
10860 isDot = true;
10861 break;
10862 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10863 CompareOpc = 774;
10864 isDot = true;
10865 break;
10866 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10867 CompareOpc = 838;
10868 isDot = true;
10869 break;
10870 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10871 CompareOpc = 902;
10872 isDot = true;
10873 break;
10874 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10875 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10876 CompareOpc = 967;
10877 isDot = true;
10878 } else
10879 return false;
10880 break;
10881 case Intrinsic::ppc_altivec_vcmpgtub_p:
10882 CompareOpc = 518;
10883 isDot = true;
10884 break;
10885 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10886 CompareOpc = 582;
10887 isDot = true;
10888 break;
10889 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10890 CompareOpc = 646;
10891 isDot = true;
10892 break;
10893 case Intrinsic::ppc_altivec_vcmpgtud_p:
10894 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10895 CompareOpc = 711;
10896 isDot = true;
10897 } else
10898 return false;
10899 break;
10900
10901 case Intrinsic::ppc_altivec_vcmpequq:
10902 case Intrinsic::ppc_altivec_vcmpgtsq:
10903 case Intrinsic::ppc_altivec_vcmpgtuq:
10904 if (!Subtarget.isISA3_1())
10905 return false;
10906 switch (IntrinsicID) {
10907 default:
10908 llvm_unreachable("Unknown comparison intrinsic.");
10909 case Intrinsic::ppc_altivec_vcmpequq:
10910 CompareOpc = 455;
10911 break;
10912 case Intrinsic::ppc_altivec_vcmpgtsq:
10913 CompareOpc = 903;
10914 break;
10915 case Intrinsic::ppc_altivec_vcmpgtuq:
10916 CompareOpc = 647;
10917 break;
10918 }
10919 break;
10920
10921 // VSX predicate comparisons use the same infrastructure
10922 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10923 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10924 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10925 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10926 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10927 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10928 if (Subtarget.hasVSX()) {
10929 switch (IntrinsicID) {
10930 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10931 CompareOpc = 99;
10932 break;
10933 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10934 CompareOpc = 115;
10935 break;
10936 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10937 CompareOpc = 107;
10938 break;
10939 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10940 CompareOpc = 67;
10941 break;
10942 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10943 CompareOpc = 83;
10944 break;
10945 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10946 CompareOpc = 75;
10947 break;
10948 }
10949 isDot = true;
10950 } else
10951 return false;
10952 break;
10953
10954 // Normal Comparisons.
10955 case Intrinsic::ppc_altivec_vcmpbfp:
10956 CompareOpc = 966;
10957 break;
10958 case Intrinsic::ppc_altivec_vcmpeqfp:
10959 CompareOpc = 198;
10960 break;
10961 case Intrinsic::ppc_altivec_vcmpequb:
10962 CompareOpc = 6;
10963 break;
10964 case Intrinsic::ppc_altivec_vcmpequh:
10965 CompareOpc = 70;
10966 break;
10967 case Intrinsic::ppc_altivec_vcmpequw:
10968 CompareOpc = 134;
10969 break;
10970 case Intrinsic::ppc_altivec_vcmpequd:
10971 if (Subtarget.hasP8Altivec())
10972 CompareOpc = 199;
10973 else
10974 return false;
10975 break;
10976 case Intrinsic::ppc_altivec_vcmpneb:
10977 case Intrinsic::ppc_altivec_vcmpneh:
10978 case Intrinsic::ppc_altivec_vcmpnew:
10979 case Intrinsic::ppc_altivec_vcmpnezb:
10980 case Intrinsic::ppc_altivec_vcmpnezh:
10981 case Intrinsic::ppc_altivec_vcmpnezw:
10982 if (Subtarget.hasP9Altivec())
10983 switch (IntrinsicID) {
10984 default:
10985 llvm_unreachable("Unknown comparison intrinsic.");
10986 case Intrinsic::ppc_altivec_vcmpneb:
10987 CompareOpc = 7;
10988 break;
10989 case Intrinsic::ppc_altivec_vcmpneh:
10990 CompareOpc = 71;
10991 break;
10992 case Intrinsic::ppc_altivec_vcmpnew:
10993 CompareOpc = 135;
10994 break;
10995 case Intrinsic::ppc_altivec_vcmpnezb:
10996 CompareOpc = 263;
10997 break;
10998 case Intrinsic::ppc_altivec_vcmpnezh:
10999 CompareOpc = 327;
11000 break;
11001 case Intrinsic::ppc_altivec_vcmpnezw:
11002 CompareOpc = 391;
11003 break;
11004 }
11005 else
11006 return false;
11007 break;
11008 case Intrinsic::ppc_altivec_vcmpgefp:
11009 CompareOpc = 454;
11010 break;
11011 case Intrinsic::ppc_altivec_vcmpgtfp:
11012 CompareOpc = 710;
11013 break;
11014 case Intrinsic::ppc_altivec_vcmpgtsb:
11015 CompareOpc = 774;
11016 break;
11017 case Intrinsic::ppc_altivec_vcmpgtsh:
11018 CompareOpc = 838;
11019 break;
11020 case Intrinsic::ppc_altivec_vcmpgtsw:
11021 CompareOpc = 902;
11022 break;
11023 case Intrinsic::ppc_altivec_vcmpgtsd:
11024 if (Subtarget.hasP8Altivec())
11025 CompareOpc = 967;
11026 else
11027 return false;
11028 break;
11029 case Intrinsic::ppc_altivec_vcmpgtub:
11030 CompareOpc = 518;
11031 break;
11032 case Intrinsic::ppc_altivec_vcmpgtuh:
11033 CompareOpc = 582;
11034 break;
11035 case Intrinsic::ppc_altivec_vcmpgtuw:
11036 CompareOpc = 646;
11037 break;
11038 case Intrinsic::ppc_altivec_vcmpgtud:
11039 if (Subtarget.hasP8Altivec())
11040 CompareOpc = 711;
11041 else
11042 return false;
11043 break;
11044 case Intrinsic::ppc_altivec_vcmpequq_p:
11045 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11046 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11047 if (!Subtarget.isISA3_1())
11048 return false;
11049 switch (IntrinsicID) {
11050 default:
11051 llvm_unreachable("Unknown comparison intrinsic.");
11052 case Intrinsic::ppc_altivec_vcmpequq_p:
11053 CompareOpc = 455;
11054 break;
11055 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11056 CompareOpc = 903;
11057 break;
11058 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11059 CompareOpc = 647;
11060 break;
11061 }
11062 isDot = true;
11063 break;
11064 }
11065 return true;
11066}
11067
11068/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
11069/// lower, do it, otherwise return null.
11070SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
11071 SelectionDAG &DAG) const {
11072 unsigned IntrinsicID = Op.getConstantOperandVal(0);
11073
11074 SDLoc dl(Op);
11075 // Note: BCD instructions expect the immediate operand in vector form (v4i32),
11076 // but the builtin provides it as a scalar. To satisfy the instruction
11077 // encoding, we splat the scalar across all lanes using SPLAT_VECTOR.
11078 auto MapNodeWithSplatVector =
11079 [&](unsigned Opcode,
11080 std::initializer_list<SDValue> ExtraOps = {}) -> SDValue {
11081 SDValue SplatVal =
11082 DAG.getNode(ISD::SPLAT_VECTOR, dl, MVT::v4i32, Op.getOperand(2));
11083
11084 SmallVector<SDValue, 4> Ops{SplatVal, Op.getOperand(1)};
11085 Ops.append(ExtraOps.begin(), ExtraOps.end());
11086 return DAG.getNode(Opcode, dl, MVT::v16i8, Ops);
11087 };
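    // For illustration (an editorial sketch using hypothetical IR values, not
    // from the source): a call such as
    //   @llvm.ppc.bcdshift(<16 x i8> %v, i32 %amt, i32 %ps)
    // becomes a BCDSHIFT node with operands (splat_vector(%amt) : v4i32, %v, %ps),
    // i.e. the scalar amount is widened so the node matches the instruction's
    // all-vector operand encoding.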
11088
11089 switch (IntrinsicID) {
11090 case Intrinsic::thread_pointer:
11091 // Reads the thread pointer register, used for __builtin_thread_pointer.
11092 if (Subtarget.isPPC64())
11093 return DAG.getRegister(PPC::X13, MVT::i64);
11094 return DAG.getRegister(PPC::R2, MVT::i32);
11095
11096 case Intrinsic::ppc_rldimi: {
11097 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
11098 SDValue Src = Op.getOperand(1);
11099 APInt Mask = Op.getConstantOperandAPInt(4);
11100 if (Mask.isZero())
11101 return Op.getOperand(2);
11102 if (Mask.isAllOnes())
11103 return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
11104 uint64_t SH = Op.getConstantOperandVal(3);
11105 unsigned MB = 0, ME = 0;
11106 if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
11107 report_fatal_error("invalid rldimi mask!");
11108 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
11109 if (ME < 63 - SH) {
11110 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11111 DAG.getConstant(ME + SH + 1, dl, MVT::i32));
11112 } else if (ME > 63 - SH) {
11113 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11114 DAG.getConstant(ME + SH - 63, dl, MVT::i32));
11115 }
11116 return SDValue(
11117 DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
11118 {Op.getOperand(2), Src,
11119 DAG.getTargetConstant(63 - ME, dl, MVT::i32),
11120 DAG.getTargetConstant(MB, dl, MVT::i32)}),
11121 0);
11122 }
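    // Worked example (editorial sketch with hypothetical values): for
    //   @llvm.ppc.rldimi(i64 %rs, i64 %ra, i32 8, i64 0x0000000000FFFF00)
    // isRunOfOnes64 yields MB=40, ME=55 in PPC bit numbering. Since
    // ME == 63-SH (55 == 63-8), no pre-rotation is needed and the emitted node
    // is RLDIMI %ra, %rs, 8, 40, computing
    //   (rotl64(%rs, 8) & Mask) | (%ra & ~Mask).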
11123
11124 case Intrinsic::ppc_rlwimi: {
11125 APInt Mask = Op.getConstantOperandAPInt(4);
11126 if (Mask.isZero())
11127 return Op.getOperand(2);
11128 if (Mask.isAllOnes())
11129 return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
11130 Op.getOperand(3));
11131 unsigned MB = 0, ME = 0;
11132 if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
11133 report_fatal_error("invalid rlwimi mask!");
11134 return SDValue(DAG.getMachineNode(
11135 PPC::RLWIMI, dl, MVT::i32,
11136 {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
11137 DAG.getTargetConstant(MB, dl, MVT::i32),
11138 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11139 0);
11140 }
11141
11142 case Intrinsic::ppc_bcdshift:
11143 return MapNodeWithSplatVector(PPCISD::BCDSHIFT, {Op.getOperand(3)});
11144 case Intrinsic::ppc_bcdshiftround:
11145 return MapNodeWithSplatVector(PPCISD::BCDSHIFTROUND, {Op.getOperand(3)});
11146 case Intrinsic::ppc_bcdtruncate:
11147 return MapNodeWithSplatVector(PPCISD::BCDTRUNC, {Op.getOperand(3)});
11148 case Intrinsic::ppc_bcdunsignedtruncate:
11149 return MapNodeWithSplatVector(PPCISD::BCDUTRUNC);
11150 case Intrinsic::ppc_bcdunsignedshift:
11151 return MapNodeWithSplatVector(PPCISD::BCDUSHIFT);
11152
11153 case Intrinsic::ppc_rlwnm: {
11154 if (Op.getConstantOperandVal(3) == 0)
11155 return DAG.getConstant(0, dl, MVT::i32);
11156 unsigned MB = 0, ME = 0;
11157 if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
11158 report_fatal_error("invalid rlwnm mask!");
11159 return SDValue(
11160 DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
11161 {Op.getOperand(1), Op.getOperand(2),
11162 DAG.getTargetConstant(MB, dl, MVT::i32),
11163 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11164 0);
11165 }
11166
11167 case Intrinsic::ppc_mma_disassemble_acc: {
11168 if (Subtarget.isISAFuture()) {
11169 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11170 SDValue WideVec =
11171 SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
11172 Op.getOperand(1)),
11173 0);
11174 SmallVector<SDValue, 4> RetOps;
11175 SDValue Value = SDValue(WideVec.getNode(), 0);
11176 SDValue Value2 = SDValue(WideVec.getNode(), 1);
11177
11178 SDValue Extract;
11179 Extract = DAG.getNode(
11180 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11181 Subtarget.isLittleEndian() ? Value2 : Value,
11182 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11183 dl, getPointerTy(DAG.getDataLayout())));
11184 RetOps.push_back(Extract);
11185 Extract = DAG.getNode(
11186 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11187 Subtarget.isLittleEndian() ? Value2 : Value,
11188 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11189 dl, getPointerTy(DAG.getDataLayout())));
11190 RetOps.push_back(Extract);
11191 Extract = DAG.getNode(
11192 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11193 Subtarget.isLittleEndian() ? Value : Value2,
11194 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11195 dl, getPointerTy(DAG.getDataLayout())));
11196 RetOps.push_back(Extract);
11197 Extract = DAG.getNode(
11198 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11199 Subtarget.isLittleEndian() ? Value : Value2,
11200 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11201 dl, getPointerTy(DAG.getDataLayout())));
11202 RetOps.push_back(Extract);
11203 return DAG.getMergeValues(RetOps, dl);
11204 }
11205 [[fallthrough]];
11206 }
11207 case Intrinsic::ppc_vsx_disassemble_pair: {
11208 int NumVecs = 2;
11209 SDValue WideVec = Op.getOperand(1);
11210 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
11211 NumVecs = 4;
11212 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
11213 }
11214 SmallVector<SDValue, 4> RetOps;
11215 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
11216 SDValue Extract = DAG.getNode(
11217 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
11218 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
11219 : VecNo,
11220 dl, getPointerTy(DAG.getDataLayout())));
11221 RetOps.push_back(Extract);
11222 }
11223 return DAG.getMergeValues(RetOps, dl);
11224 }
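    // For illustration: with a v256i1 pair, NumVecs == 2 and the extract index
    // is (isLittleEndian ? NumVecs-1-VecNo : VecNo), so little-endian returns
    // the subregisters in order {1, 0} and big-endian in order {0, 1}, matching
    // the in-memory layout of the underlying VSX register pair.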
11225
11226 case Intrinsic::ppc_build_dmr: {
11227 SmallVector<SDValue, 8> Chains;
11228 SmallVector<SDValue, 4> Pairs;
11229 for (int i = 1; i < 9; i += 2) {
11230 SDValue Hi = Op.getOperand(i);
11231 SDValue Lo = Op.getOperand(i + 1);
11232 if (Hi->getOpcode() == ISD::LOAD)
11233 Chains.push_back(Hi.getValue(1));
11234 if (Lo->getOpcode() == ISD::LOAD)
11235 Chains.push_back(Lo.getValue(1));
11236 Pairs.push_back(
11237 DAG.getNode(PPCISD::PAIR_BUILD, dl, MVT::v256i1, {Hi, Lo}));
11238 }
11239 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
11240 SDValue Value = DMFInsert1024(Pairs, SDLoc(Op), DAG);
11241 return DAG.getMergeValues({Value, TF}, dl);
11242 }
11243
11244 case Intrinsic::ppc_mma_dmxxextfdmr512: {
11245 assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
11246 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11247 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11248 "Specify P of 0 or 1 for lower or upper 512 bits");
11249 unsigned HiLo = Idx->getSExtValue();
11250 unsigned Opcode;
11251 unsigned Subx;
11252 if (HiLo == 0) {
11253 Opcode = PPC::DMXXEXTFDMR512;
11254 Subx = PPC::sub_wacc_lo;
11255 } else {
11256 Opcode = PPC::DMXXEXTFDMR512_HI;
11257 Subx = PPC::sub_wacc_hi;
11258 }
11259 SDValue Subreg(
11260 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
11261 Op.getOperand(1),
11262 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11263 0);
11264 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11265 return SDValue(DAG.getMachineNode(Opcode, dl, ReturnTypes, Subreg), 0);
11266 }
11267
11268 case Intrinsic::ppc_mma_dmxxextfdmr256: {
11269 assert(Subtarget.isISAFuture() && "dmxxextfdmr256 requires ISA Future");
11270 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11271 assert(Idx && (Idx->getSExtValue() >= 0 && Idx->getSExtValue() <= 3) &&
11272 "Specify a dmr row pair 0-3");
11273 unsigned IdxVal = Idx->getSExtValue();
11274 unsigned Subx;
11275 switch (IdxVal) {
11276 case 0:
11277 Subx = PPC::sub_dmrrowp0;
11278 break;
11279 case 1:
11280 Subx = PPC::sub_dmrrowp1;
11281 break;
11282 case 2:
11283 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11284 break;
11285 case 3:
11286 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11287 break;
11288 }
11289 SDValue Subreg(
11290 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v256i1,
11291 Op.getOperand(1),
11292 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11293 0);
11294 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11295 return SDValue(
11296 DAG.getMachineNode(PPC::DMXXEXTFDMR256, dl, MVT::v256i1, {Subreg, P}),
11297 0);
11298 }
11299
11300 case Intrinsic::ppc_mma_dmxxinstdmr512: {
11301 assert(Subtarget.isISAFuture() && "dmxxinstdmr512 requires ISA Future");
11302 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4));
11303 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11304 "Specify P of 0 or 1 for lower or upper 512 bits");
11305 unsigned HiLo = Idx->getSExtValue();
11306 unsigned Opcode;
11307 unsigned Subx;
11308 if (HiLo == 0) {
11309 Opcode = PPCISD::INST512;
11310 Subx = PPC::sub_wacc_lo;
11311 } else {
11312 Opcode = PPCISD::INST512HI;
11313 Subx = PPC::sub_wacc_hi;
11314 }
11315 SDValue Wacc = DAG.getNode(Opcode, dl, MVT::v512i1, Op.getOperand(2),
11316 Op.getOperand(3));
11317 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11318 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11319 Op.getOperand(1), Wacc, SubReg),
11320 0);
11321 }
11322
11323 case Intrinsic::ppc_mma_dmxxinstdmr256: {
11324 assert(Subtarget.isISAFuture() && "dmxxinstdmr256 requires ISA Future");
11325 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3));
11326 assert(Idx && (Idx->getSExtValue() >= 0 && Idx->getSExtValue() <= 3) &&
11327 "Specify a dmr row pair 0-3");
11328 unsigned IdxVal = Idx->getSExtValue();
11329 unsigned Subx;
11330 switch (IdxVal) {
11331 case 0:
11332 Subx = PPC::sub_dmrrowp0;
11333 break;
11334 case 1:
11335 Subx = PPC::sub_dmrrowp1;
11336 break;
11337 case 2:
11338 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11339 break;
11340 case 3:
11341 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11342 break;
11343 }
11344 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11345 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11346 SDValue DMRRowp =
11347 DAG.getNode(PPCISD::INST256, dl, MVT::v256i1, Op.getOperand(2), P);
11348 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11349 Op.getOperand(1), DMRRowp, SubReg),
11350 0);
11351 }
11352
11353 case Intrinsic::ppc_mma_xxmfacc:
11354 case Intrinsic::ppc_mma_xxmtacc: {
11355 // Allow pre-isa-future subtargets to lower as normal.
11356 if (!Subtarget.isISAFuture())
11357 return SDValue();
11358 // The intrinsics for xxmtacc and xxmfacc take one argument of
11359 // type v512i1. For future CPUs the corresponding wacc instruction
11360 // dmxx[inst|extf]dmr512 is always generated for type v512i1, so there
11361 // is no need to produce the xxm[t|f]acc.
11362 SDValue WideVec = Op.getOperand(1);
11363 DAG.ReplaceAllUsesWith(Op, WideVec);
11364 return SDValue();
11365 }
11366
11367 case Intrinsic::ppc_unpack_longdouble: {
11368 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11369 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11370 "Argument of long double unpack must be 0 or 1!");
11371 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
11372 DAG.getConstant(!!(Idx->getSExtValue()), dl,
11373 Idx->getValueType(0)));
11374 }
11375
11376 case Intrinsic::ppc_compare_exp_lt:
11377 case Intrinsic::ppc_compare_exp_gt:
11378 case Intrinsic::ppc_compare_exp_eq:
11379 case Intrinsic::ppc_compare_exp_uo: {
11380 unsigned Pred;
11381 switch (IntrinsicID) {
11382 case Intrinsic::ppc_compare_exp_lt:
11383 Pred = PPC::PRED_LT;
11384 break;
11385 case Intrinsic::ppc_compare_exp_gt:
11386 Pred = PPC::PRED_GT;
11387 break;
11388 case Intrinsic::ppc_compare_exp_eq:
11389 Pred = PPC::PRED_EQ;
11390 break;
11391 case Intrinsic::ppc_compare_exp_uo:
11392 Pred = PPC::PRED_UN;
11393 break;
11394 }
11395 return SDValue(
11396 DAG.getMachineNode(
11397 PPC::SELECT_CC_I4, dl, MVT::i32,
11398 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
11399 Op.getOperand(1), Op.getOperand(2)),
11400 0),
11401 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11402 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
11403 0);
11404 }
11405 case Intrinsic::ppc_test_data_class: {
11406 EVT OpVT = Op.getOperand(1).getValueType();
11407 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11408 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11409 : PPC::XSTSTDCSP);
11410 // Lower __builtin_ppc_test_data_class(value, mask) to XSTSTDC* instruction.
11411 // The XSTSTDC* instructions test if a floating-point value matches any of
11412 // the data classes specified in the mask, setting CR field bits
11413 // accordingly. We need to extract the EQ bit (bit 2) from the CR field and
11414 // convert it to an integer result (1 if match, 0 if no match).
11415 //
11416 // Note: Operands are swapped because XSTSTDC* expects (mask, value) but the
11417 // intrinsic provides (value, mask) as Op.getOperand(1) and
11418 // Op.getOperand(2).
11419 SDValue TestDataClass =
11420 SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32,
11421 {Op.getOperand(2), Op.getOperand(1)}),
11422 0);
11423 if (Subtarget.isISA3_1()) {
11424 // ISA 3.1+: Use SETBC instruction to directly convert CR bit to integer.
11425 // This is more efficient than the SELECT_CC approach used in earlier
11426 // ISAs.
11427 SDValue SubRegIdx = DAG.getTargetConstant(PPC::sub_eq, dl, MVT::i32);
11428 SDValue CRBit =
11429 SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
11430 TestDataClass, SubRegIdx),
11431 0);
11432
11433 return DAG.getNode(PPCISD::SETBC, dl, MVT::i32, CRBit);
11434 }
11435
11436 // Pre-ISA 3.1: Use SELECT_CC to convert CR field to integer (1 or 0).
11437 return SDValue(
11438 DAG.getMachineNode(PPC::SELECT_CC_I4, dl, MVT::i32,
11439 {TestDataClass, DAG.getConstant(1, dl, MVT::i32),
11440 DAG.getConstant(0, dl, MVT::i32),
11441 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
11442 0);
11443 }
11444 case Intrinsic::ppc_fnmsub: {
11445 EVT VT = Op.getOperand(1).getValueType();
11446 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11447 return DAG.getNode(
11448 ISD::FNEG, dl, VT,
11449 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
11450 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11451 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11452 Op.getOperand(2), Op.getOperand(3));
11453 }
11454 case Intrinsic::ppc_convert_f128_to_ppcf128:
11455 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11456 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11457 ? RTLIB::CONVERT_PPCF128_F128
11458 : RTLIB::CONVERT_F128_PPCF128;
11459 MakeLibCallOptions CallOptions;
11460 std::pair<SDValue, SDValue> Result =
11461 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11462 dl, SDValue());
11463 return Result.first;
11464 }
11465 case Intrinsic::ppc_maxfe:
11466 case Intrinsic::ppc_maxfl:
11467 case Intrinsic::ppc_maxfs:
11468 case Intrinsic::ppc_minfe:
11469 case Intrinsic::ppc_minfl:
11470 case Intrinsic::ppc_minfs: {
11471 EVT VT = Op.getValueType();
11472 assert(
11473 all_of(Op->ops().drop_front(4),
11474 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11475 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11476 (void)VT;
11477 ISD::CondCode CC = ISD::SETGT;
11478 if (IntrinsicID == Intrinsic::ppc_minfe ||
11479 IntrinsicID == Intrinsic::ppc_minfl ||
11480 IntrinsicID == Intrinsic::ppc_minfs)
11481 CC = ISD::SETLT;
11482 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11483 SDValue Res = Op.getOperand(I);
11484 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11485 Res =
11486 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11487 }
11488 return Res;
11489 }
11490 }
11491
11492 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11493 // opcode number of the comparison.
11494 int CompareOpc;
11495 bool isDot;
11496 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11497 return SDValue(); // Don't custom lower most intrinsics.
11498
11499 // If this is a non-dot comparison, make the VCMP node and we are done.
11500 if (!isDot) {
11501 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11502 Op.getOperand(1), Op.getOperand(2),
11503 DAG.getConstant(CompareOpc, dl, MVT::i32));
11504 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11505 }
11506
11507 // Create the PPCISD altivec 'dot' comparison node.
11508 SDValue Ops[] = {
11509 Op.getOperand(2), // LHS
11510 Op.getOperand(3), // RHS
11511 DAG.getConstant(CompareOpc, dl, MVT::i32)
11512 };
11513 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11514 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11515
11516 // Unpack the result based on how the target uses it.
11517 unsigned BitNo; // Bit # of CR6.
11518 bool InvertBit; // Invert result?
11519 unsigned Bitx;
11520 unsigned SetOp;
11521 switch (Op.getConstantOperandVal(1)) {
11522 default: // Can't happen, don't crash on invalid number though.
11523 case 0: // Return the value of the EQ bit of CR6.
11524 BitNo = 0;
11525 InvertBit = false;
11526 Bitx = PPC::sub_eq;
11527 SetOp = PPCISD::SETBC;
11528 break;
11529 case 1: // Return the inverted value of the EQ bit of CR6.
11530 BitNo = 0;
11531 InvertBit = true;
11532 Bitx = PPC::sub_eq;
11533 SetOp = PPCISD::SETBCR;
11534 break;
11535 case 2: // Return the value of the LT bit of CR6.
11536 BitNo = 2;
11537 InvertBit = false;
11538 Bitx = PPC::sub_lt;
11539 SetOp = PPCISD::SETBC;
11540 break;
11541 case 3: // Return the inverted value of the LT bit of CR6.
11542 BitNo = 2;
11543 InvertBit = true;
11544 Bitx = PPC::sub_lt;
11545 SetOp = PPCISD::SETBCR;
11546 break;
11547 }
11548
11549 SDValue GlueOp = CompNode.getValue(1);
11550 if (Subtarget.isISA3_1()) {
11551 SDValue SubRegIdx = DAG.getTargetConstant(Bitx, dl, MVT::i32);
11552 SDValue CR6Reg = DAG.getRegister(PPC::CR6, MVT::i32);
11553 SDValue CRBit =
11554 SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
11555 CR6Reg, SubRegIdx, GlueOp),
11556 0);
11557 return DAG.getNode(SetOp, dl, MVT::i32, CRBit);
11558 }
11559
11560 // Now that we have the comparison, emit a copy from the CR to a GPR.
11561 // This is flagged to the above dot comparison.
11562 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11563 DAG.getRegister(PPC::CR6, MVT::i32), GlueOp);
11564
11565 // Shift the bit into the low position.
11566 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11567 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
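  // For illustration: MFOCRF leaves the 4-bit CR6 field in GPR bits 7..4
  // (LT=7, GT=6, EQ=5, SO=4, counting from the LSB), so the shift amount
  // 8 - (3 - BitNo) is 5 for the EQ bit (BitNo=0) and 7 for the LT bit
  // (BitNo=2), moving the selected bit down to bit 0.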
11568 // Isolate the bit.
11569 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11570 DAG.getConstant(1, dl, MVT::i32));
11571
11572 // If we are supposed to, toggle the bit.
11573 if (InvertBit)
11574 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11575 DAG.getConstant(1, dl, MVT::i32));
11576 return Flags;
11577}
11578
11579SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11580 SelectionDAG &DAG) const {
11581 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11582 // the beginning of the argument list.
11583 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11584 SDLoc DL(Op);
11585 switch (Op.getConstantOperandVal(ArgStart)) {
11586 case Intrinsic::ppc_cfence: {
11587 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11588 SDValue Val = Op.getOperand(ArgStart + 1);
11589 EVT Ty = Val.getValueType();
11590 if (Ty == MVT::i128) {
11591 // FIXME: Testing one of two paired registers is sufficient to guarantee
11592 // ordering?
11593 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11594 }
11595 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11596 return SDValue(
11597 DAG.getMachineNode(
11598 Opcode, DL, MVT::Other,
11599 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getScalarIntVT(), Val),
11600 Op.getOperand(0)),
11601 0);
11602 }
11603 case Intrinsic::ppc_disassemble_dmr: {
11604 assert(ArgStart == 1 &&
11605 "llvm.ppc.disassemble.dmr must carry a chain argument.");
11606 return DAG.getStore(Op.getOperand(0), DL, Op.getOperand(ArgStart + 2),
11607 Op.getOperand(ArgStart + 1), MachinePointerInfo());
11608 }
11609 default:
11610 break;
11611 }
11612 return SDValue();
11613}
11614
11615 // Lower scalar BSWAP64 to xxbrd (P9 and later) or a rotate sequence (P8).
11616SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11617 SDLoc dl(Op);
11618 if (!Subtarget.isPPC64())
11619 return Op;
11620
11621 if (Subtarget.hasP9Vector()) {
11622 // MTVSRDD
11623 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11624 Op.getOperand(0));
11625 // XXBRD
11626 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11627 // MFVSRD
11628 int VectorIndex = 0;
11629 if (Subtarget.isLittleEndian())
11630 VectorIndex = 1;
11631 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11632 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11633 return Op;
11634 }
11635
11636 // For Power8, use parallel rotate instructions for faster bswap64.
11637 SDValue Input = Op.getOperand(0);
11638 // Helper to create rotate-and-insert operations (RLWIMI/RLDIMI).
11639 auto CreateRotateInsert =
11640 [&](unsigned Opcode, MVT VT, SDValue Dest, SDValue Src, unsigned RotAmt,
11641 unsigned MaskBegin,
11642 std::optional<unsigned> MaskEnd = std::nullopt) -> SDValue {
11643 SmallVector<SDValue, 5> Ops = {
11644 Dest, Src, DAG.getTargetConstant(RotAmt, dl, MVT::i32),
11645 DAG.getTargetConstant(MaskBegin, dl, MVT::i32)};
11646 if (MaskEnd.has_value())
11647 Ops.push_back(DAG.getTargetConstant(*MaskEnd, dl, MVT::i32));
11648
11649 return SDValue(DAG.getMachineNode(Opcode, dl, VT, Ops), 0);
11650 };
11651
11652 // Helper to perform 32-bit byte swap using rotl(8) + 2x rlwimi.
11653 auto Swap32 = [&](SDValue Val32) -> SDValue {
11654 SDValue Rot = DAG.getNode(ISD::ROTL, dl, MVT::i32, Val32,
11655 DAG.getConstant(8, dl, MVT::i32));
11656 // Insert bits [24:31] from Val32 into Rot at position [0:7].
11657 SDValue Swap =
11658 CreateRotateInsert(PPC::RLWIMI, MVT::i32, Rot, Val32, 24, 0, 7);
11659 // Insert bits [8:15] from Val32 into Swap at position [16:23].
11660 return CreateRotateInsert(PPC::RLWIMI, MVT::i32, Swap, Val32, 24, 16, 23);
11661 };
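  // For illustration, a byte-level trace of Swap32 on input [b0 b1 b2 b3]
  // (b0 most significant):
  //   rotl 8                -> [b1 b2 b3 b0]
  //   rlwimi .., 24, 0, 7   -> [b3 b2 b3 b0]   (byte 0 corrected)
  //   rlwimi .., 24, 16, 23 -> [b3 b2 b1 b0]   (byte 2 corrected)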
11662 // Extract and swap high and low 32-bit halves independently for parallelism.
11663 SDValue Hi32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
11664 DAG.getNode(ISD::SRL, dl, MVT::i64, Input,
11665 DAG.getConstant(32, dl, MVT::i64)));
11666 SDValue Lo32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Input);
11667
11668 // Combine swapped halves: rotate LoSwap left by 32 bits and insert into
11669 // HiSwap to swap their positions, completing the 64-bit byte reversal.
11670 SDValue HiSwap = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Swap32(Hi32));
11671 SDValue LoSwap = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Swap32(Lo32));
11672
11673 return CreateRotateInsert(PPC::RLDIMI, MVT::i64, HiSwap, LoSwap, 32, 0);
11674}
11675
11676// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11677// compared to a value that is atomically loaded (atomic loads zero-extend).
11678SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11679 SelectionDAG &DAG) const {
11680 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11681 "Expecting an atomic compare-and-swap here.");
11682 SDLoc dl(Op);
11683 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11684 EVT MemVT = AtomicNode->getMemoryVT();
11685 if (MemVT.getSizeInBits() >= 32)
11686 return Op;
11687
11688 SDValue CmpOp = Op.getOperand(2);
11689 // If this is already correctly zero-extended, leave it alone.
11690 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11691 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11692 return Op;
11693
11694 // Clear the high bits of the compare operand.
11695 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11696 SDValue NewCmpOp =
11697 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11698 DAG.getConstant(MaskVal, dl, MVT::i32));
11699
11700 // Replace the existing compare operand with the properly zero-extended one.
11701 SmallVector<SDValue, 4> Ops;
11702 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11703 Ops.push_back(AtomicNode->getOperand(i));
11704 Ops[2] = NewCmpOp;
11705 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11706 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11707 auto NodeTy =
11708 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11709 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11710}
11711
11712SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11713 SelectionDAG &DAG) const {
11714 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11715 EVT MemVT = N->getMemoryVT();
11716 assert(MemVT.getSimpleVT() == MVT::i128 &&
11717 "Expect quadword atomic operations");
11718 SDLoc dl(N);
11719 unsigned Opc = N->getOpcode();
11720 switch (Opc) {
11721 case ISD::ATOMIC_LOAD: {
11722 // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
11723 // lowered to ppc instructions by pattern matching instruction selector.
11724 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11725 SmallVector<SDValue, 4> Ops{
11726 N->getOperand(0),
11727 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11728 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11729 Ops.push_back(N->getOperand(I));
11730 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11731 Ops, MemVT, N->getMemOperand());
11732 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11733 SDValue ValHi =
11734 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11735 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11736 DAG.getConstant(64, dl, MVT::i32));
11737 SDValue Val =
11738 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
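    // In effect this computes Val = zext(ValLo) | (zext(ValHi) << 64),
    // reassembling the i128 quadword from the two i64 halves returned by
    // ppc_atomic_load_i128.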
11739 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11740 {Val, LoadedVal.getValue(2)});
11741 }
11742 case ISD::ATOMIC_STORE: {
11743 // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
11744 // lowered to ppc instructions by pattern matching instruction selector.
11745 SDVTList Tys = DAG.getVTList(MVT::Other);
11746 SmallVector<SDValue, 4> Ops{
11747 N->getOperand(0),
11748 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11749 SDValue Val = N->getOperand(1);
11750 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11751 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11752 DAG.getConstant(64, dl, MVT::i32));
11753 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11754 Ops.push_back(ValLo);
11755 Ops.push_back(ValHi);
11756 Ops.push_back(N->getOperand(2));
11757 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11758 N->getMemOperand());
11759 }
11760 default:
11761 llvm_unreachable("Unexpected atomic opcode");
11762 }
11763}
11764
11765 static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11766 SelectionDAG &DAG,
11767 const PPCSubtarget &Subtarget) {
11768 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11769
11770 enum DataClassMask {
11771 DC_NAN = 1 << 6,
11772 DC_NEG_INF = 1 << 4,
11773 DC_POS_INF = 1 << 5,
11774 DC_NEG_ZERO = 1 << 2,
11775 DC_POS_ZERO = 1 << 3,
11776 DC_NEG_SUBNORM = 1,
11777 DC_POS_SUBNORM = 1 << 1,
11778 };
11779
11780 EVT VT = Op.getValueType();
11781
11782 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11783 : VT == MVT::f64 ? PPC::XSTSTDCDP
11784 : PPC::XSTSTDCSP;
11785
11786 if (Mask == fcAllFlags)
11787 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11788 if (Mask == 0)
11789 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11790
11791 // Handle masks where it is cheaper or necessary to test the inverted flags.
11792 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11793 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11794 return DAG.getNOT(Dl, Rev, MVT::i1);
11795 }
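  // For example, with Mask == ~fcQNan the complement is the single class
  // fcQNan, so one data-class test plus a NOT is cheaper than testing each
  // remaining class individually.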
11796
11797 // Power doesn't support testing whether a value is 'normal'. Test all other
11798 // classes first, then derive 'normal' as 'not any other class' with the expected sign.
11799 if (Mask & fcNormal) {
11800 SDValue Rev(DAG.getMachineNode(
11801 TestOp, Dl, MVT::i32,
11802 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11803 DC_NEG_ZERO | DC_POS_ZERO |
11804 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11805 Dl, MVT::i32),
11806 Op),
11807 0);
11808 // The sign is stored in CR bit 0 (LT); the result is in CR bit 2 (EQ).
11809 SDValue Sign(
11810 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11811 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11812 0);
11813 SDValue Normal(DAG.getNOT(
11814 Dl,
11815 SDValue(DAG.getMachineNode(
11816 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11817 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11818 0),
11819 MVT::i1));
11820 if (Mask & fcPosNormal)
11821 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11822 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11823 if (Mask == fcPosNormal || Mask == fcNegNormal)
11824 return Result;
11825
11826 return DAG.getNode(
11827 ISD::OR, Dl, MVT::i1,
11828 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11829 }
11830
11831 // The instruction doesn't differentiate between signaling or quiet NaN. Test
11832 // the rest first, and test if it 'is NaN and is signaling/quiet'.
11833 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11834 bool IsQuiet = Mask & fcQNan;
11835 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11836
11837 // Quietness is determined by the first (most significant) bit of the fraction field.
11838 uint64_t QuietMask = 0;
11839 SDValue HighWord;
11840 if (VT == MVT::f128) {
11841 HighWord = DAG.getNode(
11842 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11843 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11844 QuietMask = 0x8000;
11845 } else if (VT == MVT::f64) {
11846 if (Subtarget.isPPC64()) {
11847 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11848 DAG.getBitcast(MVT::i64, Op),
11849 DAG.getConstant(1, Dl, MVT::i32));
11850 } else {
11851 SDValue Vec = DAG.getBitcast(
11852 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11853 HighWord = DAG.getNode(
11854 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11855 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11856 }
11857 QuietMask = 0x80000;
11858 } else if (VT == MVT::f32) {
11859 HighWord = DAG.getBitcast(MVT::i32, Op);
11860 QuietMask = 0x400000;
11861 }
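    // The QuietMask constants select the most significant fraction bit within
    // the extracted high word: f32 bit 22 -> 0x400000; f64 bit 51 is bit
    // 51-32 = 19 of the high word -> 0x80000; f128 bit 111 is bit 111-96 = 15
    // of the top word -> 0x8000.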
11862 SDValue NanRes = DAG.getSetCC(
11863 Dl, MVT::i1,
11864 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11865 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11866 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11867 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11868 if (Mask == fcQNan || Mask == fcSNan)
11869 return NanRes;
11870
11871 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11872 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11873 NanRes);
11874 }
11875
11876 unsigned NativeMask = 0;
11877 if ((Mask & fcNan) == fcNan)
11878 NativeMask |= DC_NAN;
11879 if (Mask & fcNegInf)
11880 NativeMask |= DC_NEG_INF;
11881 if (Mask & fcPosInf)
11882 NativeMask |= DC_POS_INF;
11883 if (Mask & fcNegZero)
11884 NativeMask |= DC_NEG_ZERO;
11885 if (Mask & fcPosZero)
11886 NativeMask |= DC_POS_ZERO;
11887 if (Mask & fcNegSubnormal)
11888 NativeMask |= DC_NEG_SUBNORM;
11889 if (Mask & fcPosSubnormal)
11890 NativeMask |= DC_POS_SUBNORM;
11891 return SDValue(
11892 DAG.getMachineNode(
11893 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11894 SDValue(DAG.getMachineNode(
11895 TestOp, Dl, MVT::i32,
11896 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11897 0),
11898 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11899 0);
11900}
11901
11902SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11903 SelectionDAG &DAG) const {
11904 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11905 SDValue LHS = Op.getOperand(0);
11906 uint64_t RHSC = Op.getConstantOperandVal(1);
11907 SDLoc Dl(Op);
11908 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11909 if (LHS.getValueType() == MVT::ppcf128) {
11910 // The higher part determines the value class.
11911 LHS = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::f64, LHS,
11912 DAG.getConstant(1, Dl, MVT::i32));
11913 }
11914
11915 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11916}
11917
11918// Adjust the length value for a load/store with length to account for the
11919// instructions requiring a left justified length, and for non-byte element
11920// types requiring scaling by element size.
11921static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left,
11922 SelectionDAG &DAG) {
11923 SDLoc dl(Val);
11924 EVT VT = Val->getValueType(0);
11925 unsigned LeftAdj = Left ? VT.getSizeInBits() - 8 : 0;
11926 unsigned TypeAdj = llvm::countr_zero<uint32_t>(Bits / 8);
11927 SDValue SHLAmt = DAG.getConstant(LeftAdj + TypeAdj, dl, VT);
11928 return DAG.getNode(ISD::SHL, dl, VT, Val, SHLAmt);
11929}
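// For illustration: lxvl expects the byte count in the top byte of a 64-bit
// GPR, so LeftAdj = 64-8 = 56; for v8i16 elements Bits == 16 and
// TypeAdj = countr_zero(16/8) = 1, scaling the element count by 2 to get
// bytes. Both adjustments fold into the single shift-left above.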
11930
11931SDValue PPCTargetLowering::LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const {
11932 auto VPLD = cast<VPLoadSDNode>(Op);
11933 bool Future = Subtarget.isISAFuture();
11934 SDLoc dl(Op);
11935 assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(3).getNode(), true) &&
11936 "Mask predication not supported");
11937 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11938 SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPLD->getOperand(4));
11939 unsigned IID = Future ? Intrinsic::ppc_vsx_lxvrl : Intrinsic::ppc_vsx_lxvl;
11940 unsigned EltBits = Op->getValueType(0).getScalarType().getSizeInBits();
11941 Len = AdjustLength(Len, EltBits, !Future, DAG);
11942 SDValue Ops[] = {VPLD->getChain(), DAG.getConstant(IID, dl, MVT::i32),
11943 VPLD->getOperand(1), Len};
11944 SDVTList Tys = DAG.getVTList(Op->getValueType(0), MVT::Other);
11945 SDValue VPL =
11946 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys, Ops,
11947 VPLD->getMemoryVT(), VPLD->getMemOperand());
11948 return VPL;
11949}
11950
11951SDValue PPCTargetLowering::LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const {
11952 auto VPST = cast<VPStoreSDNode>(Op);
11953 assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(4).getNode(), true) &&
11954 "Mask predication not supported");
11955 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11956 SDLoc dl(Op);
11957 SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPST->getOperand(5));
11958 unsigned EltBits =
11959 Op->getOperand(1).getValueType().getScalarType().getSizeInBits();
11960 bool Future = Subtarget.isISAFuture();
11961 unsigned IID = Future ? Intrinsic::ppc_vsx_stxvrl : Intrinsic::ppc_vsx_stxvl;
11962 Len = AdjustLength(Len, EltBits, !Future, DAG);
11963 SDValue Ops[] = {
11964 VPST->getChain(), DAG.getConstant(IID, dl, MVT::i32),
11965 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, VPST->getOperand(1)),
11966 VPST->getOperand(2), Len};
11967 SDVTList Tys = DAG.getVTList(MVT::Other);
11968 SDValue VPS =
11969 DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
11970 VPST->getMemoryVT(), VPST->getMemOperand());
11971 return VPS;
11972}
11973
11974SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11975 SelectionDAG &DAG) const {
11976 SDLoc dl(Op);
11977
11978 MachineFunction &MF = DAG.getMachineFunction();
11979 SDValue Op0 = Op.getOperand(0);
11980 EVT ValVT = Op0.getValueType();
11981 unsigned EltSize = Op.getValueType().getScalarSizeInBits();
11982 if (isa<ConstantSDNode>(Op0) && EltSize <= 32) {
11983 int64_t IntVal = Op.getConstantOperandVal(0);
11984 if (IntVal >= -16 && IntVal <= 15)
11985 return getCanonicalConstSplat(IntVal, EltSize / 8, Op.getValueType(), DAG,
11986 dl);
11987 }
11988
11989 ReuseLoadInfo RLI;
11990 if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
11991 Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
11992 Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
11993 canReuseLoadAddress(Op0, MVT::i32, RLI, DAG, ISD::NON_EXTLOAD)) {
11994
11995 MachineMemOperand *MMO =
11996 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
11997 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
11998 SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
11999 SDValue Bits = DAG.getMemIntrinsicNode(
12000 PPCISD::LD_SPLAT, dl, DAG.getVTList(MVT::v4i32, MVT::Other), Ops,
12001 MVT::i32, MMO);
12002 if (RLI.ResChain)
12003 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
12004 return Bits.getValue(0);
12005 }
12006
12007 // Create a stack slot that is 16-byte aligned.
12008 MachineFrameInfo &MFI = MF.getFrameInfo();
12009 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
12010 EVT PtrVT = getPointerTy(DAG.getDataLayout());
12011 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
12012
12013 SDValue Val = Op0;
12014 // P10 hardware store forwarding requires that a single store contains all
12015 // the data for the load. P10 is able to merge a pair of adjacent stores. Try
12016 // to avoid load hit store on P10 when running binaries compiled for older
12017 // processors by generating two mergeable scalar stores to forward with the
12018 // vector load.
12019 if (!DisableP10StoreForward && Subtarget.isPPC64() &&
12020 !Subtarget.isLittleEndian() && ValVT.isInteger() &&
12021 ValVT.getSizeInBits() <= 64) {
12022 Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Val);
12023 EVT ShiftAmountTy = getShiftAmountTy(MVT::i64, DAG.getDataLayout());
12024 SDValue ShiftBy = DAG.getConstant(
12025 64 - Op.getValueType().getScalarSizeInBits(), dl, ShiftAmountTy);
12026 Val = DAG.getNode(ISD::SHL, dl, MVT::i64, Val, ShiftBy);
12027 SDValue Plus8 =
12028 DAG.getNode(ISD::ADD, dl, PtrVT, FIdx, DAG.getConstant(8, dl, PtrVT));
12029 SDValue Store2 =
12030 DAG.getStore(DAG.getEntryNode(), dl, Val, Plus8, MachinePointerInfo());
12031 SDValue Store = DAG.getStore(Store2, dl, Val, FIdx, MachinePointerInfo());
12032 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx,
12033 MachinePointerInfo());
12034 }
12035
12036 // Store the input value into Value#0 of the stack slot.
12037 SDValue Store =
12038 DAG.getStore(DAG.getEntryNode(), dl, Val, FIdx, MachinePointerInfo());
12039 // Load it out.
12040 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
12041}
12042
12043SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
12044 SelectionDAG &DAG) const {
12045 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
12046 "Should only be called for ISD::INSERT_VECTOR_ELT");
12047
12048 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
12049
12050 EVT VT = Op.getValueType();
12051 SDLoc dl(Op);
12052 SDValue V1 = Op.getOperand(0);
12053 SDValue V2 = Op.getOperand(1);
12054
12055 if (VT == MVT::v2f64 && C)
12056 return Op;
12057
12058 if (Subtarget.hasP9Vector()) {
12059 // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
12060 // because on P10, it allows this specific insert_vector_elt load pattern to
12061 // utilize the refactored load and store infrastructure in order to exploit
12062 // prefixed loads.
12063 // On targets with inexpensive direct moves (Power9 and up), a
12064 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
12065 // load since a single precision load will involve conversion to double
12066 // precision on the load followed by another conversion to single precision.
12067 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
12068 (isa<LoadSDNode>(V2))) {
12069 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
12070 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
12071 SDValue InsVecElt =
12072 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
12073 BitcastLoad, Op.getOperand(2));
12074 return DAG.getBitcast(MVT::v4f32, InsVecElt);
12075 }
12076 }
12077
12078 if (Subtarget.isISA3_1()) {
12079 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
12080 return SDValue();
12081 // On P10, we have legal lowering for constant and variable indices for
12082 // all vectors.
12083 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
12084 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
12085 return Op;
12086 }
12087
12088 // Before P10, we have legal lowering for constant indices but not for
12089 // variable ones.
12090 if (!C)
12091 return SDValue();
12092
12093 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
12094 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
12095 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
12096 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
12097 unsigned InsertAtElement = C->getZExtValue();
12098 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
12099 if (Subtarget.isLittleEndian()) {
12100 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
12101 }
12102 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
12103 DAG.getConstant(InsertAtByte, dl, MVT::i32));
12104 }
12105 return Op;
12106}
12107
12108SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
12109 SelectionDAG &DAG) const {
12110 SDLoc dl(Op);
12111 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12112 SDValue LoadChain = LN->getChain();
12113 SDValue BasePtr = LN->getBasePtr();
12114 EVT VT = Op.getValueType();
12115 bool IsV1024i1 = VT == MVT::v1024i1;
12116 bool IsV2048i1 = VT == MVT::v2048i1;
12117
12118 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12119 // Dense Math dmr pair registers, respectively.
12120 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12121 (void)IsV2048i1;
12122 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12123 "Dense Math support required.");
12124 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12125
12127 SmallVector<SDValue, 8> LoadChains;
12128
12129 SDValue IntrinID = DAG.getConstant(Intrinsic::ppc_vsx_lxvp, dl, MVT::i32);
12130 SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
12131 MachineMemOperand *MMO = LN->getMemOperand();
12132 unsigned NumVecs = VT.getSizeInBits() / 256;
12133 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12134 MachineMemOperand *NewMMO =
12135 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12136 if (Idx > 0) {
12137 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12138 DAG.getConstant(32, dl, BasePtr.getValueType()));
12139 LoadOps[2] = BasePtr;
12140 }
12141 SDValue Ld = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
12142 DAG.getVTList(MVT::v256i1, MVT::Other),
12143 LoadOps, MVT::v256i1, NewMMO);
12144 LoadChains.push_back(Ld.getValue(1));
12145 Loads.push_back(Ld);
12146 }
12147
12148 if (Subtarget.isLittleEndian()) {
12149 std::reverse(Loads.begin(), Loads.end());
12150 std::reverse(LoadChains.begin(), LoadChains.end());
12151 }
12152
12153 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12154 SDValue Value = DMFInsert1024(Loads, dl, DAG);
12155
12156 if (IsV1024i1) {
12157 return DAG.getMergeValues({Value, TF}, dl);
12158 }
12159
12160 // Handle Loads for V2048i1 which represents a dmr pair.
12161 SmallVector<SDValue, 4> MoreLoads{Loads[4], Loads[5], Loads[6], Loads[7]};
12162 SDValue Dmr1Value = DMFInsert1024(MoreLoads, dl, DAG);
12163
12164 SDValue Dmr0Sub = DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32);
12165 SDValue Dmr1Sub = DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32);
12166
12167 SDValue DmrPRC = DAG.getTargetConstant(PPC::DMRpRCRegClassID, dl, MVT::i32);
12168 const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};
12169
12170 SDValue DmrPValue = SDValue(
12171 DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v2048i1, DmrPOps), 0);
12172
12173 return DAG.getMergeValues({DmrPValue, TF}, dl);
12174}
12175
12176SDValue PPCTargetLowering::DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
12177 const SDLoc &dl,
12178 SelectionDAG &DAG) const {
12179 SDValue Lo =
12180 DAG.getNode(PPCISD::INST512, dl, MVT::v512i1, Pairs[0], Pairs[1]);
12181 SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
12182 SDValue Hi =
12183 DAG.getNode(PPCISD::INST512HI, dl, MVT::v512i1, Pairs[2], Pairs[3]);
12184 SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
12185 SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
12186
12187 return SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1,
12188 {RC, Lo, LoSub, Hi, HiSub}),
12189 0);
12190}
12191
12192SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
12193 SelectionDAG &DAG) const {
12194 SDLoc dl(Op);
12195 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12196 SDValue LoadChain = LN->getChain();
12197 SDValue BasePtr = LN->getBasePtr();
12198 EVT VT = Op.getValueType();
12199
12200 if (VT == MVT::v1024i1 || VT == MVT::v2048i1)
12201 return LowerDMFVectorLoad(Op, DAG);
12202
12203 if (VT != MVT::v256i1 && VT != MVT::v512i1)
12204 return Op;
12205
12206 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12207 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
12208 "Type unsupported without MMA");
12209 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12210 "Type unsupported without paired vector support");
12211
12212 // For v256i1 on ISA Future, let the load go through to instruction selection
12213 // where it will be matched to lxvp/plxvp by the instruction patterns.
12214 if (VT == MVT::v256i1 && Subtarget.isISAFuture())
12215 return Op;
12216
12217 // For other cases, create 2 or 4 v16i8 loads to load the pair or accumulator
12218 // value in 2 or 4 vsx registers.
12219 Align Alignment = LN->getAlign();
12220 SmallVector<SDValue, 4> Loads;
12221 SmallVector<SDValue, 4> LoadChains;
12222 unsigned NumVecs = VT.getSizeInBits() / 128;
12223 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12224 SDValue Load =
12225 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
12226 LN->getPointerInfo().getWithOffset(Idx * 16),
12227 commonAlignment(Alignment, Idx * 16),
12228 LN->getMemOperand()->getFlags(), LN->getAAInfo());
12229 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12230 DAG.getConstant(16, dl, BasePtr.getValueType()));
12231 Loads.push_back(Load);
12232 LoadChains.push_back(Load.getValue(1));
12233 }
12234 if (Subtarget.isLittleEndian()) {
12235 std::reverse(Loads.begin(), Loads.end());
12236 std::reverse(LoadChains.begin(), LoadChains.end());
12237 }
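  // For illustration: a v512i1 accumulator becomes four v16i8 loads at
  // offsets 0, 16, 32 and 48; on little-endian the lists are reversed so
  // that ACC_BUILD/PAIR_BUILD see their operands in the same register order
  // on either endianness.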
12238 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12239 SDValue Value =
12240 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
12241 dl, VT, Loads);
12242 SDValue RetOps[] = {Value, TF};
12243 return DAG.getMergeValues(RetOps, dl);
12244}
12245
12246SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
12247 SelectionDAG &DAG) const {
12248
12249 SDLoc dl(Op);
12250 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12251 SDValue StoreChain = SN->getChain();
12252 SDValue BasePtr = SN->getBasePtr();
12253 SmallVector<SDValue, 8> Values;
12254
12255 EVT VT = SN->getValue().getValueType();
12256 bool IsV1024i1 = VT == MVT::v1024i1;
12257 bool IsV2048i1 = VT == MVT::v2048i1;
12258
12259 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12260 // Dense Math dmr pair registers, respectively.
12261 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12262 (void)IsV2048i1;
12263 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12264 "Dense Math support required.");
12265 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12266
12267 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12268 if (IsV1024i1) {
12269 SDValue Lo(DAG.getMachineNode(
12270 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12271 Op.getOperand(1),
12272 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12273 0);
12274 SDValue Hi(DAG.getMachineNode(
12275 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12276 Op.getOperand(1),
12277 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12278 0);
12279 MachineSDNode *ExtNode =
12280 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
12281 Values.push_back(SDValue(ExtNode, 0));
12282 Values.push_back(SDValue(ExtNode, 1));
12283 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
12284 Values.push_back(SDValue(ExtNode, 0));
12285 Values.push_back(SDValue(ExtNode, 1));
12286 } else {
12287 // This corresponds to v2048i1 which represents a dmr pair.
12288 SDValue Dmr0(
12289 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12290 Op.getOperand(1),
12291 DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32)),
12292 0);
12293
12294 SDValue Dmr1(
12295 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12296 Op.getOperand(1),
12297 DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32)),
12298 0);
12299
12300 SDValue Dmr0Lo(DAG.getMachineNode(
12301 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12302 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12303 0);
12304
12305 SDValue Dmr0Hi(DAG.getMachineNode(
12306 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12307 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12308 0);
12309
12310 SDValue Dmr1Lo(DAG.getMachineNode(
12311 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12312 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12313 0);
12314
12315 SDValue Dmr1Hi(DAG.getMachineNode(
12316 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12317 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12318 0);
12319
12320 MachineSDNode *ExtNode =
12321 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr0Lo);
12322 Values.push_back(SDValue(ExtNode, 0));
12323 Values.push_back(SDValue(ExtNode, 1));
12324 ExtNode =
12325 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr0Hi);
12326 Values.push_back(SDValue(ExtNode, 0));
12327 Values.push_back(SDValue(ExtNode, 1));
12328 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr1Lo);
12329 Values.push_back(SDValue(ExtNode, 0));
12330 Values.push_back(SDValue(ExtNode, 1));
12331 ExtNode =
12332 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr1Hi);
12333 Values.push_back(SDValue(ExtNode, 0));
12334 Values.push_back(SDValue(ExtNode, 1));
12335 }
12336
12337 if (Subtarget.isLittleEndian())
12338 std::reverse(Values.begin(), Values.end());
12339
12340 SDVTList Tys = DAG.getVTList(MVT::Other);
12341 SmallVector<SDValue, 4> Ops{
12342 StoreChain, DAG.getConstant(Intrinsic::ppc_vsx_stxvp, dl, MVT::i32),
12343 Values[0], BasePtr};
12344 MachineMemOperand *MMO = SN->getMemOperand();
12345 unsigned NumVecs = VT.getSizeInBits() / 256;
12346 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12347 MachineMemOperand *NewMMO =
12348 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12349 if (Idx > 0) {
12350 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12351 DAG.getConstant(32, dl, BasePtr.getValueType()));
12352 Ops[3] = BasePtr;
12353 }
12354 Ops[2] = Values[Idx];
12355 SDValue St = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
12356 MVT::v256i1, NewMMO);
12357 Stores.push_back(St);
12358 }
12359
12360 SDValue TF = DAG.getTokenFactor(dl, Stores);
12361 return TF;
12362}
12363
12364SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
12365 SelectionDAG &DAG) const {
12366 SDLoc dl(Op);
12367 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12368 SDValue StoreChain = SN->getChain();
12369 SDValue BasePtr = SN->getBasePtr();
12370 SDValue Value = SN->getValue();
12371 SDValue Value2 = SN->getValue();
12372 EVT StoreVT = Value.getValueType();
12373
12374 if (StoreVT == MVT::v1024i1 || StoreVT == MVT::v2048i1)
12375 return LowerDMFVectorStore(Op, DAG);
12376
12377 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
12378 return Op;
12379
12380 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12381 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
12382 "Type unsupported without MMA");
12383 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12384 "Type unsupported without paired vector support");
12385
12386 // For v256i1 on ISA Future, let the store go through to instruction selection
12387 // where it will be matched to stxvp/pstxvp by the instruction patterns.
12388 if (StoreVT == MVT::v256i1 && Subtarget.isISAFuture() &&
12389 Subtarget.pairedVectorMemops())
12390 return Op;
12391
12392 // For other cases, create 2 or 4 v16i8 stores to store the pair or
12393 // accumulator underlying registers individually.
12394 Align Alignment = SN->getAlign();
12395 SmallVector<SDValue, 4> Stores;
12396 unsigned NumVecs = 2;
12397 if (StoreVT == MVT::v512i1) {
12398 if (Subtarget.isISAFuture()) {
12399 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12400 MachineSDNode *ExtNode = DAG.getMachineNode(
12401 PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
12402
12403 Value = SDValue(ExtNode, 0);
12404 Value2 = SDValue(ExtNode, 1);
12405 } else
12406 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
12407 NumVecs = 4;
12408 }
12409 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12410 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
12411 SDValue Elt;
12412 if (Subtarget.isISAFuture()) {
12413 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
12414 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
12415 Idx > 1 ? Value2 : Value,
12416 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
12417 } else
12418 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
12419 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
12420
12421 SDValue Store =
12422 DAG.getStore(StoreChain, dl, Elt, BasePtr,
12423 SN->getPointerInfo().getWithOffset(Idx * 16),
12424 commonAlignment(Alignment, Idx * 16),
12425 SN->getMemOperand()->getFlags(), SN->getAAInfo());
12426 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12427 DAG.getConstant(16, dl, BasePtr.getValueType()));
12428 Stores.push_back(Store);
12429 }
12430 SDValue TF = DAG.getTokenFactor(dl, Stores);
12431 return TF;
12432}
12433
12434SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
12435 SDLoc dl(Op);
12436 if (Op.getValueType() == MVT::v4i32) {
12437 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12438
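    // The sequence below computes, per 32-bit lane with x = xh:xl, y = yh:yl,
    //   x*y mod 2^32 = xl*yl + ((xl*yh + xh*yl) << 16)
    // vmulouh produces xl*yl, vmsumuhm with the halfword-swapped RHS produces
    // xl*yh + xh*yl, and vslw provides the shift by 16.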
12439 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
12440 // Splat of -16: vector shift/rotate amounts use only the low 5 bits, so this acts as +16.
12441 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
12442 SDValue RHSSwap = // = vrlw RHS, 16
12443 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
12444
12445 // Shrinkify inputs to v8i16.
12446 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
12447 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
12448 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
12449
12450 // Low parts multiplied together, generating 32-bit results (we ignore the
12451 // top parts).
12452 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
12453 LHS, RHS, DAG, dl, MVT::v4i32);
12454
12455 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
12456 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
12457 // Shift the high parts up 16 bits.
12458 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
12459 Neg16, DAG, dl);
12460 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
12461 } else if (Op.getValueType() == MVT::v16i8) {
12462 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12463 bool isLittleEndian = Subtarget.isLittleEndian();
12464
12465 // Multiply the even 8-bit parts, producing 16-bit sums.
12466 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
12467 LHS, RHS, DAG, dl, MVT::v8i16);
12468 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
12469
12470 // Multiply the odd 8-bit parts, producing 16-bit sums.
12471 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
12472 LHS, RHS, DAG, dl, MVT::v8i16);
12473 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
12474
12475 // Merge the results together. Because vmuleub and vmuloub are
12476 // instructions with a big-endian bias, we must reverse the
12477 // element numbering and reverse the meaning of "odd" and "even"
12478 // when generating little endian code.
12479 int Ops[16];
12480 for (unsigned i = 0; i != 8; ++i) {
12481 if (isLittleEndian) {
12482 Ops[i*2 ] = 2*i;
12483 Ops[i*2+1] = 2*i+16;
12484 } else {
12485 Ops[i*2 ] = 2*i+1;
12486 Ops[i*2+1] = 2*i+1+16;
12487 }
12488 }
12489 if (isLittleEndian)
12490 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
12491 else
12492 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
12493 } else {
12494 llvm_unreachable("Unknown mul to lower!");
12495 }
12496}
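// Illustrative sketch (plain C, one 32-bit lane) of the v4i32 decomposition
// above; LoProd/HiProd correspond to the two partial products:
//   uint32_t mul32(uint32_t a, uint32_t b) {
//     uint32_t lo = (a & 0xFFFF) * (b & 0xFFFF);       // vmulouh
//     uint32_t hi = (a & 0xFFFF) * (b >> 16)
//                 + (a >> 16) * (b & 0xFFFF);          // vmsumuhm on the rotated RHS
//     return lo + (hi << 16);                          // vslw by 16, then add
//   }
// The a_hi*b_hi term is dropped because it only contributes above bit 31.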
12497
12498SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
12499 bool IsStrict = Op->isStrictFPOpcode();
12500 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
12501 !Subtarget.hasP9Vector())
12502 return SDValue();
12503
12504 return Op;
12505}
12506
12507// Custom lowering for fpext v2f32 to v2f64
12508SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
12509
12510 assert(Op.getOpcode() == ISD::FP_EXTEND &&
12511 "Should only be called for ISD::FP_EXTEND");
12512
12513 // FIXME: handle extends from half precision float vectors on P9.
12514 // We only want to custom lower an extend from v2f32 to v2f64.
12515 if (Op.getValueType() != MVT::v2f64 ||
12516 Op.getOperand(0).getValueType() != MVT::v2f32)
12517 return SDValue();
12518
12519 SDLoc dl(Op);
12520 SDValue Op0 = Op.getOperand(0);
12521
12522 switch (Op0.getOpcode()) {
12523 default:
12524 return SDValue();
12525 case ISD::EXTRACT_SUBVECTOR: {
12526 assert(Op0.getNumOperands() == 2 &&
12527 isa<ConstantSDNode>(Op0->getOperand(1)) &&
12528 "Node should have 2 operands with second one being a constant!");
12529
12530 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
12531 return SDValue();
12532
12533 // Custom lower is only done for high or low doubleword.
12534 int Idx = Op0.getConstantOperandVal(1);
12535 if (Idx % 2 != 0)
12536 return SDValue();
12537
12538 // Since input is v4f32, at this point Idx is either 0 or 2.
12539 // Shift to get the doubleword position we want.
12540 int DWord = Idx >> 1;
12541
12542 // High and low word positions are different on little endian.
12543 if (Subtarget.isLittleEndian())
12544 DWord ^= 0x1;
12545
12546 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
12547 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
12548 }
12549 case ISD::FADD:
12550 case ISD::FMUL:
12551 case ISD::FSUB: {
12552 SDValue NewLoad[2];
12553 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
12554 // Ensure both inputs are loads.
12555 SDValue LdOp = Op0.getOperand(i);
12556 if (LdOp.getOpcode() != ISD::LOAD)
12557 return SDValue();
12558 // Generate new load node.
12559 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
12560 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12561 NewLoad[i] = DAG.getMemIntrinsicNode(
12562 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12563 LD->getMemoryVT(), LD->getMemOperand());
12564 }
12565 SDValue NewOp =
12566 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
12567 NewLoad[1], Op0.getNode()->getFlags());
12568 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
12569 DAG.getConstant(0, dl, MVT::i32));
12570 }
12571 case ISD::LOAD: {
12572 LoadSDNode *LD = cast<LoadSDNode>(Op0);
12573 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12574 SDValue NewLd = DAG.getMemIntrinsicNode(
12575 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12576 LD->getMemoryVT(), LD->getMemOperand());
12577 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
12578 DAG.getConstant(0, dl, MVT::i32));
12579 }
12580 }
12581 llvm_unreachable("ERROR: Should return for all cases within switch.");
12582}
12583
12584static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value,
12585 SelectionDAG &DAG,
12586 const PPCSubtarget &STI) {
12587 SDLoc DL(Value);
12588 if (STI.useCRBits())
12589 Value = DAG.getNode(ISD::SELECT, DL, SumType, Value,
12590 DAG.getConstant(1, DL, SumType),
12591 DAG.getConstant(0, DL, SumType));
12592 else
12593 Value = DAG.getZExtOrTrunc(Value, DL, SumType);
12594 SDValue Sum = DAG.getNode(PPCISD::ADDC, DL, DAG.getVTList(SumType, MVT::i32),
12595 Value, DAG.getAllOnesConstant(DL, SumType));
12596 return Sum.getValue(1);
12597}
12598
12599static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag,
12600 EVT CarryType, SelectionDAG &DAG,
12601 const PPCSubtarget &STI) {
12602 SDLoc DL(Flag);
12603 SDValue Zero = DAG.getConstant(0, DL, SumType);
12604 SDValue Carry = DAG.getNode(
12605 PPCISD::ADDE, DL, DAG.getVTList(SumType, MVT::i32), Zero, Zero, Flag);
12606 if (STI.useCRBits())
12607 return DAG.getSetCC(DL, CarryType, Carry, Zero, ISD::SETNE);
12608 return DAG.getZExtOrTrunc(Carry, DL, CarryType);
12609}
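// A rough scalar picture of the two conversions above (assuming the usual
// semantics of the PPC carry bit CA):
//   value -> flag: adding all-ones to a value carries out (CA = 1) exactly
//                  when the value is nonzero, so a 0/1 carry value lands in CA.
//   flag -> value: ADDE 0, 0, CA computes 0 + 0 + CA, recovering CA as an
//                  integer 0/1, which is then compared or truncated to the
//                  requested carry type.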
12610
12611SDValue PPCTargetLowering::LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const {
12612
12613 SDLoc DL(Op);
12614 SDNode *N = Op.getNode();
12615 EVT VT = N->getValueType(0);
12616 EVT CarryType = N->getValueType(1);
12617 unsigned Opc = N->getOpcode();
12618 bool IsAdd = Opc == ISD::UADDO;
12619 Opc = IsAdd ? PPCISD::ADDC : PPCISD::SUBC;
12620 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12621 N->getOperand(0), N->getOperand(1));
12622 SDValue Carry = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType,
12623 DAG, Subtarget);
12624 if (!IsAdd)
12625 Carry = DAG.getNode(ISD::XOR, DL, CarryType, Carry,
12626 DAG.getConstant(1UL, DL, CarryType));
12627 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, Carry);
12628}
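// Note on the XOR above (a sketch of the assumed CA convention): for a
// subtract a - b, PPC sets CA = 1 when there is *no* borrow, i.e.
//   subc: CA = (a >= b);            // unsigned comparison
// while ISD::USUBO defines overflow = 1 on borrow, i.e. (a < b), so the
// carry value must be inverted for the subtract cases.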
12629
12630SDValue PPCTargetLowering::LowerADDSUBO_CARRY(SDValue Op,
12631 SelectionDAG &DAG) const {
12632 SDLoc DL(Op);
12633 SDNode *N = Op.getNode();
12634 unsigned Opc = N->getOpcode();
12635 EVT VT = N->getValueType(0);
12636 EVT CarryType = N->getValueType(1);
12637 SDValue CarryOp = N->getOperand(2);
12638 bool IsAdd = Opc == ISD::UADDO_CARRY;
12639 Opc = IsAdd ? PPCISD::ADDE : PPCISD::SUBE;
12640 if (!IsAdd)
12641 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12642 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12643 CarryOp = ConvertCarryValueToCarryFlag(VT, CarryOp, DAG, Subtarget);
12644 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12645 Op.getOperand(0), Op.getOperand(1), CarryOp);
12646 CarryOp = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType, DAG,
12647 Subtarget);
12648 if (!IsAdd)
12649 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12650 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12651 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, CarryOp);
12652}
12653
12654SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
12655
12656 SDLoc dl(Op);
12657 SDValue LHS = Op.getOperand(0);
12658 SDValue RHS = Op.getOperand(1);
12659 EVT VT = Op.getNode()->getValueType(0);
12660
12661 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12662
12663 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, RHS, LHS);
12664 SDValue Xor2 = DAG.getNode(ISD::XOR, dl, VT, Sub, LHS);
12665
12666 SDValue And = DAG.getNode(ISD::AND, dl, VT, Xor1, Xor2);
12667
12668 SDValue Overflow =
12669 DAG.getNode(ISD::SRL, dl, VT, And,
12670 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12671
12672 SDValue OverflowTrunc =
12673 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12674
12675 return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
12676}
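// Illustrative sketch (plain C, 32-bit) of the rule implemented above:
//   uint32_t d  = (uint32_t)x - (uint32_t)y;
//   bool     ov = (((uint32_t)x ^ (uint32_t)y) & (d ^ (uint32_t)x)) >> 31;
// Overflow occurs exactly when the operands have different signs and the
// result's sign differs from x's sign.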
12677
12678/// Implements signed add with overflow detection using the rule:
12679/// (x eqv y) & (sum xor x), where the overflow bit is extracted from the sign
12680SDValue PPCTargetLowering::LowerSADDO(SDValue Op, SelectionDAG &DAG) const {
12681
12682 SDLoc dl(Op);
12683 SDValue LHS = Op.getOperand(0);
12684 SDValue RHS = Op.getOperand(1);
12685 EVT VT = Op.getNode()->getValueType(0);
12686
12687 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
12688
12689 // Compute ~(x xor y)
12690 SDValue XorXY = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
12691 SDValue EqvXY = DAG.getNOT(dl, XorXY, VT);
12692 // Compute (s xor x)
12693 SDValue SumXorX = DAG.getNode(ISD::XOR, dl, VT, Sum, LHS);
12694
12695 // overflow = (x eqv y) & (s xor x)
12696 SDValue OverflowInSign = DAG.getNode(ISD::AND, dl, VT, EqvXY, SumXorX);
12697
12698 // Shift sign bit down to LSB
12699 SDValue Overflow =
12700 DAG.getNode(ISD::SRL, dl, VT, OverflowInSign,
12701 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12702 // Truncate to the overflow type (i1)
12703 SDValue OverflowTrunc =
12704 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12705
12706 return DAG.getMergeValues({Sum, OverflowTrunc}, dl);
12707}
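// Illustrative sketch (plain C, 32-bit) of the rule in the comment above:
//   uint32_t s  = (uint32_t)x + (uint32_t)y;
//   bool     ov = (~((uint32_t)x ^ (uint32_t)y) & (s ^ (uint32_t)x)) >> 31;
// Overflow occurs exactly when the operands have the same sign and the
// sum's sign differs from it.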
12708
12709// Lower unsigned 3-way compare producing -1/0/1.
12710SDValue PPCTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const {
12711 SDLoc DL(Op);
12712 SDValue A = DAG.getFreeze(Op.getOperand(0));
12713 SDValue B = DAG.getFreeze(Op.getOperand(1));
12714 EVT OpVT = A.getValueType();
12715 EVT ResVT = Op.getValueType();
12716
12717 // On PPC64, i32 carries are affected by the upper 32 bits of the registers.
12718 // We must zero-extend to i64 to ensure the carry reflects the 32-bit unsigned
12719 // comparison.
12720 if (Subtarget.isPPC64() && OpVT == MVT::i32) {
12721 A = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, A);
12722 B = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, B);
12723 OpVT = MVT::i64;
12724 }
12725
12726 // First compute diff = A - B.
12727 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, A, B);
12728
12729 // Generate B - A using SUBC to capture carry.
12730 SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
12731 SDValue SubC = DAG.getNode(PPCISD::SUBC, DL, VTs, B, A);
12732 SDValue CA0 = SubC.getValue(1);
12733
12734 // t2 = A - B + CA0 using SUBE.
12735 SDValue SubE1 = DAG.getNode(PPCISD::SUBE, DL, VTs, A, B, CA0);
12736 SDValue CA1 = SubE1.getValue(1);
12737
12738 // res = diff - t2 + CA1 using SUBE (produces desired -1/0/1).
12739 SDValue ResPair = DAG.getNode(PPCISD::SUBE, DL, VTs, Diff, SubE1, CA1);
12740
12741 // Extract the first result and truncate to result type if needed.
12742 return DAG.getSExtOrTrunc(ResPair.getValue(0), DL, ResVT);
12743}
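// Worked example (illustrative, 32-bit, A = 3, B = 7):
//   Diff = A - B                         = 0xFFFFFFFC
//   SUBC B, A            -> CA0 = 1      (B >= A, no borrow)
//   SUBE A, B, CA0       = A - B - 1 + CA0 = 0xFFFFFFFC, CA1 = 0
//   SUBE Diff, that, CA1 = 0xFFFFFFFC - 0xFFFFFFFC - 1 + 0 = -1
// yielding -1 for A < B; the A == B and A > B cases work out to 0 and 1.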
12744
12745/// LowerOperation - Provide custom lowering hooks for some operations.
12746///
12747SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
12748 switch (Op.getOpcode()) {
12749 default:
12750 llvm_unreachable("Wasn't expecting to be able to lower this!");
12751 case ISD::FPOW: return lowerPow(Op, DAG);
12752 case ISD::FSIN: return lowerSin(Op, DAG);
12753 case ISD::FCOS: return lowerCos(Op, DAG);
12754 case ISD::FLOG: return lowerLog(Op, DAG);
12755 case ISD::FLOG10: return lowerLog10(Op, DAG);
12756 case ISD::FEXP: return lowerExp(Op, DAG);
12757 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
12758 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
12759 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
12760 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
12761 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
12762 case ISD::STRICT_FSETCC:
12763 case ISD::STRICT_FSETCCS:
12764 case ISD::SETCC: return LowerSETCC(Op, DAG);
12765 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
12766 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
12767 case ISD::SSUBO:
12768 return LowerSSUBO(Op, DAG);
12769 case ISD::SADDO:
12770 return LowerSADDO(Op, DAG);
12771
12772 case ISD::INLINEASM:
12773 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
12774 // Variable argument lowering.
12775 case ISD::VASTART: return LowerVASTART(Op, DAG);
12776 case ISD::VAARG: return LowerVAARG(Op, DAG);
12777 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
12778
12779 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
12780 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
12781 case ISD::GET_DYNAMIC_AREA_OFFSET:
12782 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
12783
12784 // Exception handling lowering.
12785 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
12786 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
12787 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
12788
12789 case ISD::LOAD: return LowerLOAD(Op, DAG);
12790 case ISD::STORE: return LowerSTORE(Op, DAG);
12791 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
12792 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
12793 case ISD::STRICT_FP_TO_UINT:
12794 case ISD::STRICT_FP_TO_SINT:
12795 case ISD::FP_TO_UINT:
12796 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
12797 case ISD::STRICT_UINT_TO_FP:
12798 case ISD::STRICT_SINT_TO_FP:
12799 case ISD::UINT_TO_FP:
12800 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
12801 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
12802 case ISD::SET_ROUNDING:
12803 return LowerSET_ROUNDING(Op, DAG);
12804
12805 // Lower 64-bit shifts.
12806 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
12807 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
12808 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
12809
12810 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
12811 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
12812
12813 // Vector-related lowering.
12814 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
12815 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
12816 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
12817 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
12818 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
12819 case ISD::MUL: return LowerMUL(Op, DAG);
12820 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
12821 case ISD::STRICT_FP_ROUND:
12822 case ISD::FP_ROUND:
12823 return LowerFP_ROUND(Op, DAG);
12824 case ISD::ROTL: return LowerROTL(Op, DAG);
12825
12826 // For counter-based loop handling.
12827 case ISD::INTRINSIC_W_CHAIN:
12828 return SDValue();
12829
12830 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
12831
12832 // Frame & Return address.
12833 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
12834 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
12835
12836 case ISD::INTRINSIC_VOID:
12837 return LowerINTRINSIC_VOID(Op, DAG);
12838 case ISD::BSWAP:
12839 return LowerBSWAP(Op, DAG);
12840 case ISD::ATOMIC_CMP_SWAP:
12841 return LowerATOMIC_CMP_SWAP(Op, DAG);
12842 case ISD::ATOMIC_STORE:
12843 return LowerATOMIC_LOAD_STORE(Op, DAG);
12844 case ISD::IS_FPCLASS:
12845 return LowerIS_FPCLASS(Op, DAG);
12846 case ISD::UADDO:
12847 case ISD::USUBO:
12848 return LowerADDSUBO(Op, DAG);
12849 case ISD::UADDO_CARRY:
12850 case ISD::USUBO_CARRY:
12851 return LowerADDSUBO_CARRY(Op, DAG);
12852 case ISD::UCMP:
12853 return LowerUCMP(Op, DAG);
12854 case ISD::STRICT_LRINT:
12855 case ISD::STRICT_LLRINT:
12856 case ISD::STRICT_LROUND:
12857 case ISD::STRICT_LLROUND:
12859 if (Op->getFlags().hasNoFPExcept())
12860 return Op;
12861 return SDValue();
12862 case ISD::VP_LOAD:
12863 return LowerVP_LOAD(Op, DAG);
12864 case ISD::VP_STORE:
12865 return LowerVP_STORE(Op, DAG);
12866 }
12867}
12868
12869void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
12870 SmallVectorImpl<SDValue> &Results,
12871 SelectionDAG &DAG) const {
12872 SDLoc dl(N);
12873 switch (N->getOpcode()) {
12874 default:
12875 llvm_unreachable("Do not know how to custom type legalize this operation!");
12876 case ISD::ATOMIC_LOAD: {
12877 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
12878 Results.push_back(Res);
12879 Results.push_back(Res.getValue(1));
12880 break;
12881 }
12882 case ISD::READCYCLECOUNTER: {
12883 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12884 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
12885
12886 Results.push_back(
12887 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
12888 Results.push_back(RTB.getValue(2));
12889 break;
12890 }
12891 case ISD::INTRINSIC_W_CHAIN: {
12892 if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
12893 break;
12894
12895 assert(N->getValueType(0) == MVT::i1 &&
12896 "Unexpected result type for CTR decrement intrinsic");
12897 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
12898 N->getValueType(0));
12899 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
12900 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
12901 N->getOperand(1));
12902
12903 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
12904 Results.push_back(NewInt.getValue(1));
12905 break;
12906 }
12907 case ISD::INTRINSIC_WO_CHAIN: {
12908 switch (N->getConstantOperandVal(0)) {
12909 case Intrinsic::ppc_pack_longdouble:
12910 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
12911 N->getOperand(2), N->getOperand(1)));
12912 break;
12913 case Intrinsic::ppc_maxfe:
12914 case Intrinsic::ppc_minfe:
12915 case Intrinsic::ppc_fnmsub:
12916 case Intrinsic::ppc_convert_f128_to_ppcf128:
12917 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
12918 break;
12919 }
12920 break;
12921 }
12922 case ISD::VAARG: {
12923 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
12924 return;
12925
12926 EVT VT = N->getValueType(0);
12927
12928 if (VT == MVT::i64) {
12929 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
12930
12931 Results.push_back(NewNode);
12932 Results.push_back(NewNode.getValue(1));
12933 }
12934 return;
12935 }
12936 case ISD::STRICT_FP_TO_SINT:
12937 case ISD::STRICT_FP_TO_UINT:
12938 case ISD::FP_TO_SINT:
12939 case ISD::FP_TO_UINT: {
12940 // LowerFP_TO_INT() can only handle f32 and f64.
12941 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
12942 MVT::ppcf128)
12943 return;
12944 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
12945 Results.push_back(LoweredValue);
12946 if (N->isStrictFPOpcode())
12947 Results.push_back(LoweredValue.getValue(1));
12948 return;
12949 }
12950 case ISD::TRUNCATE: {
12951 if (!N->getValueType(0).isVector())
12952 return;
12953 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
12954 if (Lowered)
12955 Results.push_back(Lowered);
12956 return;
12957 }
12958 case ISD::SCALAR_TO_VECTOR: {
12959 SDValue Lowered = LowerSCALAR_TO_VECTOR(SDValue(N, 0), DAG);
12960 if (Lowered)
12961 Results.push_back(Lowered);
12962 return;
12963 }
12964 case ISD::FSHL:
12965 case ISD::FSHR:
12966 // Don't handle funnel shifts here.
12967 return;
12968 case ISD::BITCAST:
12969 // Don't handle bitcast here.
12970 return;
12971 case ISD::FP_EXTEND:
12972 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
12973 if (Lowered)
12974 Results.push_back(Lowered);
12975 return;
12976 }
12977}
12978
12979//===----------------------------------------------------------------------===//
12980// Other Lowering Code
12981//===----------------------------------------------------------------------===//
12982
12983static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
12984 return Builder.CreateIntrinsic(Id, {});
12985}
12986
12987Value *PPCTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
12988 Value *Addr,
12989 AtomicOrdering Ord) const {
12990 unsigned SZ = ValueTy->getPrimitiveSizeInBits();
12991
12992 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12993 "Only 8/16/32/64-bit atomic loads supported");
12994 Intrinsic::ID IntID;
12995 switch (SZ) {
12996 default:
12997 llvm_unreachable("Unexpected PrimitiveSize");
12998 case 8:
12999 IntID = Intrinsic::ppc_lbarx;
13000 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
13001 break;
13002 case 16:
13003 IntID = Intrinsic::ppc_lharx;
13004 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
13005 break;
13006 case 32:
13007 IntID = Intrinsic::ppc_lwarx;
13008 break;
13009 case 64:
13010 IntID = Intrinsic::ppc_ldarx;
13011 break;
13012 }
13013 Value *Call =
13014 Builder.CreateIntrinsic(IntID, Addr, /*FMFSource=*/nullptr, "larx");
13015
13016 return Builder.CreateTruncOrBitCast(Call, ValueTy);
13017}
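// For reference, the IR produced for a 32-bit load-linked looks roughly
// like (sketch; exact operand attributes may differ):
//   %larx = call i32 @llvm.ppc.lwarx(ptr %addr)
// followed by a trunc/bitcast when ValueTy is narrower than the result.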
13018
13019// Perform a store-conditional operation to Addr. Return the status of the
13020// store. This should be 0 if the store succeeded, non-zero otherwise.
13021Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
13022 Value *Val, Value *Addr,
13023 AtomicOrdering Ord) const {
13024 Type *Ty = Val->getType();
13025 unsigned SZ = Ty->getPrimitiveSizeInBits();
13026
13027 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
13028 "Only 8/16/32/64-bit atomic stores supported");
13029 Intrinsic::ID IntID;
13030 switch (SZ) {
13031 default:
13032 llvm_unreachable("Unexpected PrimitiveSize");
13033 case 8:
13034 IntID = Intrinsic::ppc_stbcx;
13035 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
13036 break;
13037 case 16:
13038 IntID = Intrinsic::ppc_sthcx;
13039 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
13040 break;
13041 case 32:
13042 IntID = Intrinsic::ppc_stwcx;
13043 break;
13044 case 64:
13045 IntID = Intrinsic::ppc_stdcx;
13046 break;
13047 }
13048
13049 if (SZ == 8 || SZ == 16)
13050 Val = Builder.CreateZExt(Val, Builder.getInt32Ty());
13051
13052 Value *Call = Builder.CreateIntrinsic(IntID, {Addr, Val},
13053 /*FMFSource=*/nullptr, "stcx");
13054 return Builder.CreateXor(Call, Builder.getInt32(1));
13055}
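// Sketch of the final XOR (assuming the stcx intrinsics return 1 when the
// reservation held and the store was performed):
//   status = stcx(addr, val) ^ 1;   // 0 == success, as callers expect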
13056
13057// The mappings for emitLeading/TrailingFence is taken from
13058// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
13059Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
13060 Instruction *Inst,
13061 AtomicOrdering Ord) const {
13062 if (Ord == AtomicOrdering::SequentiallyConsistent)
13063 return callIntrinsic(Builder, Intrinsic::ppc_sync);
13064 if (isReleaseOrStronger(Ord))
13065 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
13066 return nullptr;
13067}
13068
13069Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
13070 Instruction *Inst,
13071 AtomicOrdering Ord) const {
13072 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
13073 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
13074 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
13075 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
13076 if (isa<LoadInst>(Inst))
13077 return Builder.CreateIntrinsic(Intrinsic::ppc_cfence, {Inst->getType()},
13078 {Inst});
13079 // FIXME: Can use isync for rmw operation.
13080 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
13081 }
13082 return nullptr;
13083}
13084
13085MachineBasicBlock *PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI,
13086 MachineBasicBlock *BB,
13087 unsigned BinOpcode,
13088 unsigned CmpOpcode,
13089 unsigned CmpPred) const {
13090 // BinOpcode != 0: Handles atomic load with binary operator, e.g. NAND.
13091 // CmpOpcode != 0: Handles atomic load with MIN/MAX etc.
13092 // BinOpcode == 0 && CmpOpcode == 0: Handles ATOMIC_SWAP.
13093 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
13094 unsigned AtomicSize = MI.getOperand(3).getImm();
13095
13096 auto LoadMnemonic = PPC::LDARX;
13097 auto StoreMnemonic = PPC::STDCX;
13098 switch (AtomicSize) {
13099 default:
13100 llvm_unreachable("Unexpected size of atomic entity");
13101 case 1:
13102 LoadMnemonic = PPC::LBARX;
13103 StoreMnemonic = PPC::STBCX;
13104 assert(Subtarget.hasPartwordAtomics() && "Partword atomics required for sizes < 4");
13105 break;
13106 case 2:
13107 LoadMnemonic = PPC::LHARX;
13108 StoreMnemonic = PPC::STHCX;
13109 assert(Subtarget.hasPartwordAtomics() && "Partword atomics required for sizes < 4");
13110 break;
13111 case 4:
13112 LoadMnemonic = PPC::LWARX;
13113 StoreMnemonic = PPC::STWCX;
13114 break;
13115 case 8:
13116 LoadMnemonic = PPC::LDARX;
13117 StoreMnemonic = PPC::STDCX;
13118 break;
13119 }
13120
13121 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13122 MachineFunction *F = BB->getParent();
13123 MachineFunction::iterator It = ++BB->getIterator();
13124
13125 if (CmpOpcode == PPC::CMPW && (AtomicSize == 1 || AtomicSize == 2))
13126 signExtendOperandIfUnknown(MI, BB, 4, /*IsByte=*/AtomicSize == 1, TII);
13127
13128 Register dest = MI.getOperand(0).getReg();
13129 Register ptrA = MI.getOperand(1).getReg();
13130 Register ptrB = MI.getOperand(2).getReg();
13131 Register incr = MI.getOperand(4).getReg();
13132 DebugLoc dl = MI.getDebugLoc();
13133
13134 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
13135 MachineBasicBlock *loop2MBB =
13136 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
13137 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13138 F->insert(It, loopMBB);
13139 if (CmpOpcode)
13140 F->insert(It, loop2MBB);
13141 F->insert(It, exitMBB);
13142 exitMBB->splice(exitMBB->begin(), BB,
13143 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13144 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13145
13146 MachineRegisterInfo &RegInfo = F->getRegInfo();
13147 Register TmpReg = (!BinOpcode) ? incr :
13148 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
13149 : &PPC::GPRCRegClass);
13150
13151 // thisMBB:
13152 // ...
13153 // fallthrough --> loopMBB
13154 BB->addSuccessor(loopMBB);
13155
13156 // loopMBB:
13157 // l[wd]arx dest, ptr
13158 // add r0, dest, incr
13159 // st[wd]cx. r0, ptr
13160 // bne- loopMBB
13161 // fallthrough --> exitMBB
13162
13163 // For max/min...
13164 // loopMBB:
13165 // l[wd]arx dest, ptr
13166 // cmpl?[wd] dest, incr
13167 // bgt exitMBB
13168 // loop2MBB:
13169 // st[wd]cx. dest, ptr
13170 // bne- loopMBB
13171 // fallthrough --> exitMBB
13172
13173 BB = loopMBB;
13174 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
13175 .addReg(ptrA).addReg(ptrB);
13176 if (BinOpcode)
13177 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
13178 if (CmpOpcode) {
13179 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13180 // Signed comparisons of byte or halfword values must be sign-extended.
13181 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
13182 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13183 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
13184 ExtReg).addReg(dest);
13185 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
13186 } else
13187 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
13188
13189 BuildMI(BB, dl, TII->get(PPC::BCC))
13190 .addImm(CmpPred)
13191 .addReg(CrReg)
13192 .addMBB(exitMBB);
13193 BB->addSuccessor(loop2MBB);
13194 BB->addSuccessor(exitMBB);
13195 BB = loop2MBB;
13196 }
13197 BuildMI(BB, dl, TII->get(StoreMnemonic))
13198 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
13199 BuildMI(BB, dl, TII->get(PPC::BCC))
13200 .addImm(PPC::PRED_NE)
13201 .addReg(PPC::CR0)
13202 .addMBB(loopMBB);
13203 BB->addSuccessor(loopMBB);
13204 BB->addSuccessor(exitMBB);
13205
13206 // exitMBB:
13207 // ...
13208 BB = exitMBB;
13209 return BB;
13210}
13211
13212static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
13213 switch (MI.getOpcode()) {
13214 default:
13215 return false;
13216 case PPC::COPY:
13217 return TII->isSignExtended(MI.getOperand(1).getReg(),
13218 &MI.getMF()->getRegInfo());
13219 case PPC::LHA:
13220 case PPC::LHA8:
13221 case PPC::LHAU:
13222 case PPC::LHAU8:
13223 case PPC::LHAUX:
13224 case PPC::LHAUX8:
13225 case PPC::LHAX:
13226 case PPC::LHAX8:
13227 case PPC::LWA:
13228 case PPC::LWAUX:
13229 case PPC::LWAX:
13230 case PPC::LWAX_32:
13231 case PPC::LWA_32:
13232 case PPC::PLHA:
13233 case PPC::PLHA8:
13234 case PPC::PLHA8pc:
13235 case PPC::PLHApc:
13236 case PPC::PLWA:
13237 case PPC::PLWA8:
13238 case PPC::PLWA8pc:
13239 case PPC::PLWApc:
13240 case PPC::EXTSB:
13241 case PPC::EXTSB8:
13242 case PPC::EXTSB8_32_64:
13243 case PPC::EXTSB8_rec:
13244 case PPC::EXTSB_rec:
13245 case PPC::EXTSH:
13246 case PPC::EXTSH8:
13247 case PPC::EXTSH8_32_64:
13248 case PPC::EXTSH8_rec:
13249 case PPC::EXTSH_rec:
13250 case PPC::EXTSW:
13251 case PPC::EXTSWSLI:
13252 case PPC::EXTSWSLI_32_64:
13253 case PPC::EXTSWSLI_32_64_rec:
13254 case PPC::EXTSWSLI_rec:
13255 case PPC::EXTSW_32:
13256 case PPC::EXTSW_32_64:
13257 case PPC::EXTSW_32_64_rec:
13258 case PPC::EXTSW_rec:
13259 case PPC::SRAW:
13260 case PPC::SRAWI:
13261 case PPC::SRAWI_rec:
13262 case PPC::SRAW_rec:
13263 return true;
13264 }
13265 return false;
13266}
13267
13268// Sign extend operand OpIdx if the value is not known to be sign extended.
13269// Assumes the operand is a register. The flag IsByte controls which instruction
13270// is used for the sign extension.
13271static void signExtendOperandIfUnknown(MachineInstr &MI, MachineBasicBlock *BB,
13272 unsigned OpIdx, bool IsByte,
13273 const PPCInstrInfo *TII) {
13274 MachineFunction *F = MI.getMF();
13275 MachineRegisterInfo &RegInfo = F->getRegInfo();
13276 Register Reg = MI.getOperand(OpIdx).getReg();
13277 bool IsSignExtended =
13278 Reg.isVirtual() && isSignExtended(*RegInfo.getVRegDef(Reg), TII);
13279
13280 if (!IsSignExtended) {
13281 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13282 BuildMI(*BB, MI, MI.getDebugLoc(),
13283 TII->get(IsByte ? PPC::EXTSB : PPC::EXTSH), ValueReg)
13284 .addReg(Reg);
13285 MI.getOperand(OpIdx).setReg(ValueReg);
13286 }
13287}
13288
13289MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
13290 MachineInstr &MI, MachineBasicBlock *BB, unsigned BinOpcode,
13291 unsigned CmpOpcode, unsigned CmpPred) const {
13292 // BinOpcode != 0: Handles atomic load with binary operator, e.g. NAND.
13293 // CmpOpcode != 0: Handles atomic load with MIN/MAX etc.
13294 // BinOpcode == 0 && CmpOpcode == 0: Handles ATOMIC_SWAP.
13295 assert(!Subtarget.hasPartwordAtomics() &&
13296 "Assumes that part-word atomics are not available");
13297 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
13298
13299 // If this is a signed comparison and the value being compared is not known
13300 // to be sign extended, sign extend it here.
13301 DebugLoc dl = MI.getDebugLoc();
13302 MachineFunction *F = BB->getParent();
13303 MachineRegisterInfo &RegInfo = F->getRegInfo();
13304 const bool is8bit = MI.getOperand(3).getImm() == 1;
13305 if (CmpOpcode == PPC::CMPW)
13306 signExtendOperandIfUnknown(MI, BB, 4, is8bit, TII);
13307 Register incr = MI.getOperand(4).getReg();
13308
13309 // In 64-bit mode we have to use 64 bits for addresses, even though the
13310 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
13311 // registers without caring whether they're 32 or 64, but here we're
13312 // doing actual arithmetic on the addresses.
13313 bool is64bit = Subtarget.isPPC64();
13314 bool isLittleEndian = Subtarget.isLittleEndian();
13315 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13316
13317 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13318 MachineFunction::iterator It = ++BB->getIterator();
13319
13320 Register dest = MI.getOperand(0).getReg();
13321 Register ptrA = MI.getOperand(1).getReg();
13322 Register ptrB = MI.getOperand(2).getReg();
13323
13324 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
13325 MachineBasicBlock *loop2MBB =
13326 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
13327 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13328 F->insert(It, loopMBB);
13329 if (CmpOpcode)
13330 F->insert(It, loop2MBB);
13331 F->insert(It, exitMBB);
13332 exitMBB->splice(exitMBB->begin(), BB,
13333 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13334 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13335
13336 const TargetRegisterClass *RC =
13337 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13338 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13339
13340 Register PtrReg = RegInfo.createVirtualRegister(RC);
13341 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13342 Register ShiftReg =
13343 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13344 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
13345 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13346 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13347 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13348 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13349 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
13350 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13351 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13352 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
13353 Register Ptr1Reg;
13354 Register TmpReg =
13355 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
13356
13357 // thisMBB:
13358 // ...
13359 // fallthrough --> loopMBB
13360 BB->addSuccessor(loopMBB);
13361
13362 // The 4-byte load must be aligned, while a char or short may be
13363 // anywhere in the word. Hence all this nasty bookkeeping code.
13364 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13365 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13366 // xori shift, shift1, 24 [16]
13367 // rlwinm ptr, ptr1, 0, 0, 29
13368 // slw incr2, incr, shift
13369 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13370 // slw mask, mask2, shift
13371 // loopMBB:
13372 // lwarx tmpDest, ptr
13373 // add tmp, tmpDest, incr2
13374 // andc tmp2, tmpDest, mask
13375 // and tmp3, tmp, mask
13376 // or tmp4, tmp3, tmp2
13377 // stwcx. tmp4, ptr
13378 // bne- loopMBB
13379 // fallthrough --> exitMBB
13380 // srw SrwDest, tmpDest, shift
13381 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
13382 if (ptrA != ZeroReg) {
13383 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13384 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13385 .addReg(ptrA)
13386 .addReg(ptrB);
13387 } else {
13388 Ptr1Reg = ptrB;
13389 }
13390 // We need to use a 32-bit subregister to avoid a register class mismatch in
13391 // 64-bit mode.
13392 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13393 .addReg(Ptr1Reg, {}, is64bit ? PPC::sub_32 : 0)
13394 .addImm(3)
13395 .addImm(27)
13396 .addImm(is8bit ? 28 : 27);
13397 if (!isLittleEndian)
13398 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13399 .addReg(Shift1Reg)
13400 .addImm(is8bit ? 24 : 16);
13401 if (is64bit)
13402 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13403 .addReg(Ptr1Reg)
13404 .addImm(0)
13405 .addImm(61);
13406 else
13407 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13408 .addReg(Ptr1Reg)
13409 .addImm(0)
13410 .addImm(0)
13411 .addImm(29);
13412 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
13413 if (is8bit)
13414 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13415 else {
13416 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13417 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13418 .addReg(Mask3Reg)
13419 .addImm(65535);
13420 }
13421 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13422 .addReg(Mask2Reg)
13423 .addReg(ShiftReg);
13424
13425 BB = loopMBB;
13426 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13427 .addReg(ZeroReg)
13428 .addReg(PtrReg);
13429 if (BinOpcode)
13430 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
13431 .addReg(Incr2Reg)
13432 .addReg(TmpDestReg);
13433 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13434 .addReg(TmpDestReg)
13435 .addReg(MaskReg);
13436 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
13437 if (CmpOpcode) {
13438 // For unsigned comparisons, we can directly compare the shifted values.
13439 // For signed comparisons we shift and sign extend.
13440 Register SReg = RegInfo.createVirtualRegister(GPRC);
13441 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13442 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
13443 .addReg(TmpDestReg)
13444 .addReg(MaskReg);
13445 unsigned ValueReg = SReg;
13446 unsigned CmpReg = Incr2Reg;
13447 if (CmpOpcode == PPC::CMPW) {
13448 ValueReg = RegInfo.createVirtualRegister(GPRC);
13449 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
13450 .addReg(SReg)
13451 .addReg(ShiftReg);
13452 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
13453 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
13454 .addReg(ValueReg);
13455 ValueReg = ValueSReg;
13456 CmpReg = incr;
13457 }
13458 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
13459 BuildMI(BB, dl, TII->get(PPC::BCC))
13460 .addImm(CmpPred)
13461 .addReg(CrReg)
13462 .addMBB(exitMBB);
13463 BB->addSuccessor(loop2MBB);
13464 BB->addSuccessor(exitMBB);
13465 BB = loop2MBB;
13466 }
13467 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
13468 BuildMI(BB, dl, TII->get(PPC::STWCX))
13469 .addReg(Tmp4Reg)
13470 .addReg(ZeroReg)
13471 .addReg(PtrReg);
13472 BuildMI(BB, dl, TII->get(PPC::BCC))
13473 .addImm(PPC::PRED_NE)
13474 .addReg(PPC::CR0)
13475 .addMBB(loopMBB);
13476 BB->addSuccessor(loopMBB);
13477 BB->addSuccessor(exitMBB);
13478
13479 // exitMBB:
13480 // ...
13481 BB = exitMBB;
13482 // Since the shift amount is not a constant, we need to clear
13483 // the upper bits with a separate RLWINM.
13484 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
13485 .addReg(SrwDestReg)
13486 .addImm(0)
13487 .addImm(is8bit ? 24 : 16)
13488 .addImm(31);
13489 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
13490 .addReg(TmpDestReg)
13491 .addReg(ShiftReg);
13492 return BB;
13493}
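// Illustrative sketch (plain C, big-endian byte case) of the address
// arithmetic described in the comment block above:
//   uintptr_t word  = addr & ~(uintptr_t)3;   // word-aligned base (rlwinm)
//   unsigned  shift = (addr & 3) * 8;         // bit offset of the byte
//   shift ^= 24;                              // flip for big-endian (xori)
//   uint32_t  mask  = 0xFFu << shift;         // isolate the byte (li + slw)
// The loop then operates on the full word and masks in the updated byte.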
13494
13495MachineBasicBlock *
13496PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
13497 MachineBasicBlock *MBB) const {
13498 DebugLoc DL = MI.getDebugLoc();
13499 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13500 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
13501
13502 MachineFunction *MF = MBB->getParent();
13503 MachineRegisterInfo &MRI = MF->getRegInfo();
13504
13505 const BasicBlock *BB = MBB->getBasicBlock();
13506 MachineFunction::iterator I = ++MBB->getIterator();
13507
13508 Register DstReg = MI.getOperand(0).getReg();
13509 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
13510 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
13511 Register mainDstReg = MRI.createVirtualRegister(RC);
13512 Register restoreDstReg = MRI.createVirtualRegister(RC);
13513
13514 MVT PVT = getPointerTy(MF->getDataLayout());
13515 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13516 "Invalid Pointer Size!");
13517 // For v = setjmp(buf), we generate
13518 //
13519 // thisMBB:
13520 // SjLjSetup mainMBB
13521 // bl mainMBB
13522 // v_restore = 1
13523 // b sinkMBB
13524 //
13525 // mainMBB:
13526 // buf[LabelOffset] = LR
13527 // v_main = 0
13528 //
13529 // sinkMBB:
13530 // v = phi(main, restore)
13531 //
13532
13533 MachineBasicBlock *thisMBB = MBB;
13534 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
13535 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
13536 MF->insert(I, mainMBB);
13537 MF->insert(I, sinkMBB);
13538
13539 MachineInstrBuilder MIB;
13540
13541 // Transfer the remainder of BB and its successor edges to sinkMBB.
13542 sinkMBB->splice(sinkMBB->begin(), MBB,
13543 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13544 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
13545
13546 // Note that the structure of the jmp_buf used here is not compatible
13547 // with that used by libc, and is not designed to be. Specifically, it
13548 // stores only those 'reserved' registers that LLVM does not otherwise
13549 // understand how to spill. Also, by convention, by the time this
13550 // intrinsic is called, Clang has already stored the frame address in the
13551 // first slot of the buffer and stack address in the third. Following the
13552 // X86 target code, we'll store the jump address in the second slot. We also
13553 // need to save the TOC pointer (R2) to handle jumps between shared
13554 // libraries, and that will be stored in the fourth slot. The thread
13555 // identifier (R13) is not affected.
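// In pointer-sized slots, the resulting layout is therefore:
//   buf[0] = frame address (stored by the front end)
//   buf[1] = jump address  (LR, LabelOffset below)
//   buf[2] = stack address (stored by the front end)
//   buf[3] = TOC pointer   (X2, TOCOffset)
//   buf[4] = base pointer  (BPOffset)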
13556
13557 // thisMBB:
13558 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13559 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13560 const int64_t BPOffset = 4 * PVT.getStoreSize();
13561
13562 // Prepare the IP in a register.
13563 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
13564 Register LabelReg = MRI.createVirtualRegister(PtrRC);
13565 Register BufReg = MI.getOperand(1).getReg();
13566
13567 if (Subtarget.is64BitELFABI()) {
13568 setUsesTOCBasePtr(*MBB->getParent());
13569 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
13570 .addReg(PPC::X2)
13571 .addImm(TOCOffset)
13572 .addReg(BufReg)
13573 .cloneMemRefs(MI);
13574 }
13575
13576 // Naked functions never have a base pointer, and so we use r1. For all
13577 // other functions, this decision must be deferred until PEI.
13578 unsigned BaseReg;
13579 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
13580 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
13581 else
13582 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
13583
13584 MIB = BuildMI(*thisMBB, MI, DL,
13585 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
13586 .addReg(BaseReg)
13587 .addImm(BPOffset)
13588 .addReg(BufReg)
13589 .cloneMemRefs(MI);
13590
13591 // Setup
13592 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
13593 MIB.addRegMask(TRI->getNoPreservedMask());
13594
13595 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
13596
13597 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
13598 .addMBB(mainMBB);
13599 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
13600
13601 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
13602 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
13603
13604 // mainMBB:
13605 // mainDstReg = 0
13606 MIB =
13607 BuildMI(mainMBB, DL,
13608 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
13609
13610 // Store IP
13611 if (Subtarget.isPPC64()) {
13612 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
13613 .addReg(LabelReg)
13614 .addImm(LabelOffset)
13615 .addReg(BufReg);
13616 } else {
13617 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
13618 .addReg(LabelReg)
13619 .addImm(LabelOffset)
13620 .addReg(BufReg);
13621 }
13622 MIB.cloneMemRefs(MI);
13623
13624 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
13625 mainMBB->addSuccessor(sinkMBB);
13626
13627 // sinkMBB:
13628 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
13629 TII->get(PPC::PHI), DstReg)
13630 .addReg(mainDstReg).addMBB(mainMBB)
13631 .addReg(restoreDstReg).addMBB(thisMBB);
13632
13633 MI.eraseFromParent();
13634 return sinkMBB;
13635}
13636
13637MachineBasicBlock *
13638PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
13639 MachineBasicBlock *MBB) const {
13640 DebugLoc DL = MI.getDebugLoc();
13641 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13642
13643 MachineFunction *MF = MBB->getParent();
13644 MachineRegisterInfo &MRI = MF->getRegInfo();
13645
13646 MVT PVT = getPointerTy(MF->getDataLayout());
13647 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13648 "Invalid Pointer Size!");
13649
13650 const TargetRegisterClass *RC =
13651 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13652 Register Tmp = MRI.createVirtualRegister(RC);
13653 // Since FP is only updated here but NOT referenced, it's treated as GPR.
13654 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
13655 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
13656 unsigned BP =
13657 (PVT == MVT::i64)
13658 ? PPC::X30
13659 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
13660 : PPC::R30);
13661
13661
13662 MachineInstrBuilder MIB;
13663
13664 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13665 const int64_t SPOffset = 2 * PVT.getStoreSize();
13666 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13667 const int64_t BPOffset = 4 * PVT.getStoreSize();
13668
13669 Register BufReg = MI.getOperand(0).getReg();
13670
13671 // Reload FP (the jumped-to function may not have had a
13672 // frame pointer, and if so, then its r31 will be restored
13673 // as necessary).
13674 if (PVT == MVT::i64) {
13675 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
13676 .addImm(0)
13677 .addReg(BufReg);
13678 } else {
13679 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
13680 .addImm(0)
13681 .addReg(BufReg);
13682 }
13683 MIB.cloneMemRefs(MI);
13684
13685 // Reload IP
13686 if (PVT == MVT::i64) {
13687 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
13688 .addImm(LabelOffset)
13689 .addReg(BufReg);
13690 } else {
13691 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
13692 .addImm(LabelOffset)
13693 .addReg(BufReg);
13694 }
13695 MIB.cloneMemRefs(MI);
13696
13697 // Reload SP
13698 if (PVT == MVT::i64) {
13699 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
13700 .addImm(SPOffset)
13701 .addReg(BufReg);
13702 } else {
13703 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
13704 .addImm(SPOffset)
13705 .addReg(BufReg);
13706 }
13707 MIB.cloneMemRefs(MI);
13708
13709 // Reload BP
13710 if (PVT == MVT::i64) {
13711 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
13712 .addImm(BPOffset)
13713 .addReg(BufReg);
13714 } else {
13715 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
13716 .addImm(BPOffset)
13717 .addReg(BufReg);
13718 }
13719 MIB.cloneMemRefs(MI);
13720
13721 // Reload TOC
13722 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
13723 setUsesTOCBasePtr(*MBB->getParent());
13724 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
13725 .addImm(TOCOffset)
13726 .addReg(BufReg)
13727 .cloneMemRefs(MI);
13728 }
13729
13730 // Jump
13731 BuildMI(*MBB, MI, DL,
13732 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
13733 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
13734
13735 MI.eraseFromParent();
13736 return MBB;
13737}
13738
13739bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
13740 // If the function specifically requests inline stack probes, emit them.
13741 if (MF.getFunction().hasFnAttribute("probe-stack"))
13742 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
13743 "inline-asm";
13744 return false;
13745}
13746
13747unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
13748 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
13749 unsigned StackAlign = TFI->getStackAlignment();
13750 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
13751 "Unexpected stack alignment");
13752 // The default stack probe size is 4096 if the function has no
13753 // stack-probe-size attribute.
13754 const Function &Fn = MF.getFunction();
13755 unsigned StackProbeSize =
13756 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
13757 // Round down to the stack alignment.
13758 StackProbeSize &= ~(StackAlign - 1);
13759 return StackProbeSize ? StackProbeSize : StackAlign;
13760}
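// Example (illustrative): with a 16-byte stack alignment, a
// "stack-probe-size" of 4100 is rounded down to 4096; a value of 0 falls
// back to the stack alignment itself.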
13761
13762// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
13763// into three phases. In the first phase, it uses the pseudo instruction
13764// PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer
13765// and FinalStackPtr. In the second phase, it generates a loop for probing
13766// blocks. Finally, it uses the pseudo instruction DYNAREAOFFSET to get the
13767// future result of MaxCallFrameSize so that it can calculate the correct data
13767// area pointer.
13768MachineBasicBlock *
13769PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
13770 MachineBasicBlock *MBB) const {
13771 const bool isPPC64 = Subtarget.isPPC64();
13772 MachineFunction *MF = MBB->getParent();
13773 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13774 DebugLoc DL = MI.getDebugLoc();
13775 const unsigned ProbeSize = getStackProbeSize(*MF);
13776 const BasicBlock *ProbedBB = MBB->getBasicBlock();
13777 MachineRegisterInfo &MRI = MF->getRegInfo();
13778 // The CFG of probing stack looks as
13779 // +-----+
13780 // | MBB |
13781 // +--+--+
13782 // |
13783 // +----v----+
13784 // +--->+ TestMBB +---+
13785 // | +----+----+ |
13786 // | | |
13787 // | +-----v----+ |
13788 // +---+ BlockMBB | |
13789 // +----------+ |
13790 // |
13791 // +---------+ |
13792 // | TailMBB +<--+
13793 // +---------+
13794 // In MBB, calculate previous frame pointer and final stack pointer.
13795 // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
13796 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
13797 // TailMBB is spliced via \p MI.
13798 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
13799 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
13800 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
13801
13802 MachineFunction::iterator MBBIter = ++MBB->getIterator();
13803 MF->insert(MBBIter, TestMBB);
13804 MF->insert(MBBIter, BlockMBB);
13805 MF->insert(MBBIter, TailMBB);
13806
13807 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
13808 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13809
13810 Register DstReg = MI.getOperand(0).getReg();
13811 Register NegSizeReg = MI.getOperand(1).getReg();
13812 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
13813 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13814 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13815 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13816
13817 // Since the value of NegSizeReg might be realigned during prologue/epilogue
13818 // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
13819 // actual FramePointer and NegSize.
13820 unsigned ProbeOpc;
13821 if (!MRI.hasOneNonDBGUse(NegSizeReg))
13822 ProbeOpc =
13823 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
13824 else
13825 // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG, ActualNegSizeReg
13826 // and NegSizeReg will be allocated to the same physical register to avoid a
13827 // redundant copy when NegSizeReg has only one use, namely the current MI,
13828 // which will then be replaced by PREPARE_PROBED_ALLOCA.
13829 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
13830 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
13831 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
13832 .addDef(ActualNegSizeReg)
13833 .addReg(NegSizeReg)
13834 .add(MI.getOperand(2))
13835 .add(MI.getOperand(3));
13836
13837 // Calculate final stack pointer, which equals to SP + ActualNegSize.
13838 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
13839 FinalStackPtr)
13840 .addReg(SPReg)
13841 .addReg(ActualNegSizeReg);
13842
13843 // Materialize a scratch register for update.
13844 int64_t NegProbeSize = -(int64_t)ProbeSize;
13845 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
13846 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13847 if (!isInt<16>(NegProbeSize)) {
13848 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13849 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
13850 .addImm(NegProbeSize >> 16);
13851 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
13852 ScratchReg)
13853 .addReg(TempReg)
13854 .addImm(NegProbeSize & 0xFFFF);
13855 } else
13856 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
13857 .addImm(NegProbeSize);
13858
13859 {
13860 // Probing leading residual part.
13861 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13862 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
13863 .addReg(ActualNegSizeReg)
13864 .addReg(ScratchReg);
13865 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13866 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
13867 .addReg(Div)
13868 .addReg(ScratchReg);
13869 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13870 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
13871 .addReg(Mul)
13872 .addReg(ActualNegSizeReg);
13873 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13874 .addReg(FramePointer)
13875 .addReg(SPReg)
13876 .addReg(NegMod);
13877 }
13878
13879 {
13880 // Remaining part should be multiple of ProbeSize.
13881 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
13882 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
13883 .addReg(SPReg)
13884 .addReg(FinalStackPtr);
13885 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
13886 .addImm(PPC::PRED_EQ)
13887 .addReg(CmpResult)
13888 .addMBB(TailMBB);
13889 TestMBB->addSuccessor(BlockMBB);
13890 TestMBB->addSuccessor(TailMBB);
13891 }
13892
13893 {
13894 // Touch the block.
13895 // |P...|P...|P...
13896 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13897 .addReg(FramePointer)
13898 .addReg(SPReg)
13899 .addReg(ScratchReg);
13900 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
13901 BlockMBB->addSuccessor(TestMBB);
13902 }
13903
13904 // Calculation of MaxCallFrameSize is deferred to prologue/epilogue insertion;
13905 // use the DYNAREAOFFSET pseudo instruction to get the future result.
13906 Register MaxCallFrameSizeReg =
13907 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13908 BuildMI(TailMBB, DL,
13909 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
13910 MaxCallFrameSizeReg)
13911 .add(MI.getOperand(2))
13912 .add(MI.getOperand(3));
13913 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
13914 .addReg(SPReg)
13915 .addReg(MaxCallFrameSizeReg);
13916
13917 // Splice instructions after MI to TailMBB.
13918 TailMBB->splice(TailMBB->end(), MBB,
13919 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13920 TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
13921 MBB->addSuccessor(TestMBB);
13922
13923 // Delete the pseudo instruction.
13924 MI.eraseFromParent();
13925
13926 ++NumDynamicAllocaProbed;
13927 return TailMBB;
13928}
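// Worked example (illustrative): for a 9000-byte allocation with
// ProbeSize = 4096, the leading residual is 9000 - 2*4096 = 808 bytes,
// touched by the first store-with-update; the loop then probes the two
// remaining 4096-byte blocks until SP reaches FinalStackPtr.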
13929
13930/// Check if the opcode is a SELECT or SELECT_CC variant.
13931/// @param Opcode The opcode to check
13932/// @param CheckOnlyCC If true, only return true for SELECT_CC variants;
13933/// if false, return true for both SELECT and SELECT_CC
13934static bool IsSelect(unsigned Opcode, bool CheckOnlyCC = false) {
13935 switch (Opcode) {
13936 // SELECT_CC variants - always return true
13937 case PPC::SELECT_CC_I4:
13938 case PPC::SELECT_CC_I8:
13939 case PPC::SELECT_CC_F4:
13940 case PPC::SELECT_CC_F8:
13941 case PPC::SELECT_CC_F16:
13942 case PPC::SELECT_CC_VRRC:
13943 case PPC::SELECT_CC_VSFRC:
13944 case PPC::SELECT_CC_VSSRC:
13945 case PPC::SELECT_CC_VSRC:
13946 case PPC::SELECT_CC_SPE4:
13947 case PPC::SELECT_CC_SPE:
13948 return true;
13949 // SELECT variants - only return true if CheckOnlyCC is false
13950 case PPC::SELECT_I4:
13951 case PPC::SELECT_I8:
13952 case PPC::SELECT_F4:
13953 case PPC::SELECT_F8:
13954 case PPC::SELECT_F16:
13955 case PPC::SELECT_SPE:
13956 case PPC::SELECT_SPE4:
13957 case PPC::SELECT_VRRC:
13958 case PPC::SELECT_VSFRC:
13959 case PPC::SELECT_VSSRC:
13960 case PPC::SELECT_VSRC:
13961 return !CheckOnlyCC; // true if checking all SELECTs, false if only CC
13962 default:
13963 return false;
13964 }
13965}
13966static bool IsSelectCC(unsigned Opcode) { return IsSelect(Opcode, true); }
13967
13968/// Emit SELECT instruction, using ISEL if available, otherwise use
13969/// branch-based control flow.
13970///
13971/// For targets with ISEL support (SELECT_CC_I4/I8, SELECT_I4/I8), this
13972/// generates a single ISEL instruction. Otherwise, it creates a
13973/// branch-based control flow pattern with PHI nodes.
13975 const TargetInstrInfo *TII,
13976 const PPCSubtarget &Subtarget) {
13977 assert(IsSelect(MI.getOpcode()) && "Instruction must be a SELECT variant");
13978
13979 // Check if we can use ISEL for this SELECT
13980 if (Subtarget.hasISEL() &&
13981 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13982 MI.getOpcode() == PPC::SELECT_CC_I8 ||
13983 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
13984 SmallVector<MachineOperand, 2> Cond;
13985 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13986 MI.getOpcode() == PPC::SELECT_CC_I8)
13987 Cond.push_back(MI.getOperand(4));
13988 else
13989 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
13990 Cond.push_back(MI.getOperand(1));
13991
13992 DebugLoc dl = MI.getDebugLoc();
13993 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
13994 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
13995 MI.eraseFromParent();
13996 return BB;
13997 }
13998
13999 // Fall back to branch-based SELECT implementation
14000 MachineFunction *F = BB->getParent();
14001 const BasicBlock *LLVM_BB = BB->getBasicBlock();
14002 MachineFunction::iterator It = ++BB->getIterator();
14003 DebugLoc dl = MI.getDebugLoc();
14004
14005 MachineBasicBlock *thisMBB = BB;
14006 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
14007 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
14008 F->insert(It, copy0MBB);
14009 F->insert(It, sinkMBB);
14010
14011 if (isPhysRegUsedAfter(PPC::CARRY, MI.getIterator())) {
14012 copy0MBB->addLiveIn(PPC::CARRY);
14013 sinkMBB->addLiveIn(PPC::CARRY);
14014 }
14015
14016 // Set the call frame size on entry to the new basic blocks.
14017 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
14018 copy0MBB->setCallFrameSize(CallFrameSize);
14019 sinkMBB->setCallFrameSize(CallFrameSize);
14020
14021 // Transfer the remainder of BB and its successor edges to sinkMBB.
14022 sinkMBB->splice(sinkMBB->begin(), BB,
14023 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14024 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
14025
14026 // Add successors
14027 BB->addSuccessor(copy0MBB);
14028 BB->addSuccessor(sinkMBB);
14029
14030 // Build branch instruction
14031 if (IsSelectCC(MI.getOpcode()))
14032 BuildMI(BB, dl, TII->get(PPC::BCC))
14033 .addImm(MI.getOperand(4).getImm())
14034 .addReg(MI.getOperand(1).getReg())
14035 .addMBB(sinkMBB);
14036 else
14037 BuildMI(BB, dl, TII->get(PPC::BC))
14038 .addReg(MI.getOperand(1).getReg())
14039 .addMBB(sinkMBB);
14040
14041 // copy0MBB: fallthrough to sinkMBB
14042 BB = copy0MBB;
14043 BB->addSuccessor(sinkMBB);
14044
14045 // sinkMBB: PHI instruction
14046 BB = sinkMBB;
14047 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
14048 .addReg(MI.getOperand(3).getReg())
14049 .addMBB(copy0MBB)
14050 .addReg(MI.getOperand(2).getReg())
14051 .addMBB(thisMBB);
14052 MI.eraseFromParent();
14053 return BB;
14054}
14055
14056/// Helper function to create basic blocks for atomic compare-and-swap.
14057/// Creates three basic blocks (loop1MBB, loop2MBB, exitMBB) and sets up
14058/// the control flow structure common to both hardware and software
14059/// implementations of atomic compare-and-swap operations.
14060static void createAtomicLoopBlocks(MachineFunction *F, MachineBasicBlock *BB,
14061 MachineBasicBlock *&loop1MBB,
14062 MachineBasicBlock *&loop2MBB,
14063 MachineBasicBlock *&exitMBB,
14064 MachineInstr &MI,
14065 MachineFunction::iterator It) {
14066 const BasicBlock *LLVM_BB = BB->getBasicBlock();
14067 loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14068 loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14069 exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14070 F->insert(It, loop1MBB);
14071 F->insert(It, loop2MBB);
14072 F->insert(It, exitMBB);
14073 exitMBB->splice(exitMBB->begin(), BB,
14074 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14075 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
14076 BB->addSuccessor(loop1MBB);
14077}
14078
14079/// Emit hardware-supported atomic compare-and-swap for I32/I64 and I8/I16
14080/// with partword atomic support.
14081///
14082/// This uses native PowerPC atomic instructions (LBARX/LHARX/LWARX/LDARX for
14083/// load-and-reserve, STBCX/STHCX/STWCX/STDCX for store-conditional) to
14084/// implement atomic compare-and-swap at byte, halfword, word, or doubleword
14085/// granularity.
14086///
14087/// Control flow:
14088/// thisMBB -> loop1MBB -> loop2MBB -> exitMBB
14089/// | |
14090/// +------------+
14091///
14092/// loop1MBB:
14093/// - Load-and-reserve from memory
14094/// - Compare loaded value with expected old value
14095/// - Branch to exitMBB if not equal (CAS failed)
14096/// loop2MBB:
14097/// - Store-conditional new value to memory
14098/// - Branch back to loop1MBB if store failed (retry)
14099/// - Fall through to exitMBB on success
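///
/// For example, an IR-level "%r = cmpxchg ptr %p, i32 %old, i32 %new
/// monotonic monotonic" is selected to ATOMIC_CMP_SWAP_I32 and expanded
/// here to roughly the following (virtual registers stand in for operands):
///   loop1: lwarx  dest, ptrA, ptrB
///          cmpw   cr, dest, oldval
///          bne-   cr, exit
///   loop2: stwcx. newval, ptrA, ptrB
///          bne-   cr0, loop1
///          b      exit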
14100static MachineBasicBlock *
14101 emitAtomicCmpSwapHardware(MachineInstr &MI, MachineBasicBlock *BB,
14102                           const TargetInstrInfo *TII,
14103 const PPCSubtarget &Subtarget) {
14104 MachineFunction *F = BB->getParent();
14105 MachineFunction::iterator It = ++BB->getIterator();
14106
14107 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
14108
14109 unsigned LoadMnemonic = PPC::LDARX;
14110 unsigned StoreMnemonic = PPC::STDCX;
14111 switch (MI.getOpcode()) {
14112 default:
14113 llvm_unreachable("Compare and swap of unknown size");
14114 case PPC::ATOMIC_CMP_SWAP_I8:
14115 LoadMnemonic = PPC::LBARX;
14116 StoreMnemonic = PPC::STBCX;
14117 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
14118 break;
14119 case PPC::ATOMIC_CMP_SWAP_I16:
14120 LoadMnemonic = PPC::LHARX;
14121 StoreMnemonic = PPC::STHCX;
14122 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
14123 break;
14124 case PPC::ATOMIC_CMP_SWAP_I32:
14125 LoadMnemonic = PPC::LWARX;
14126 StoreMnemonic = PPC::STWCX;
14127 break;
14128 case PPC::ATOMIC_CMP_SWAP_I64:
14129 LoadMnemonic = PPC::LDARX;
14130 StoreMnemonic = PPC::STDCX;
14131 break;
14132 }
14133
14134 MachineRegisterInfo &RegInfo = F->getRegInfo();
14135 Register dest = MI.getOperand(0).getReg();
14136 Register ptrA = MI.getOperand(1).getReg();
14137 Register ptrB = MI.getOperand(2).getReg();
14138 Register oldval = MI.getOperand(3).getReg();
14139 Register newval = MI.getOperand(4).getReg();
14140 DebugLoc dl = MI.getDebugLoc();
14141
14142 MachineBasicBlock *loop1MBB, *loop2MBB, *exitMBB;
14143 createAtomicLoopBlocks(F, BB, loop1MBB, loop2MBB, exitMBB, MI, It);
14144
14145 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14146
14147 // loop1MBB:
14148 // l[bhwd]arx dest, ptr
14149 // cmp[wd] dest, oldval
14150 // bne- exitBB
14151 BB = loop1MBB;
14152 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
14153 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
14154 .addReg(dest)
14155 .addReg(oldval);
14156 BuildMI(BB, dl, TII->get(PPC::BCC))
14157 .addImm(PPC::PRED_NE)
14158 .addReg(CrReg)
14159 .addMBB(exitMBB);
14160 BB->addSuccessor(loop2MBB);
14161 BB->addSuccessor(exitMBB);
14162
14163 // loop2MBB:
14164 // st[bhwd]cx. newval, ptr
14165 // bne- loopMBB
14166 // b exitBB
14167 BB = loop2MBB;
14168 BuildMI(BB, dl, TII->get(StoreMnemonic))
14169 .addReg(newval)
14170 .addReg(ptrA)
14171 .addReg(ptrB);
14172 BuildMI(BB, dl, TII->get(PPC::BCC))
14173 .addImm(PPC::PRED_NE)
14174 .addReg(PPC::CR0)
14175 .addMBB(loop1MBB);
14176 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14177 BB->addSuccessor(loop1MBB);
14178 BB->addSuccessor(exitMBB);
14179
14180 return exitMBB;
14181}
14182
14183/// Emit software-emulated atomic compare-and-swap for I8/I16 without
14184/// hardware partword atomic support.
14185///
14186/// This emulates byte/halfword atomic operations using word (32-bit) atomic
14187/// instructions. Since PowerPC atomic instructions work at word granularity,
14188/// we must:
14189/// 1. Align the pointer to a word boundary
14190/// 2. Calculate the bit shift for the target byte/halfword within the word
14191/// 3. Create masks to isolate the target byte/halfword
14192/// 4. Shift old/new values into the correct bit position
14193/// 5. Use LWARX/STWCX on the full word
14194/// 6. Mask and merge to preserve other bytes in the word
14195/// 7. Extract and shift the result back
14196///
14197/// Control flow:
14198/// thisMBB -> loop1MBB -> loop2MBB -> exitMBB
14199/// | |
14200/// +------------+
14201///
14202/// loop1MBB:
14203/// - LWARX: Load-and-reserve full word
14204/// - Mask to extract target byte/halfword
14205/// - Compare with expected old value
14206/// - Branch to exitMBB if not equal (CAS failed)
14207/// loop2MBB:
14208/// - Merge new value with other bytes in the word
14209/// - STWCX: Store-conditional full word
14210/// - Branch back to loop1MBB if store failed (retry)
14211/// - Fall through to exitMBB on success
14212/// exitMBB:
14213/// - Extract and return the loaded value
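///
/// For example, an i8 CAS on a big-endian target where (ptr & 3) == 1:
/// Shift1Reg = (ptr & 3) * 8 = 8, ShiftReg = 8 ^ 24 = 16, and
/// MaskReg = 0xFF << 16 = 0x00FF0000, so loop1MBB compares only bits 16:23
/// of the word and loop2MBB merges the new byte into those bits before the
/// stwcx. (Illustrative values; the actual shifts are computed by the
/// RLWINM/XORI sequence emitted below.)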
14214static MachineBasicBlock *
14215 emitAtomicCmpSwapSoftware(MachineInstr &MI, MachineBasicBlock *BB,
14216                           const TargetInstrInfo *TII,
14217 const PPCSubtarget &Subtarget) {
14218 MachineFunction *F = BB->getParent();
14219 MachineFunction::iterator It = ++BB->getIterator();
14220
14221 bool is64bit = Subtarget.isPPC64();
14222 bool isLittleEndian = Subtarget.isLittleEndian();
14223 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
14224
14225 Register dest = MI.getOperand(0).getReg();
14226 Register ptrA = MI.getOperand(1).getReg();
14227 Register ptrB = MI.getOperand(2).getReg();
14228 Register oldval = MI.getOperand(3).getReg();
14229 Register newval = MI.getOperand(4).getReg();
14230 DebugLoc dl = MI.getDebugLoc();
14231
14232 MachineBasicBlock *loop1MBB, *loop2MBB, *exitMBB;
14233 createAtomicLoopBlocks(F, BB, loop1MBB, loop2MBB, exitMBB, MI, It);
14234
14235 MachineRegisterInfo &RegInfo = F->getRegInfo();
14236 const TargetRegisterClass *RC =
14237 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
14238 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
14239
14240 // Lambda to create virtual registers
14241 auto createVReg = [&](const TargetRegisterClass *RC) {
14242 return RegInfo.createVirtualRegister(RC);
14243 };
14244
14245 Register PtrReg = createVReg(RC);
14246 Register Shift1Reg = createVReg(GPRC);
14247 Register ShiftReg = isLittleEndian ? Shift1Reg : createVReg(GPRC);
14248 Register NewVal2Reg = createVReg(GPRC);
14249 Register NewVal3Reg = createVReg(GPRC);
14250 Register OldVal2Reg = createVReg(GPRC);
14251 Register OldVal3Reg = createVReg(GPRC);
14252 Register MaskReg = createVReg(GPRC);
14253 Register Mask2Reg = createVReg(GPRC);
14254 Register Mask3Reg = createVReg(GPRC);
14255 Register Tmp2Reg = createVReg(GPRC);
14256 Register Tmp4Reg = createVReg(GPRC);
14257 Register TmpDestReg = createVReg(GPRC);
14258 Register TmpReg = createVReg(GPRC);
14259 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
14260 Register CrReg = createVReg(&PPC::CRRCRegClass);
14261
14262 // Compute aligned pointer and shift amount
14263 Register Ptr1Reg;
14264 if (ptrA != ZeroReg) {
14265 Ptr1Reg = createVReg(RC);
14266 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
14267 .addReg(ptrA)
14268 .addReg(ptrB);
14269 } else {
14270 Ptr1Reg = ptrB;
14271 }
14272
14273 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
14274 .addReg(Ptr1Reg, {}, is64bit ? PPC::sub_32 : 0)
14275 .addImm(3)
14276 .addImm(27)
14277 .addImm(is8bit ? 28 : 27);
14278 if (!isLittleEndian)
14279 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
14280 .addReg(Shift1Reg)
14281 .addImm(is8bit ? 24 : 16);
14282 if (is64bit)
14283 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
14284 .addReg(Ptr1Reg)
14285 .addImm(0)
14286 .addImm(61);
14287 else
14288 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
14289 .addReg(Ptr1Reg)
14290 .addImm(0)
14291 .addImm(0)
14292 .addImm(29);
14293
14294 // Prepare masked values
14295 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
14296 .addReg(newval)
14297 .addReg(ShiftReg);
14298 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
14299 .addReg(oldval)
14300 .addReg(ShiftReg);
14301 if (is8bit)
14302 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
14303 else {
14304 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
14305 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
14306 .addReg(Mask3Reg)
14307 .addImm(65535);
14308 }
14309 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
14310 .addReg(Mask2Reg)
14311 .addReg(ShiftReg);
14312 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
14313 .addReg(NewVal2Reg)
14314 .addReg(MaskReg);
14315 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
14316 .addReg(OldVal2Reg)
14317 .addReg(MaskReg);
14318
14319 // loop1MBB:
14320 // lwarx tmpDest, ptr
14321 // and tmp, tmpDest, mask
14322 // cmpw tmp, oldval3
14323 // bne- exitBB
14324 BB = loop1MBB;
14325 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
14326 .addReg(ZeroReg)
14327 .addReg(PtrReg);
14328 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
14329 .addReg(TmpDestReg)
14330 .addReg(MaskReg);
14331 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg).addReg(TmpReg).addReg(OldVal3Reg);
14332 BuildMI(BB, dl, TII->get(PPC::BCC))
14333 .addImm(PPC::PRED_NE)
14334 .addReg(CrReg)
14335 .addMBB(exitMBB);
14336 BB->addSuccessor(loop2MBB);
14337 BB->addSuccessor(exitMBB);
14338
14339 // loop2MBB:
14340 // andc tmp2, tmpDest, mask
14341 // or tmp4, tmp2, newval3
14342 // stwcx. tmp4, ptr
14343 // bne- loop1MBB
14344 // b exitBB
14345 BB = loop2MBB;
14346 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
14347 .addReg(TmpDestReg)
14348 .addReg(MaskReg);
14349 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
14350 .addReg(Tmp2Reg)
14351 .addReg(NewVal3Reg);
14352 BuildMI(BB, dl, TII->get(PPC::STWCX))
14353 .addReg(Tmp4Reg)
14354 .addReg(ZeroReg)
14355 .addReg(PtrReg);
14356 BuildMI(BB, dl, TII->get(PPC::BCC))
14357 .addImm(PPC::PRED_NE)
14358 .addReg(PPC::CR0)
14359 .addMBB(loop1MBB);
14360 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14361 BB->addSuccessor(loop1MBB);
14362 BB->addSuccessor(exitMBB);
14363
14364 // exitMBB:
14365 // srw dest, tmpDest, shift
14366 BB = exitMBB;
14367 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
14368 .addReg(TmpReg)
14369 .addReg(ShiftReg);
14370
14371 return BB;
14372}
14373
14374 MachineBasicBlock *
14375 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
14376 MachineBasicBlock *BB) const {
14377 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
14378
14379 // To "insert" these instructions we actually have to insert their
14380 // control-flow patterns.
14381 const BasicBlock *LLVM_BB = BB->getBasicBlock();
14382 MachineFunction::iterator It = ++BB->getIterator();
14383
14384 MachineFunction *F = BB->getParent();
14385 MachineRegisterInfo &MRI = F->getRegInfo();
14386
14387 // Handle SELECT with ISEL support first (before generic SELECT handling)
14388 if (IsSelect(MI.getOpcode()))
14389 return emitSelect(MI, BB, TII, Subtarget);
14390
14391 switch (MI.getOpcode()) {
14392 case TargetOpcode::STACKMAP:
14393 return emitPatchPoint(MI, BB);
14394 case TargetOpcode::PATCHPOINT:
14395 // Call lowering should have added an r2 operand to indicate a dependence
14396 // on the TOC base pointer value. It can't however, because there is no
14397 // way to mark the dependence as implicit there, and so the stackmap code
14398 // will confuse it with a regular operand. Instead, add the dependence
14399 // here.
14400 if (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls())
14401 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
14402 return emitPatchPoint(MI, BB);
14403
14404 case PPC::EH_SjLj_SetJmp32:
14405 case PPC::EH_SjLj_SetJmp64:
14406 return emitEHSjLjSetJmp(MI, BB);
14407
14408 case PPC::EH_SjLj_LongJmp32:
14409 case PPC::EH_SjLj_LongJmp64:
14410 return emitEHSjLjLongJmp(MI, BB);
14411
14412 case PPC::ReadTB: {
14413 // To read the 64-bit time-base register on a 32-bit target, we read the
14414 // two halves. Should the counter have wrapped while it was being read, we
14415 // need to try again.
14416 // ...
14417 // readLoop:
14418 // mfspr Rx,TBU # load from TBU
14419 // mfspr Ry,TB # load from TB
14420 // mfspr Rz,TBU # load from TBU
14421 // cmpw crX,Rx,Rz # check if 'old'='new'
14422 // bne readLoop # branch if they're not equal
14423 // ...
14424
14425 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
14426 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
14427 DebugLoc dl = MI.getDebugLoc();
14428 F->insert(It, readMBB);
14429 F->insert(It, sinkMBB);
14430
14431 // Transfer the remainder of BB and its successor edges to sinkMBB.
14432 sinkMBB->splice(sinkMBB->begin(), BB,
14433 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14434 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
14435
14436 BB->addSuccessor(readMBB);
14437 BB = readMBB;
14438
14439 MachineRegisterInfo &RegInfo = F->getRegInfo();
14440 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
14441 Register LoReg = MI.getOperand(0).getReg();
14442 Register HiReg = MI.getOperand(1).getReg();
14443
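// SPR 269 is TBU (the upper half of the time base); SPR 268 is TB (the
// lower half).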
14444 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
14445 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
14446 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
14447
14448 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14449
14450 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
14451 .addReg(HiReg)
14452 .addReg(ReadAgainReg);
14453 BuildMI(BB, dl, TII->get(PPC::BCC))
14454 .addImm(PPC::PRED_NE)
14455 .addReg(CmpReg)
14456 .addMBB(readMBB);
14457
14458 BB->addSuccessor(readMBB);
14459 BB->addSuccessor(sinkMBB);
14460 break;
14461 }
14462 case PPC::ATOMIC_LOAD_ADD_NOWP:
14463 BB = EmitPartwordAtomicBinary(MI, BB, PPC::ADD4);
14464 break;
14465 case PPC::ATOMIC_LOAD_ADD:
14466 BB = EmitAtomicBinary(MI, BB, PPC::ADD4);
14467 break;
14468 case PPC::ATOMIC_LOAD_ADD_I64:
14469 BB = EmitAtomicBinary(MI, BB, PPC::ADD8);
14470 break;
14471 case PPC::ATOMIC_LOAD_AND_NOWP:
14472 BB = EmitPartwordAtomicBinary(MI, BB, PPC::AND);
14473 break;
14474 case PPC::ATOMIC_LOAD_AND:
14475 BB = EmitAtomicBinary(MI, BB, PPC::AND);
14476 break;
14477 case PPC::ATOMIC_LOAD_AND_I64:
14478 BB = EmitAtomicBinary(MI, BB, PPC::AND8);
14479 break;
14480 case PPC::ATOMIC_LOAD_OR_NOWP:
14481 BB = EmitPartwordAtomicBinary(MI, BB, PPC::OR);
14482 break;
14483 case PPC::ATOMIC_LOAD_OR:
14484 BB = EmitAtomicBinary(MI, BB, PPC::OR);
14485 break;
14486 case PPC::ATOMIC_LOAD_OR_I64:
14487 BB = EmitAtomicBinary(MI, BB, PPC::OR8);
14488 break;
14489 case PPC::ATOMIC_LOAD_XOR_NOWP:
14490 BB = EmitPartwordAtomicBinary(MI, BB, PPC::XOR);
14491 break;
14492 case PPC::ATOMIC_LOAD_XOR:
14493 BB = EmitAtomicBinary(MI, BB, PPC::XOR);
14494 break;
14495 case PPC::ATOMIC_LOAD_XOR_I64:
14496 BB = EmitAtomicBinary(MI, BB, PPC::XOR8);
14497 break;
14498 case PPC::ATOMIC_LOAD_NAND_NOWP:
14499 BB = EmitPartwordAtomicBinary(MI, BB, PPC::NAND);
14500 break;
14501 case PPC::ATOMIC_LOAD_NAND:
14502 BB = EmitAtomicBinary(MI, BB, PPC::NAND);
14503 break;
14504 case PPC::ATOMIC_LOAD_NAND_I64:
14505 BB = EmitAtomicBinary(MI, BB, PPC::NAND8);
14506 break;
14507 case PPC::ATOMIC_LOAD_SUB_NOWP:
14508 BB = EmitPartwordAtomicBinary(MI, BB, PPC::SUBF);
14509 break;
14510 case PPC::ATOMIC_LOAD_SUB:
14511 BB = EmitAtomicBinary(MI, BB, PPC::SUBF);
14512 break;
14513 case PPC::ATOMIC_LOAD_SUB_I64:
14514 BB = EmitAtomicBinary(MI, BB, PPC::SUBF8);
14515 break;
14516 case PPC::ATOMIC_LOAD_MIN_NOWP:
14517 BB = EmitPartwordAtomicBinary(MI, BB, 0, PPC::CMPW, PPC::PRED_LT);
14518 break;
14519 case PPC::ATOMIC_LOAD_MIN:
14520 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPW, PPC::PRED_LT);
14521 break;
14522 case PPC::ATOMIC_LOAD_MIN_I64:
14523 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPD, PPC::PRED_LT);
14524 break;
14525 case PPC::ATOMIC_LOAD_MAX_NOWP:
14526 BB = EmitPartwordAtomicBinary(MI, BB, 0, PPC::CMPW, PPC::PRED_GT);
14527 break;
14528 case PPC::ATOMIC_LOAD_MAX:
14529 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPW, PPC::PRED_GT);
14530 break;
14531 case PPC::ATOMIC_LOAD_MAX_I64:
14532 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPD, PPC::PRED_GT);
14533 break;
14534 case PPC::ATOMIC_LOAD_UMIN_NOWP:
14535 BB = EmitPartwordAtomicBinary(MI, BB, 0, PPC::CMPLW, PPC::PRED_LT);
14536 break;
14537 case PPC::ATOMIC_LOAD_UMIN:
14538 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPLW, PPC::PRED_LT);
14539 break;
14540 case PPC::ATOMIC_LOAD_UMIN_I64:
14541 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPLD, PPC::PRED_LT);
14542 break;
14543 case PPC::ATOMIC_LOAD_UMAX_NOWP:
14544 BB = EmitPartwordAtomicBinary(MI, BB, 0, PPC::CMPLW, PPC::PRED_GT);
14545 break;
14546 case PPC::ATOMIC_LOAD_UMAX:
14547 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPLW, PPC::PRED_GT);
14548 break;
14549 case PPC::ATOMIC_LOAD_UMAX_I64:
14550 BB = EmitAtomicBinary(MI, BB, 0, PPC::CMPLD, PPC::PRED_GT);
14551 break;
14552 case PPC::ATOMIC_SWAP_NOWP:
14553 BB = EmitPartwordAtomicBinary(MI, BB, 0);
14554 break;
14555 case PPC::ATOMIC_SWAP:
14556 case PPC::ATOMIC_SWAP_I64:
14557 BB = EmitAtomicBinary(MI, BB, 0);
14558 break;
14559 case PPC::ATOMIC_CMP_SWAP_I32:
14560 case PPC::ATOMIC_CMP_SWAP_I64:
14561 case PPC::ATOMIC_CMP_SWAP_I8:
14562 case PPC::ATOMIC_CMP_SWAP_I16: {
14563 // Use hardware-supported atomic operations if available
14564 bool useHardware = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
14565 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
14566 (Subtarget.hasPartwordAtomics() &&
14567 (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
14568 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16));
14569
14570 if (useHardware)
14571 BB = emitAtomicCmpSwapHardware(MI, BB, TII, Subtarget);
14572 else
14573 BB = emitAtomicCmpSwapSoftware(MI, BB, TII, Subtarget);
14574 break;
14575 }
14576 case PPC::FADDrtz: {
14577 // This pseudo performs an FADD with rounding mode temporarily forced
14578 // to round-to-zero. We emit this via custom inserter since the FPSCR
14579 // is not modeled at the SelectionDAG level.
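// The emitted sequence is: mffs (save FPSCR), mtfsb1 31 and mtfsb0 30
// (set RN = 0b01, round toward zero), fadd, then mtfsf to restore the
// saved FPSCR.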
14580 Register Dest = MI.getOperand(0).getReg();
14581 Register Src1 = MI.getOperand(1).getReg();
14582 Register Src2 = MI.getOperand(2).getReg();
14583 DebugLoc dl = MI.getDebugLoc();
14584
14585 MachineRegisterInfo &RegInfo = F->getRegInfo();
14586 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14587
14588 // Save FPSCR value.
14589 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
14590
14591 // Set rounding mode to round-to-zero.
14592 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
14593 .addImm(31)
14594 .addReg(PPC::RM, RegState::ImplicitDefine);
14595
14596 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
14597 .addImm(30)
14598 .addReg(PPC::RM, RegState::ImplicitDefine);
14599
14600 // Perform addition.
14601 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
14602 .addReg(Src1)
14603 .addReg(Src2);
14604 if (MI.getFlag(MachineInstr::NoFPExcept))
14605 MIB.setFlag(MachineInstr::NoFPExcept);
14606
14607 // Restore FPSCR value.
14608 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
14609 break;
14610 }
14611 case PPC::ANDI_rec_1_EQ_BIT:
14612 case PPC::ANDI_rec_1_GT_BIT:
14613 case PPC::ANDI_rec_1_EQ_BIT8:
14614 case PPC::ANDI_rec_1_GT_BIT8: {
14615 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14616 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
14617 ? PPC::ANDI8_rec
14618 : PPC::ANDI_rec;
14619 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14620 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
14621
14622 MachineRegisterInfo &RegInfo = F->getRegInfo();
14623 Register Dest = RegInfo.createVirtualRegister(
14624 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
14625
14626 DebugLoc Dl = MI.getDebugLoc();
14627 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
14628 .addReg(MI.getOperand(1).getReg())
14629 .addImm(1);
14630 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14631 MI.getOperand(0).getReg())
14632 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
14633 break;
14634 }
14635 case PPC::TCHECK_RET: {
14636 DebugLoc Dl = MI.getDebugLoc();
14637 MachineRegisterInfo &RegInfo = F->getRegInfo();
14638 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14639 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
14640 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14641 MI.getOperand(0).getReg())
14642 .addReg(CRReg);
14643 break;
14644 }
14645 case PPC::TBEGIN_RET: {
14646 DebugLoc Dl = MI.getDebugLoc();
14647 unsigned Imm = MI.getOperand(1).getImm();
14648 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
14649 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14650 MI.getOperand(0).getReg())
14651 .addReg(PPC::CR0EQ);
14652 break;
14653 }
14654 case PPC::SETRNDi: {
14655 DebugLoc dl = MI.getDebugLoc();
14656 Register OldFPSCRReg = MI.getOperand(0).getReg();
14657
14658 // Save FPSCR value.
14659 if (MRI.use_empty(OldFPSCRReg))
14660 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14661 else
14662 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14663
14664 // The floating point rounding mode is in bits 62:63 of FPSCR, and has
14665 // the following settings:
14666 // 00 Round to nearest
14667 // 01 Round to 0
14668 // 10 Round to +inf
14669 // 11 Round to -inf
14670
14671 // When the operand is an immediate, use its two least significant bits
14672 // to set bits 62:63 of FPSCR.
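// For example, Mode = 1 (round to 0) emits "mtfsb1 31; mtfsb0 30",
// leaving RN = 0b01 in FPSCR bits 62:63.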
14673 unsigned Mode = MI.getOperand(1).getImm();
14674 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
14675 .addImm(31)
14676 .addReg(PPC::RM, RegState::ImplicitDefine);
14677
14678 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
14679 .addImm(30)
14680 .addReg(PPC::RM, RegState::ImplicitDefine);
14681 break;
14682 }
14683 case PPC::SETRND: {
14684 DebugLoc dl = MI.getDebugLoc();
14685
14686 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
14687 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
14688 // If the target doesn't have DirectMove, we should use stack to do the
14689 // conversion, because the target doesn't have the instructions like mtvsrd
14690 // or mfvsrd to do this conversion directly.
14691 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
14692 if (Subtarget.hasDirectMove()) {
14693 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
14694 .addReg(SrcReg);
14695 } else {
14696 // Use stack to do the register copy.
14697 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
14698 MachineRegisterInfo &RegInfo = F->getRegInfo();
14699 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
14700 if (RC == &PPC::F8RCRegClass) {
14701 // Copy register from F8RCRegClass to G8RCRegClass.
14702 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
14703 "Unsupported RegClass.");
14704
14705 StoreOp = PPC::STFD;
14706 LoadOp = PPC::LD;
14707 } else {
14708 // Copy register from G8RCRegClass to F8RCRegClass.
14709 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
14710 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
14711 "Unsupported RegClass.");
14712 }
14713
14714 MachineFrameInfo &MFI = F->getFrameInfo();
14715 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
14716
14717 MachineMemOperand *MMOStore = F->getMachineMemOperand(
14718 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14719 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
14720 MFI.getObjectAlign(FrameIdx));
14721
14722 // Store the SrcReg into the stack.
14723 BuildMI(*BB, MI, dl, TII->get(StoreOp))
14724 .addReg(SrcReg)
14725 .addImm(0)
14726 .addFrameIndex(FrameIdx)
14727 .addMemOperand(MMOStore);
14728
14729 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
14730 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14731 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
14732 MFI.getObjectAlign(FrameIdx));
14733
14734 // Load from the stack where SrcReg is stored, and save to DestReg,
14735 // so we have done the RegClass conversion from RegClass::SrcReg to
14736 // RegClass::DestReg.
14737 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
14738 .addImm(0)
14739 .addFrameIndex(FrameIdx)
14740 .addMemOperand(MMOLoad);
14741 }
14742 };
14743
14744 Register OldFPSCRReg = MI.getOperand(0).getReg();
14745
14746 // Save FPSCR value.
14747 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14748
14749 // When the operand is a gprc register, use its two least significant bits
14750 // and the mtfsf instruction to set bits 62:63 of FPSCR.
14751 //
14752 // copy OldFPSCRTmpReg, OldFPSCRReg
14753 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
14754 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
14755 // copy NewFPSCRReg, NewFPSCRTmpReg
14756 // mtfsf 255, NewFPSCRReg
14757 MachineOperand SrcOp = MI.getOperand(1);
14758 MachineRegisterInfo &RegInfo = F->getRegInfo();
14759 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14760
14761 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
14762
14763 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14764 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14765
14766 // The first operand of INSERT_SUBREG should be a register that has
14767 // subregisters; since we only care about its RegClass, we use an
14768 // IMPLICIT_DEF register.
14769 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
14770 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
14771 .addReg(ImDefReg)
14772 .add(SrcOp)
14773 .addImm(1);
14774
14775 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14776 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
14777 .addReg(OldFPSCRTmpReg)
14778 .addReg(ExtSrcReg)
14779 .addImm(0)
14780 .addImm(62);
14781
14782 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14783 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
14784
14785 // The mask 255 means that bits 32:63 of NewFPSCRReg are placed into
14786 // bits 32:63 of FPSCR.
14787 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
14788 .addImm(255)
14789 .addReg(NewFPSCRReg)
14790 .addImm(0)
14791 .addImm(0);
14792 break;
14793 }
14794 case PPC::SETFLM: {
14795 DebugLoc Dl = MI.getDebugLoc();
14796
14797 // Result of setflm is previous FPSCR content, so we need to save it first.
14798 Register OldFPSCRReg = MI.getOperand(0).getReg();
14799 if (MRI.use_empty(OldFPSCRReg))
14800 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14801 else
14802 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
14803
14804 // Put bits in 32:63 to FPSCR.
14805 Register NewFPSCRReg = MI.getOperand(1).getReg();
14806 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
14807 .addImm(255)
14808 .addReg(NewFPSCRReg)
14809 .addImm(0)
14810 .addImm(0);
14811 break;
14812 }
14813 case PPC::PROBED_ALLOCA_32:
14814 case PPC::PROBED_ALLOCA_64:
14815 return emitProbedAlloca(MI, BB);
14816
14817 case PPC::SPLIT_QUADWORD: {
14818 DebugLoc DL = MI.getDebugLoc();
14819 Register Src = MI.getOperand(2).getReg();
14820 Register Lo = MI.getOperand(0).getReg();
14821 Register Hi = MI.getOperand(1).getReg();
14822 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14823 .addDef(Lo)
14824 .addUse(Src, {}, PPC::sub_gp8_x1);
14825 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14826 .addDef(Hi)
14827 .addUse(Src, {}, PPC::sub_gp8_x0);
14828 break;
14829 }
14830 case PPC::LQX_PSEUDO:
14831 case PPC::STQX_PSEUDO: {
14832 DebugLoc DL = MI.getDebugLoc();
14833 // Ptr is used as the ptr_rc_no_r0 part
14834 // of LQ/STQ's memory operand and holds the sum of RA and RB,
14835 // so it has to be g8rc_and_g8rc_nox0.
14836 Register Ptr =
14837 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
14838 Register Val = MI.getOperand(0).getReg();
14839 Register RA = MI.getOperand(1).getReg();
14840 Register RB = MI.getOperand(2).getReg();
14841 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
14842 BuildMI(*BB, MI, DL,
14843 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
14844 : TII->get(PPC::STQ))
14845 .addReg(Val, getDefRegState(MI.getOpcode() == PPC::LQX_PSEUDO))
14846 .addImm(0)
14847 .addReg(Ptr);
14848 break;
14849 }
14850 default:
14851 llvm_unreachable("Unexpected instr type to insert");
14852 }
14853
14854 MI.eraseFromParent(); // The pseudo instruction is gone now.
14855 return BB;
14856}
14857
14858//===----------------------------------------------------------------------===//
14859// Target Optimization Hooks
14860//===----------------------------------------------------------------------===//
14861
14862static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
14863 // For the estimates, convergence is quadratic, so we essentially double the
14864 // number of digits correct after every iteration. For both FRE and FRSQRTE,
14865 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
14866 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
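// For example, a 2^-5 f32 estimate needs 3 steps (5 -> 10 -> 20 -> 40
// correct bits, covering the 23-bit fraction), while a 2^-14 estimate
// needs only 1 step (14 -> 28); f64 needs one more step in either case.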
14867 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
14868 if (VT.getScalarType() == MVT::f64)
14869 RefinementSteps++;
14870 return RefinementSteps;
14871}
14872
14873SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
14874 const DenormalMode &Mode,
14875 SDNodeFlags Flags) const {
14876 // We only have VSX Vector Test for software Square Root.
14877 EVT VT = Op.getValueType();
14878 if (!isTypeLegal(MVT::i1) ||
14879 (VT != MVT::f64 &&
14880 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
14881 return TargetLowering::getSqrtInputTest(Op, DAG, Mode, Flags);
14882
14883 SDLoc DL(Op);
14884 // The output register of FTSQRT is CR field.
14885 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op, Flags);
14886 // ftsqrt BF,FRB
14887 // Let e_b be the unbiased exponent of the double-precision
14888 // floating-point operand in register FRB.
14889 // fe_flag is set to 1 if either of the following conditions occurs.
14890 // - The double-precision floating-point operand in register FRB is a zero,
14891 // a NaN, or an infinity, or a negative value.
14892 // - e_b is less than or equal to -970.
14893 // Otherwise fe_flag is set to 0.
14894 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
14895 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
14896 // exponent is less than -970)
14897 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
14898 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
14899 FTSQRT, SRIdxVal),
14900 0);
14901}
14902
14903SDValue
14904PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
14905 SelectionDAG &DAG) const {
14906 // We only have VSX Vector Square Root.
14907 EVT VT = Op.getValueType();
14908 if (VT != MVT::f64 &&
14909 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
14910 return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
14911
14912 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
14913}
14914
14915SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
14916 int Enabled, int &RefinementSteps,
14917 bool &UseOneConstNR,
14918 bool Reciprocal) const {
14919 EVT VT = Operand.getValueType();
14920 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
14921 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
14922 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14923 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14924 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14925 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14926
14927 // The Newton-Raphson computation with a single constant does not provide
14928 // enough accuracy on some CPUs.
14929 UseOneConstNR = !Subtarget.needsTwoConstNR();
14930 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
14931 }
14932 return SDValue();
14933}
14934
14935SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
14936 int Enabled,
14937 int &RefinementSteps) const {
14938 EVT VT = Operand.getValueType();
14939 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
14940 (VT == MVT::f64 && Subtarget.hasFRE()) ||
14941 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14942 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14943 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14944 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14945 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
14946 }
14947 return SDValue();
14948}
14949
14950 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
14951 // Note: This functionality is used only when arcp is enabled, and
14952 // on cores with reciprocal estimates (which are used when arcp is
14953 // enabled for division), this functionality is redundant with the default
14954 // combiner logic (once the division -> reciprocal/multiply transformation
14955 // has taken place). As a result, this matters more for older cores than for
14956 // newer ones.
14957
14958 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14959 // reciprocal if there are two or more FDIVs (for embedded cores with only
14960 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
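// For example, with a return value of 2, "a/d; b/d" is rewritten to
// "t = 1.0/d; a*t; b*t", while a lone division is left untouched.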
14961 switch (Subtarget.getCPUDirective()) {
14962 default:
14963 return 3;
14964 case PPC::DIR_440:
14965 case PPC::DIR_A2:
14966 case PPC::DIR_E500:
14967 case PPC::DIR_E500mc:
14968 case PPC::DIR_E5500:
14969 return 2;
14970 }
14971}
14972
14973// isConsecutiveLSLoc needs to work even if all adds have not yet been
14974// collapsed, and so we need to look through chains of them.
14975 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
14976 int64_t& Offset, SelectionDAG &DAG) {
14977 if (DAG.isBaseWithConstantOffset(Loc)) {
14978 Base = Loc.getOperand(0);
14979 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
14980
14981 // The base might itself be a base plus an offset, and if so, accumulate
14982 // that as well.
14983 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
14984 }
14985}
14986
14987 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
14988 unsigned Bytes, int Dist,
14989 SelectionDAG &DAG) {
14990 if (VT.getSizeInBits() / 8 != Bytes)
14991 return false;
14992
14993 SDValue BaseLoc = Base->getBasePtr();
14994 if (Loc.getOpcode() == ISD::FrameIndex) {
14995 if (BaseLoc.getOpcode() != ISD::FrameIndex)
14996 return false;
14997 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
14998 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
14999 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
15000 int FS = MFI.getObjectSize(FI);
15001 int BFS = MFI.getObjectSize(BFI);
15002 if (FS != BFS || FS != (int)Bytes) return false;
15003 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
15004 }
15005
15006 SDValue Base1 = Loc, Base2 = BaseLoc;
15007 int64_t Offset1 = 0, Offset2 = 0;
15008 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
15009 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
15010 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
15011 return true;
15012
15013 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15014 const GlobalValue *GV1 = nullptr;
15015 const GlobalValue *GV2 = nullptr;
15016 Offset1 = 0;
15017 Offset2 = 0;
15018 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
15019 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
15020 if (isGA1 && isGA2 && GV1 == GV2)
15021 return Offset1 == (Offset2 + Dist*Bytes);
15022 return false;
15023}
15024
15025// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
15026// not enforce equality of the chain operands.
15027 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
15028 unsigned Bytes, int Dist,
15029 SelectionDAG &DAG) {
15030 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
15031 EVT VT = LS->getMemoryVT();
15032 SDValue Loc = LS->getBasePtr();
15033 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
15034 }
15035
15036 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
15037 EVT VT;
15038 switch (N->getConstantOperandVal(1)) {
15039 default: return false;
15040 case Intrinsic::ppc_altivec_lvx:
15041 case Intrinsic::ppc_altivec_lvxl:
15042 case Intrinsic::ppc_vsx_lxvw4x:
15043 case Intrinsic::ppc_vsx_lxvw4x_be:
15044 VT = MVT::v4i32;
15045 break;
15046 case Intrinsic::ppc_vsx_lxvd2x:
15047 case Intrinsic::ppc_vsx_lxvd2x_be:
15048 VT = MVT::v2f64;
15049 break;
15050 case Intrinsic::ppc_altivec_lvebx:
15051 VT = MVT::i8;
15052 break;
15053 case Intrinsic::ppc_altivec_lvehx:
15054 VT = MVT::i16;
15055 break;
15056 case Intrinsic::ppc_altivec_lvewx:
15057 VT = MVT::i32;
15058 break;
15059 }
15060
15061 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
15062 }
15063
15064 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
15065 EVT VT;
15066 switch (N->getConstantOperandVal(1)) {
15067 default: return false;
15068 case Intrinsic::ppc_altivec_stvx:
15069 case Intrinsic::ppc_altivec_stvxl:
15070 case Intrinsic::ppc_vsx_stxvw4x:
15071 VT = MVT::v4i32;
15072 break;
15073 case Intrinsic::ppc_vsx_stxvd2x:
15074 VT = MVT::v2f64;
15075 break;
15076 case Intrinsic::ppc_vsx_stxvw4x_be:
15077 VT = MVT::v4i32;
15078 break;
15079 case Intrinsic::ppc_vsx_stxvd2x_be:
15080 VT = MVT::v2f64;
15081 break;
15082 case Intrinsic::ppc_altivec_stvebx:
15083 VT = MVT::i8;
15084 break;
15085 case Intrinsic::ppc_altivec_stvehx:
15086 VT = MVT::i16;
15087 break;
15088 case Intrinsic::ppc_altivec_stvewx:
15089 VT = MVT::i32;
15090 break;
15091 }
15092
15093 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
15094 }
15095
15096 return false;
15097}
15098
15099 // Return true if there is a nearby consecutive load to the one provided
15100 // (regardless of alignment). We search up and down the chain, looking
15101 // through token factors and other loads (but nothing else). As a result, a
15102 // true result indicates that it is safe to create a new consecutive load
15103 // adjacent to the load provided.
15104 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
15105 SDValue Chain = LD->getChain();
15106 EVT VT = LD->getMemoryVT();
15107
15108 SmallPtrSet<SDNode *, 16> LoadRoots;
15109 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
15110 SmallPtrSet<SDNode *, 16> Visited;
15111
15112 // First, search up the chain, branching to follow all token-factor operands.
15113 // If we find a consecutive load, then we're done, otherwise, record all
15114 // nodes just above the top-level loads and token factors.
15115 while (!Queue.empty()) {
15116 SDNode *ChainNext = Queue.pop_back_val();
15117 if (!Visited.insert(ChainNext).second)
15118 continue;
15119
15120 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
15121 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
15122 return true;
15123
15124 if (!Visited.count(ChainLD->getChain().getNode()))
15125 Queue.push_back(ChainLD->getChain().getNode());
15126 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
15127 for (const SDUse &O : ChainNext->ops())
15128 if (!Visited.count(O.getNode()))
15129 Queue.push_back(O.getNode());
15130 } else
15131 LoadRoots.insert(ChainNext);
15132 }
15133
15134 // Second, search down the chain, starting from the top-level nodes recorded
15135 // in the first phase. These top-level nodes are the nodes just above all
15136 // loads and token factors. Starting with their uses, recursively look through
15137 // all loads (just the chain uses) and token factors to find a consecutive
15138 // load.
15139 Visited.clear();
15140 Queue.clear();
15141
15142 for (SDNode *I : LoadRoots) {
15143 Queue.push_back(I);
15144
15145 while (!Queue.empty()) {
15146 SDNode *LoadRoot = Queue.pop_back_val();
15147 if (!Visited.insert(LoadRoot).second)
15148 continue;
15149
15150 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
15151 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
15152 return true;
15153
15154 for (SDNode *U : LoadRoot->users())
15155 if (((isa<MemSDNode>(U) &&
15156 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
15157 U->getOpcode() == ISD::TokenFactor) &&
15158 !Visited.count(U))
15159 Queue.push_back(U);
15160 }
15161 }
15162
15163 return false;
15164}
15165
15166/// This function is called when we have proved that a SETCC node can be replaced
15167/// by subtraction (and other supporting instructions) so that the result of
15168/// comparison is kept in a GPR instead of CR. This function is purely for
15169/// codegen purposes and has some flags to guide the codegen process.
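/// For example, with Size = 64, an i32 (setult %a, %b) becomes
/// (trunc (srl (sub (zext %a), (zext %b)), 63)): the difference of the
/// zero-extended operands is negative exactly when %a <u %b, so bit 63
/// carries the comparison result. Swap and Complement derive the other
/// unsigned predicates from the same pattern.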
15170static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
15171 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
15172 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
15173
15174 // Zero extend the operands to the largest legal integer. Originally, they
15175 // must be of a strictly smaller size.
15176 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
15177 DAG.getConstant(Size, DL, MVT::i32));
15178 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
15179 DAG.getConstant(Size, DL, MVT::i32));
15180
15181 // Swap if needed. Depends on the condition code.
15182 if (Swap)
15183 std::swap(Op0, Op1);
15184
15185 // Subtract extended integers.
15186 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
15187
15188 // Move the sign bit to the least significant position and zero out the rest.
15189 // Now the least significant bit carries the result of the original comparison.
15190 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
15191 DAG.getConstant(Size - 1, DL, MVT::i32));
15192 auto Final = Shifted;
15193
15194 // Complement the result if needed. Based on the condition code.
15195 if (Complement)
15196 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
15197 DAG.getConstant(1, DL, MVT::i64));
15198
15199 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
15200}
15201
15202SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
15203 DAGCombinerInfo &DCI) const {
15204 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
15205
15206 SelectionDAG &DAG = DCI.DAG;
15207 SDLoc DL(N);
15208
15209 // Size of integers being compared has a critical role in the following
15210 // analysis, so we prefer to do this when all types are legal.
15211 if (!DCI.isAfterLegalizeDAG())
15212 return SDValue();
15213
15214 // If all users of SETCC extend its value to a legal integer type
15215 // then we replace SETCC with a subtraction
15216 for (const SDNode *U : N->users())
15217 if (U->getOpcode() != ISD::ZERO_EXTEND)
15218 return SDValue();
15219
15220 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15221 auto OpSize = N->getOperand(0).getValueSizeInBits();
15222
15223 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
15224
15225 if (OpSize < Size) {
15226 switch (CC) {
15227 default: break;
15228 case ISD::SETULT:
15229 return generateEquivalentSub(N, Size, false, false, DL, DAG);
15230 case ISD::SETULE:
15231 return generateEquivalentSub(N, Size, true, true, DL, DAG);
15232 case ISD::SETUGT:
15233 return generateEquivalentSub(N, Size, false, true, DL, DAG);
15234 case ISD::SETUGE:
15235 return generateEquivalentSub(N, Size, true, false, DL, DAG);
15236 }
15237 }
15238
15239 return SDValue();
15240}
15241
15242SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
15243 DAGCombinerInfo &DCI) const {
15244 SelectionDAG &DAG = DCI.DAG;
15245 SDLoc dl(N);
15246
15247 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
15248 // If we're tracking CR bits, we need to be careful that we don't have:
15249 // trunc(binary-ops(zext(x), zext(y)))
15250 // or
15251 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
15252 // such that we're unnecessarily moving things into GPRs when it would be
15253 // better to keep them in CR bits.
15254
15255 // Note that trunc here can be an actual i1 trunc, or can be the effective
15256 // truncation that comes from a setcc or select_cc.
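// For example, (trunc (xor (zext i1 %a), (zext i1 %b)) to i1) can be
// rewritten as (xor i1 %a, %b), with %a and %b staying in CR bits
// throughout.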
15257 if (N->getOpcode() == ISD::TRUNCATE &&
15258 N->getValueType(0) != MVT::i1)
15259 return SDValue();
15260
15261 if (N->getOperand(0).getValueType() != MVT::i32 &&
15262 N->getOperand(0).getValueType() != MVT::i64)
15263 return SDValue();
15264
15265 if (N->getOpcode() == ISD::SETCC ||
15266 N->getOpcode() == ISD::SELECT_CC) {
15267 // If we're looking at a comparison, then we need to make sure that the
15268 // high bits (all except for the first) don't affect the result.
15269 ISD::CondCode CC =
15270 cast<CondCodeSDNode>(N->getOperand(
15271 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
15272 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
15273
15274 if (ISD::isSignedIntSetCC(CC)) {
15275 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
15276 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
15277 return SDValue();
15278 } else if (ISD::isUnsignedIntSetCC(CC)) {
15279 if (!DAG.MaskedValueIsZero(N->getOperand(0),
15280 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
15281 !DAG.MaskedValueIsZero(N->getOperand(1),
15282 APInt::getHighBitsSet(OpBits, OpBits-1)))
15283 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
15284 : SDValue());
15285 } else {
15286 // This is neither a signed nor an unsigned comparison; just make sure
15287 // that the high bits are equal.
15288 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
15289 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
15290
15291 // We don't really care about what is known about the first bit (if
15292 // anything), so pretend that it is known zero for both to ensure they can
15293 // be compared as constants.
15294 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
15295 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
15296
15297 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
15298 Op1Known.getConstant() != Op2Known.getConstant())
15299 return SDValue();
15300 }
15301 }
15302
15303 // We now know that the higher-order bits are irrelevant, we just need to
15304 // make sure that all of the intermediate operations are bit operations, and
15305 // all inputs are extensions.
15306 if (N->getOperand(0).getOpcode() != ISD::AND &&
15307 N->getOperand(0).getOpcode() != ISD::OR &&
15308 N->getOperand(0).getOpcode() != ISD::XOR &&
15309 N->getOperand(0).getOpcode() != ISD::SELECT &&
15310 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
15311 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
15312 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
15313 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
15314 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
15315 return SDValue();
15316
15317 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
15318 N->getOperand(1).getOpcode() != ISD::AND &&
15319 N->getOperand(1).getOpcode() != ISD::OR &&
15320 N->getOperand(1).getOpcode() != ISD::XOR &&
15321 N->getOperand(1).getOpcode() != ISD::SELECT &&
15322 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
15323 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
15324 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
15325 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
15326 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
15327 return SDValue();
15328
15329 SmallVector<SDValue, 4> Inputs;
15330 SmallVector<SDValue, 8> BinOps, PromOps;
15331 SmallPtrSet<SDNode *, 16> Visited;
15332
15333 for (unsigned i = 0; i < 2; ++i) {
15334 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15335 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15336 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15337 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
15338 isa<ConstantSDNode>(N->getOperand(i)))
15339 Inputs.push_back(N->getOperand(i));
15340 else
15341 BinOps.push_back(N->getOperand(i));
15342
15343 if (N->getOpcode() == ISD::TRUNCATE)
15344 break;
15345 }
15346
15347 // Visit all inputs, collect all binary operations (and, or, xor and
15348 // select) that are all fed by extensions.
15349 while (!BinOps.empty()) {
15350 SDValue BinOp = BinOps.pop_back_val();
15351
15352 if (!Visited.insert(BinOp.getNode()).second)
15353 continue;
15354
15355 PromOps.push_back(BinOp);
15356
15357 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15358 // The condition of the select is not promoted.
15359 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15360 continue;
15361 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15362 continue;
15363
15364 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15365 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15366 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15367 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
15368 isa<ConstantSDNode>(BinOp.getOperand(i))) {
15369 Inputs.push_back(BinOp.getOperand(i));
15370 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15371 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15372 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15373 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15374 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
15375 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15376 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15377 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15378 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
15379 BinOps.push_back(BinOp.getOperand(i));
15380 } else {
15381 // We have an input that is not an extension or another binary
15382 // operation; we'll abort this transformation.
15383 return SDValue();
15384 }
15385 }
15386 }
15387
15388 // Make sure that this is a self-contained cluster of operations (which
15389 // is not quite the same thing as saying that everything has only one
15390 // use).
15391 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15392 if (isa<ConstantSDNode>(Inputs[i]))
15393 continue;
15394
15395 for (const SDNode *User : Inputs[i].getNode()->users()) {
15396 if (User != N && !Visited.count(User))
15397 return SDValue();
15398
15399 // Make sure that we're not going to promote the non-output-value
15400 // operand(s) or SELECT or SELECT_CC.
15401 // FIXME: Although we could sometimes handle this, and it does occur in
15402 // practice that one of the condition inputs to the select is also one of
15403 // the outputs, we currently can't deal with this.
15404 if (User->getOpcode() == ISD::SELECT) {
15405 if (User->getOperand(0) == Inputs[i])
15406 return SDValue();
15407 } else if (User->getOpcode() == ISD::SELECT_CC) {
15408 if (User->getOperand(0) == Inputs[i] ||
15409 User->getOperand(1) == Inputs[i])
15410 return SDValue();
15411 }
15412 }
15413 }
15414
15415 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15416 for (const SDNode *User : PromOps[i].getNode()->users()) {
15417 if (User != N && !Visited.count(User))
15418 return SDValue();
15419
15420 // Make sure that we're not going to promote the non-output-value
15421 // operand(s) or SELECT or SELECT_CC.
15422 // FIXME: Although we could sometimes handle this, and it does occur in
15423 // practice that one of the condition inputs to the select is also one of
15424 // the outputs, we currently can't deal with this.
15425 if (User->getOpcode() == ISD::SELECT) {
15426 if (User->getOperand(0) == PromOps[i])
15427 return SDValue();
15428 } else if (User->getOpcode() == ISD::SELECT_CC) {
15429 if (User->getOperand(0) == PromOps[i] ||
15430 User->getOperand(1) == PromOps[i])
15431 return SDValue();
15432 }
15433 }
15434 }
15435
15436 // Replace all inputs with the extension operand.
15437 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15438 // Constants may have users outside the cluster of to-be-promoted nodes,
15439 // and so we need to replace those as we do the promotions.
15440 if (isa<ConstantSDNode>(Inputs[i]))
15441 continue;
15442 else
15443 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
15444 }
15445
15446 std::list<HandleSDNode> PromOpHandles;
15447 for (auto &PromOp : PromOps)
15448 PromOpHandles.emplace_back(PromOp);
15449
15450 // Replace all operations (these are all the same, but have a different
15451 // (i1) return type). DAG.getNode will validate that the types of
15452 // a binary operator match, so go through the list in reverse so that
15453 // we've likely promoted both operands first. Any intermediate truncations or
15454 // extensions disappear.
15455 while (!PromOpHandles.empty()) {
15456 SDValue PromOp = PromOpHandles.back().getValue();
15457 PromOpHandles.pop_back();
15458
15459 if (PromOp.getOpcode() == ISD::TRUNCATE ||
15460 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
15461 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
15462 PromOp.getOpcode() == ISD::ANY_EXTEND) {
15463 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
15464 PromOp.getOperand(0).getValueType() != MVT::i1) {
15465 // The operand is not yet ready (see comment below).
15466 PromOpHandles.emplace_front(PromOp);
15467 continue;
15468 }
15469
15470 SDValue RepValue = PromOp.getOperand(0);
15471 if (isa<ConstantSDNode>(RepValue))
15472 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
15473
15474 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
15475 continue;
15476 }
15477
15478 unsigned C;
15479 switch (PromOp.getOpcode()) {
15480 default: C = 0; break;
15481 case ISD::SELECT: C = 1; break;
15482 case ISD::SELECT_CC: C = 2; break;
15483 }
15484
15485 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
15486 PromOp.getOperand(C).getValueType() != MVT::i1) ||
15487 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
15488 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
15489 // The to-be-promoted operands of this node have not yet been
15490 // promoted (this should be rare because we're going through the
15491 // list backward, but if one of the operands has several users in
15492 // this cluster of to-be-promoted nodes, it is possible).
15493 PromOpHandles.emplace_front(PromOp);
15494 continue;
15495 }
15496
15497 SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
15498
15499 // If there are any constant inputs, make sure they're replaced now.
15500 for (unsigned i = 0; i < 2; ++i)
15501 if (isa<ConstantSDNode>(Ops[C+i]))
15502 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
15503
15504 DAG.ReplaceAllUsesOfValueWith(PromOp,
15505 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
15506 }
15507
15508 // Now we're left with the initial truncation itself.
15509 if (N->getOpcode() == ISD::TRUNCATE)
15510 return N->getOperand(0);
15511
15512 // Otherwise, this is a comparison. The operands to be compared have just
15513 // changed type (to i1), but everything else is the same.
15514 return SDValue(N, 0);
15515}
15516
15517SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
15518 DAGCombinerInfo &DCI) const {
15519 SelectionDAG &DAG = DCI.DAG;
15520 SDLoc dl(N);
15521
15522 // If we're tracking CR bits, we need to be careful that we don't have:
15523 // zext(binary-ops(trunc(x), trunc(y)))
15524 // or
15525 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
15526 // such that we're unnecessarily moving things into CR bits that can more
15527 // efficiently stay in GPRs. Note that if we're not certain that the high
15528 // bits are set as required by the final extension, we still may need to do
15529 // some masking to get the proper behavior.
15530
15531 // This same functionality is important on PPC64 when dealing with
15532 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
15533 // the return values of functions. Because it is so similar, it is handled
15534 // here as well.
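// For example, (zext (and (trunc i64 %a to i1), (trunc i64 %b to i1)) to
// i64) can become (and i64 %a, %b), possibly followed by a mask that
// clears the high bits, avoiding the round trip through a CR bit.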
15535
15536 if (N->getValueType(0) != MVT::i32 &&
15537 N->getValueType(0) != MVT::i64)
15538 return SDValue();
15539
15540 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
15541 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
15542 return SDValue();
15543
15544 if (N->getOperand(0).getOpcode() != ISD::AND &&
15545 N->getOperand(0).getOpcode() != ISD::OR &&
15546 N->getOperand(0).getOpcode() != ISD::XOR &&
15547 N->getOperand(0).getOpcode() != ISD::SELECT &&
15548 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
15549 return SDValue();
15550
15551 SmallVector<SDValue, 4> Inputs;
15552 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
15553 SmallPtrSet<SDNode *, 16> Visited;
15554
15555 // Visit all inputs, collect all binary operations (and, or, xor and
15556 // select) that are all fed by truncations.
15557 while (!BinOps.empty()) {
15558 SDValue BinOp = BinOps.pop_back_val();
15559
15560 if (!Visited.insert(BinOp.getNode()).second)
15561 continue;
15562
15563 PromOps.push_back(BinOp);
15564
15565 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15566 // The condition of the select is not promoted.
15567 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15568 continue;
15569 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15570 continue;
15571
15572 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15573 isa<ConstantSDNode>(BinOp.getOperand(i))) {
15574 Inputs.push_back(BinOp.getOperand(i));
15575 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15576 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15577 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15578 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15579 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
15580 BinOps.push_back(BinOp.getOperand(i));
15581 } else {
15582 // We have an input that is not a truncation or another binary
15583 // operation; we'll abort this transformation.
15584 return SDValue();
15585 }
15586 }
15587 }
15588
15589 // The operands of a select that must be truncated when the select is
15590 // promoted because the operand is actually part of the to-be-promoted set.
15591 DenseMap<SDNode *, EVT> SelectTruncOp[2];
15592
15593 // Make sure that this is a self-contained cluster of operations (which
15594 // is not quite the same thing as saying that everything has only one
15595 // use).
15596 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15597 if (isa<ConstantSDNode>(Inputs[i]))
15598 continue;
15599
15600 for (SDNode *User : Inputs[i].getNode()->users()) {
15601 if (User != N && !Visited.count(User))
15602 return SDValue();
15603
15604 // If we're going to promote the non-output-value operand(s) or SELECT or
15605 // SELECT_CC, record them for truncation.
15606 if (User->getOpcode() == ISD::SELECT) {
15607 if (User->getOperand(0) == Inputs[i])
15608 SelectTruncOp[0].insert(std::make_pair(User,
15609 User->getOperand(0).getValueType()));
15610 } else if (User->getOpcode() == ISD::SELECT_CC) {
15611 if (User->getOperand(0) == Inputs[i])
15612 SelectTruncOp[0].insert(std::make_pair(User,
15613 User->getOperand(0).getValueType()));
15614 if (User->getOperand(1) == Inputs[i])
15615 SelectTruncOp[1].insert(std::make_pair(User,
15616 User->getOperand(1).getValueType()));
15617 }
15618 }
15619 }
15620
15621 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15622 for (SDNode *User : PromOps[i].getNode()->users()) {
15623 if (User != N && !Visited.count(User))
15624 return SDValue();
15625
15626 // If we're going to promote the non-output-value operand(s) of SELECT or
15627 // SELECT_CC, record them for truncation.
15628 if (User->getOpcode() == ISD::SELECT) {
15629 if (User->getOperand(0) == PromOps[i])
15630 SelectTruncOp[0].insert(std::make_pair(User,
15631 User->getOperand(0).getValueType()));
15632 } else if (User->getOpcode() == ISD::SELECT_CC) {
15633 if (User->getOperand(0) == PromOps[i])
15634 SelectTruncOp[0].insert(std::make_pair(User,
15635 User->getOperand(0).getValueType()));
15636 if (User->getOperand(1) == PromOps[i])
15637 SelectTruncOp[1].insert(std::make_pair(User,
15638 User->getOperand(1).getValueType()));
15639 }
15640 }
15641 }
15642
15643 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
15644 bool ReallyNeedsExt = false;
15645 if (N->getOpcode() != ISD::ANY_EXTEND) {
15646 // If all of the inputs are not already sign/zero extended, then
15647 // we'll still need to do that at the end.
15648 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15649 if (isa<ConstantSDNode>(Inputs[i]))
15650 continue;
15651
15652 unsigned OpBits =
15653 Inputs[i].getOperand(0).getValueSizeInBits();
15654 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
15655
15656 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
15657 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
15658 APInt::getHighBitsSet(OpBits,
15659 OpBits-PromBits))) ||
15660 (N->getOpcode() == ISD::SIGN_EXTEND &&
15661 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
15662 (OpBits-(PromBits-1)))) {
15663 ReallyNeedsExt = true;
15664 break;
15665 }
15666 }
15667 }
15668
15669 // Convert PromOps to handles before doing any RAUW operations, as these
15670 // may CSE with existing nodes, deleting the originals.
15671 std::list<HandleSDNode> PromOpHandles;
15672 for (auto &PromOp : PromOps)
15673 PromOpHandles.emplace_back(PromOp);
15674
15675 // Replace all inputs, either with the truncation operand, or a
15676 // truncation or extension to the final output type.
15677 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15678 // Constant inputs need to be replaced with the to-be-promoted nodes that
15679 // use them because they might have users outside of the cluster of
15680 // promoted nodes.
15681 if (isa<ConstantSDNode>(Inputs[i]))
15682 continue;
15683
15684 SDValue InSrc = Inputs[i].getOperand(0);
15685 if (Inputs[i].getValueType() == N->getValueType(0))
15686 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
15687 else if (N->getOpcode() == ISD::SIGN_EXTEND)
15688 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15689 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
15690 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15691 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15692 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
15693 else
15694 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15695 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
15696 }
15697
15698 // Replace all operations (these are all the same, but have a different
15699 // (promoted) return type). DAG.getNode will validate that the types of
15700 // a binary operator match, so go through the list in reverse so that
15701 // we've likely promoted both operands first.
15702 while (!PromOpHandles.empty()) {
15703 SDValue PromOp = PromOpHandles.back().getValue();
15704 PromOpHandles.pop_back();
15705
15706 unsigned C;
15707 switch (PromOp.getOpcode()) {
15708 default: C = 0; break;
15709 case ISD::SELECT: C = 1; break;
15710 case ISD::SELECT_CC: C = 2; break;
15711 }
15712
15713 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
15714 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
15715 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
15716 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
15717 // The to-be-promoted operands of this node have not yet been
15718 // promoted (this should be rare because we're going through the
15719 // list backward, but if one of the operands has several users in
15720 // this cluster of to-be-promoted nodes, it is possible).
15721 PromOpHandles.emplace_front(PromOp);
15722 continue;
15723 }
15724
15725 // For SELECT and SELECT_CC nodes, we do a similar check for any
15726 // to-be-promoted comparison inputs.
15727 if (PromOp.getOpcode() == ISD::SELECT ||
15728 PromOp.getOpcode() == ISD::SELECT_CC) {
15729 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
15730 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
15731 (SelectTruncOp[1].count(PromOp.getNode()) &&
15732 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
15733 PromOpHandles.emplace_front(PromOp);
15734 continue;
15735 }
15736 }
15737
15738 SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
15739
15740 // If this node has constant inputs, then they'll need to be promoted here.
15741 for (unsigned i = 0; i < 2; ++i) {
15742 if (!isa<ConstantSDNode>(Ops[C+i]))
15743 continue;
15744 if (Ops[C+i].getValueType() == N->getValueType(0))
15745 continue;
15746
15747 if (N->getOpcode() == ISD::SIGN_EXTEND)
15748 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15749 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15750 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15751 else
15752 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15753 }
15754
15755 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
15756 // truncate them again to the original value type.
15757 if (PromOp.getOpcode() == ISD::SELECT ||
15758 PromOp.getOpcode() == ISD::SELECT_CC) {
15759 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
15760 if (SI0 != SelectTruncOp[0].end())
15761 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
15762 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
15763 if (SI1 != SelectTruncOp[1].end())
15764 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
15765 }
15766
15767 DAG.ReplaceAllUsesOfValueWith(PromOp,
15768 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
15769 }
15770
15771 // Now we're left with the initial extension itself.
15772 if (!ReallyNeedsExt)
15773 return N->getOperand(0);
15774
15775 // To zero extend, just mask off everything except for the first bit (in the
15776 // i1 case).
15777 if (N->getOpcode() == ISD::ZERO_EXTEND)
15778 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
15779 DAG.getConstant(APInt::getLowBitsSet(
15780 N->getValueSizeInBits(0), PromBits),
15781 dl, N->getValueType(0)));
15782
15783 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
15784 "Invalid extension type");
15785 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
15786 SDValue ShiftCst =
15787 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
15788 return DAG.getNode(
15789 ISD::SRA, dl, N->getValueType(0),
15790 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
15791 ShiftCst);
15792}
15793
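// Illustrative sketch of the combine above (not from the source): with CR
// bits enabled, a cluster such as
//   (zext i32 (xor (trunc i1 %a), (trunc i1 %b)))
// is rewritten to operate directly on the original wide values, e.g.
//   (and i32 (xor %a, %b), 1)
// where the trailing AND with getLowBitsSet(.., PromBits) is emitted only
// when ReallyNeedsExt shows the high bits are not already as required.
//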
15794 // Check whether an i128 equality compare of two loads can be converted to
// v16i8 loads for vcmpequb.
15795 static bool canConvertToVcmpequb(SDValue &LHS, SDValue &RHS) {
15796
15797 auto isValidForConvert = [](SDValue &Operand) {
15798 if (!Operand.hasOneUse())
15799 return false;
15800
15801 if (Operand.getValueType() != MVT::i128)
15802 return false;
15803
15804 if (Operand.getOpcode() == ISD::Constant)
15805 return true;
15806
15807 auto *LoadNode = dyn_cast<LoadSDNode>(Operand);
15808 if (!LoadNode)
15809 return false;
15810
15811 // If the memory operation is volatile, do not perform any
15812 // optimization or transformation. Volatile operations must be preserved
15813 // as written to ensure correct program behavior, so we return false here
15814 // and no conversion is attempted.
15815
15816 if (LoadNode->isVolatile())
15817 return false;
15818
15819 // Only combine loads if both use the unindexed addressing mode.
15820 // PowerPC AltiVec/VMX does not support vector loads or stores with
15821 // pre/post-increment addressing. Indexed modes may imply implicit
15822 // pointer updates, which are not compatible with AltiVec vector
15823 // instructions.
15824 if (LoadNode->getAddressingMode() != ISD::UNINDEXED)
15825 return false;
15826
15827 // Only combine loads if both are non-extending loads
15828 // (ISD::NON_EXTLOAD). Extending loads (such as ISD::ZEXTLOAD or
15829 // ISD::SEXTLOAD) perform zero or sign extension, which may change the
15830 // loaded value's semantics and are not compatible with vector loads.
15831 if (LoadNode->getExtensionType() != ISD::NON_EXTLOAD)
15832 return false;
15833
15834 return true;
15835 };
15836
15837 return (isValidForConvert(LHS) && isValidForConvert(RHS));
15838}
15839
15840 static SDValue convertTwoLoadsAndCmpToVCMPEQUB(SelectionDAG &DAG, SDNode *N,
15841 const SDLoc &DL) {
15842
15843 assert(N->getOpcode() == ISD::SETCC && "Should be called with a SETCC node");
15844
15845 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15846 assert((CC == ISD::SETNE || CC == ISD::SETEQ) &&
15847 "CC mus be ISD::SETNE or ISD::SETEQ");
15848
15849 auto getV16i8Load = [&](const SDValue &Operand) {
15850 if (Operand.getOpcode() == ISD::Constant)
15851 return DAG.getBitcast(MVT::v16i8, Operand);
15852
15853 assert(Operand.getOpcode() == ISD::LOAD && "Must be LoadSDNode here.");
15854
15855 auto *LoadNode = cast<LoadSDNode>(Operand);
15856 SDValue NewLoad =
15857 DAG.getLoad(MVT::v16i8, DL, LoadNode->getChain(),
15858 LoadNode->getBasePtr(), LoadNode->getMemOperand());
15859 DAG.ReplaceAllUsesOfValueWith(Operand.getValue(1), NewLoad.getValue(1));
15860 return NewLoad;
15861 };
15862
15863 // Following code transforms the DAG
15864 // t0: ch,glue = EntryToken
15865 // t2: i64,ch = CopyFromReg t0, Register:i64 %0
15866 // t3: i128,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15867 // undef:i64
15868 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
15869 // t5: i128,ch =
15870 // load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64
15871 // t6: i1 = setcc t3, t5, setne:ch
15872 //
15873 // ---->
15874 //
15875 // t0: ch,glue = EntryToken
15876 // t2: i64,ch = CopyFromReg t0, Register:i64 %0
15877 // t3: v16i8,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15878 // undef:i64
15879 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
15880 // t5: v16i8,ch =
15881 // load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64
15882 // t6: i32 =
15883 // llvm.ppc.altivec.vcmpequb.p TargetConstant:i32<10505>,
15884 // Constant:i32<2>, t3, t5
15885 // t7: i1 = setcc t6, Constant:i32<0>, seteq:ch
15886
15887 // Or transforms the DAG
15888 // t5: i128,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15889 // t8: i1 =
15890 // setcc Constant:i128<237684487579686500932345921536>, t5, setne:ch
15891 //
15892 // --->
15893 //
15894 // t5: v16i8,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15895 // t6: v16i8 = bitcast Constant:i128<237684487579686500932345921536>
15896 // t7: i32 =
15897 // llvm.ppc.altivec.vcmpequb.p Constant:i32<10962>, Constant:i32<2>, t5, t6
15898
15899 SDValue LHSVec = getV16i8Load(N->getOperand(0));
15900 SDValue RHSVec = getV16i8Load(N->getOperand(1));
15901
15902 SDValue IntrID =
15903 DAG.getConstant(Intrinsic::ppc_altivec_vcmpequb_p, DL, MVT::i32);
15904 SDValue CRSel = DAG.getConstant(2, DL, MVT::i32); // which CR6 predicate field
15905 SDValue PredResult = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
15906 IntrID, CRSel, LHSVec, RHSVec);
15907 // ppc_altivec_vcmpequb_p returns 1 when two vectors are the same,
15908 // so we need to invert the CC opcode.
15909 return DAG.getSetCC(DL, N->getValueType(0), PredResult,
15910 DAG.getConstant(0, DL, MVT::i32),
15911 CC == ISD::SETNE ? ISD::SETEQ : ISD::SETNE);
15912}
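
// Source-level example of what the rewrite above targets (illustrative only,
// assuming an Altivec-capable subtarget):
//   bool eq16(const char *a, const char *b) { return memcmp(a, b, 16) == 0; }
// Both 16-byte buffers are loaded as v16i8 and compared with one vcmpequb.
// predicate instead of a chain of scalar compares.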
15913
15914// Detect whether there is a pattern like (setcc (and X, 1), 0, eq).
15915 // If it is, return true; otherwise return false.
15916 static bool canConvertSETCCToXori(SDNode *N) {
15917 assert(N->getOpcode() == ISD::SETCC && "Should be SETCC SDNode here.");
15918
15919 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15920 if (CC != ISD::SETEQ)
15921 return false;
15922
15923 SDValue LHS = N->getOperand(0);
15924 SDValue RHS = N->getOperand(1);
15925
15926 // Check whether the `SDValue &V` comes from an `and` with `1`.
15927 auto IsAndWithOne = [](SDValue &V) {
15928 if (V.getOpcode() == ISD::AND) {
15929 for (const SDValue &Op : V->ops())
15930 if (auto *C = dyn_cast<ConstantSDNode>(Op))
15931 if (C->isOne())
15932 return true;
15933 }
15934 return false;
15935 };
15936
15937 // Check whether the SETCC compares with zero.
15938 auto IsCompareWithZero = [](SDValue &V) {
15939 if (auto *C = dyn_cast<ConstantSDNode>(V))
15940 if (C->isZero())
15941 return true;
15942 return false;
15943 };
15944
15945 return (IsAndWithOne(LHS) && IsCompareWithZero(RHS)) ||
15946 (IsAndWithOne(RHS) && IsCompareWithZero(LHS));
15947}
15948
15949 // The caller must check whether the `SDNode *N` can be converted to an xori
15950 // using `static bool canConvertSETCCToXori(SDNode *N)`
15951 // before calling this function; otherwise, it may produce incorrect results.
15952 static SDValue ConvertSETCCToXori(SDNode *N, SelectionDAG &DAG) {
15953
15954 assert(N->getOpcode() == ISD::SETCC && "Should be SETCC SDNode here.");
15955 SDValue LHS = N->getOperand(0);
15956 SDValue RHS = N->getOperand(1);
15957 SDLoc DL(N);
15958
15959 [[maybe_unused]] ISD::CondCode CC =
15960 cast<CondCodeSDNode>(N->getOperand(2))->get();
15961 assert((CC == ISD::SETEQ) && "CC must be ISD::SETEQ.");
15962 // Rewrite it as XORI (and X, 1), 1.
15963 auto MakeXor1 = [&](SDValue V) {
15964 EVT VT = V.getValueType();
15965 SDValue One = DAG.getConstant(1, DL, VT);
15966 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, V, One);
15967 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Xor);
15968 };
15969
15970 if (LHS.getOpcode() == ISD::AND && RHS.getOpcode() != ISD::AND)
15971 return MakeXor1(LHS);
15972
15973 if (RHS.getOpcode() == ISD::AND && LHS.getOpcode() != ISD::AND)
15974 return MakeXor1(RHS);
15975
15976 llvm_unreachable("Should not reach here.");
15977}
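
// Illustrative effect of ConvertSETCCToXori (assumed IR, not from the
// source): the pattern
//   %t = and i32 %x, 1
//   %c = icmp eq i32 %t, 0
// becomes trunc (xor (and %x, 1), 1) to i1, i.e. the inverted low bit,
// avoiding a separate compare-and-set sequence.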
15978
15979// Match `sext(setcc X, 0, eq)` and turn it into an ADDIC/SUBFE sequence.
15980//
15981// This generates code for:
15982// X == 0 ? -1 : 0
15983//
15984// On pre-ISA 3.1 targets, this is better than the longer CNTLZW/SRWI/NEG
15985// sequence. This is useful for cases like:
15986// uint8_t f(uint8_t x) { return (x == 0) ? -1 : 0; }
15987//
15988// ISA 3.1+ is skipped because those targets can use SETBC.
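//
// Rough sketch of the emitted sequence (illustrative; register names
// assumed):
//   addic rT, rX, -1   // CA = 1 iff x != 0
//   subfe rT, rT, rT   // rT = CA - 1: -1 when x == 0, 0 otherwise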
15989
15990SDValue PPCTargetLowering::combineSignExtendSetCC(SDNode *N,
15991 DAGCombinerInfo &DCI) const {
15992 if (Subtarget.isISA3_1())
15993 return SDValue();
15994
15995 EVT VT = N->getValueType(0);
15996 if (VT != MVT::i32 && VT != MVT::i64)
15997 return SDValue();
15998
15999 SDValue N0 = N->getOperand(0);
16000 if (N0.getOpcode() != ISD::SETCC)
16001 return SDValue();
16002
16003 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
16004 SDValue LHS = N0.getOperand(0);
16005 SDValue RHS = N0.getOperand(1);
16006
16007 // Bail out unless this is sext (setcc x, 0, eq) or sext (setcc 0, x, eq).
16008 if (CC != ISD::SETEQ || (!isNullConstant(LHS) && !isNullConstant(RHS)))
16009 return SDValue();
16010
16011 SDLoc dl(N);
16012 SelectionDAG &DAG = DCI.DAG;
16013 SDValue X = isNullConstant(LHS) ? RHS : LHS;
16014 EVT XVT = X.getValueType(); // The type of x in the setcc x, 0, eq.
16015
16016 if ((XVT == MVT::i64 || VT == MVT::i64) && !Subtarget.isPPC64())
16017 return SDValue();
16018
16019 // On PPC64, i32 carry operations use the full 64-bit XER register,
16020 // so we must use i64 operations to avoid incorrect results.
16021 // Use i64 operations and truncate the result if needed.
16022 if (XVT != MVT::i64 && Subtarget.isPPC64())
16023 // Zero-extend if the input type is not 64 bits.
16024 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, X);
16025
16026 EVT OpVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
16027
16028 // Generate: SUBFE(ADDC(X, -1)).
16029 SDValue MinusOne = DAG.getAllOnesConstant(dl, OpVT);
16030 SDValue Addc =
16031 DAG.getNode(PPCISD::ADDC, dl, DAG.getVTList(OpVT, MVT::i32), X, MinusOne);
16032 SDValue Carry = Addc.getValue(1);
16033 SDValue Sube = DAG.getNode(PPCISD::SUBE, dl, DAG.getVTList(OpVT, MVT::i32),
16034 Addc, Addc, Carry);
16035
16036 // Truncate back to i32 if we used i64 operations.
16037 if (OpVT == MVT::i64 && VT == MVT::i32)
16038 return DAG.getNode(ISD::TRUNCATE, dl, VT, Sube);
16039
16040 return Sube;
16041}
16042
16043SDValue PPCTargetLowering::combineSetCC(SDNode *N,
16044 DAGCombinerInfo &DCI) const {
16045 assert(N->getOpcode() == ISD::SETCC &&
16046 "Should be called with a SETCC node");
16047
16048 // Check if the pattern (setcc (and X, 1), 0, eq) is present.
16049 if (canConvertSETCCToXori(N))
16049 // If it is, rewrite it as XORI (and X, 1), 1.
16051 return ConvertSETCCToXori(N, DCI.DAG);
16052
16053 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
16054 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
16055 SDValue LHS = N->getOperand(0);
16056 SDValue RHS = N->getOperand(1);
16057
16058 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
16059 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
16060 LHS.hasOneUse())
16061 std::swap(LHS, RHS);
16062
16063 // x == 0-y --> x+y == 0
16064 // x != 0-y --> x+y != 0
16065 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
16066 RHS.hasOneUse()) {
16067 SDLoc DL(N);
16068 SelectionDAG &DAG = DCI.DAG;
16069 EVT VT = N->getValueType(0);
16070 EVT OpVT = LHS.getValueType();
16071 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
16072 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
16073 }
16074
16075 // Optimization: Fold i128 equality/inequality compares of two loads into a
16076 // vectorized compare using vcmpequb.p when Altivec is available.
16077 //
16078 // Rationale:
16079 // A scalar i128 SETCC (eq/ne) normally lowers to multiple scalar ops.
16080 // On VSX-capable subtargets, we can instead reinterpret the i128 loads
16081 // as v16i8 vectors and use the Altivec vcmpequb.p instruction to
16082 // perform a full 128-bit equality check in a single vector compare.
16083 //
16084 // Example Result:
16085 // This transformation replaces memcmp(a, b, 16) with two vector loads
16086 // and one vector compare instruction.
16087
16088 if (Subtarget.hasAltivec() && canConvertToVcmpequb(LHS, RHS))
16089 return convertTwoLoadsAndCmpToVCMPEQUB(DCI.DAG, N, SDLoc(N));
16090 }
16091
16092 return DAGCombineTruncBoolExt(N, DCI);
16093}
16094
16095// Is this an extending load from an f32 to an f64?
16096static bool isFPExtLoad(SDValue Op) {
16097 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
16098 return LD->getExtensionType() == ISD::EXTLOAD &&
16099 Op.getValueType() == MVT::f64;
16100 return false;
16101}
16102
16103/// Reduces the number of fp-to-int conversion when building a vector.
16104///
16105/// If this vector is built out of floating to integer conversions,
16106/// transform it to a vector built out of floating point values followed by a
16107/// single floating to integer conversion of the vector.
16108/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
16109/// becomes (fptosi (build_vector ($A, $B, ...)))
16110SDValue PPCTargetLowering::
16111combineElementTruncationToVectorTruncation(SDNode *N,
16112 DAGCombinerInfo &DCI) const {
16113 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16114 "Should be called with a BUILD_VECTOR node");
16115
16116 SelectionDAG &DAG = DCI.DAG;
16117 SDLoc dl(N);
16118
16119 SDValue FirstInput = N->getOperand(0);
16120 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
16121 "The input operand must be an fp-to-int conversion.");
16122
16123 // This combine happens after legalization so the fp_to_[su]i nodes are
16124 // already converted to PPCISD nodes.
16125 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
16126 if (FirstConversion == PPCISD::FCTIDZ ||
16127 FirstConversion == PPCISD::FCTIDUZ ||
16128 FirstConversion == PPCISD::FCTIWZ ||
16129 FirstConversion == PPCISD::FCTIWUZ) {
16130 bool IsSplat = true;
16131 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
16132 FirstConversion == PPCISD::FCTIWUZ;
16133 EVT SrcVT = FirstInput.getOperand(0).getValueType();
16134 SmallVector<SDValue, 4> Ops;
16135 EVT TargetVT = N->getValueType(0);
16136 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
16137 SDValue NextOp = N->getOperand(i);
16138 if (NextOp.getOpcode() != PPCISD::MFVSR)
16139 return SDValue();
16140 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
16141 if (NextConversion != FirstConversion)
16142 return SDValue();
16143 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
16144 // This is not valid if the input was originally double precision. It is
16145 // also not profitable to do unless this is an extending load, in which
16146 // case doing this combine will allow us to combine consecutive loads.
16147 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
16148 return SDValue();
16149 if (N->getOperand(i) != FirstInput)
16150 IsSplat = false;
16151 }
16152
16153 // If this is a splat, we leave it as-is since there will be only a single
16154 // fp-to-int conversion followed by a splat of the integer. This is better
16155 // for 32-bit and smaller ints and neutral for 64-bit ints.
16156 if (IsSplat)
16157 return SDValue();
16158
16159 // Now that we know we have the right type of node, get its operands
16160 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
16161 SDValue In = N->getOperand(i).getOperand(0);
16162 if (Is32Bit) {
16163 // For 32-bit values, we need to add an FP_ROUND node (if we made it
16164 // here, we know that all inputs are extending loads so this is safe).
16165 if (In.isUndef())
16166 Ops.push_back(DAG.getUNDEF(SrcVT));
16167 else {
16168 SDValue Trunc =
16169 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
16170 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
16171 Ops.push_back(Trunc);
16172 }
16173 } else
16174 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
16175 }
16176
16177 unsigned Opcode;
16178 if (FirstConversion == PPCISD::FCTIDZ ||
16179 FirstConversion == PPCISD::FCTIWZ)
16180 Opcode = ISD::FP_TO_SINT;
16181 else
16182 Opcode = ISD::FP_TO_UINT;
16183
16184 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
16185 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
16186 return DAG.getNode(Opcode, dl, TargetVT, BV);
16187 }
16188 return SDValue();
16189}
16190
16191 // The LXVKQ instruction loads a VSX vector with a special quadword value
16192 // based on an immediate value. This helper method returns the details of the
16193 // match as a tuple of {LXVKQ unsigned IMM value, right_shift_amount}
16194// to help generate the LXVKQ instruction and the subsequent shift instruction
16195// required to match the original build vector pattern.
16196
16197// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount}
16198using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
16199
16200static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
16201
16202 // LXVKQ instruction loads the Quadword value:
16203 // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
16204 static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64;
16205 static const uint32_t Uim = 16;
16206
16207 // Check for direct LXVKQ match (no shift needed)
16208 if (FullVal == BasePattern)
16209 return std::make_tuple(Uim, uint8_t{0});
16210
16211 // Check if FullVal is 1 (the result of the base pattern >> 127).
16212 if (FullVal == APInt(128, 1))
16213 return std::make_tuple(Uim, uint8_t{127});
16214
16215 return std::nullopt;
16216}
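
// Usage sketch for the helper above (illustrative): a FullVal equal to the
// base pattern 0x8000...0000 yields {16, 0}, i.e. a direct LXVKQ with
// UIM = 16; a FullVal of 1 yields {16, 127}, i.e. materialize the MSB
// pattern and shift it right by 127 so only the least significant bit is set.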
16217
16218 /// Combine vector loads to a single load (using lxvkq) or a splat with shift
16219 /// of a constant (xxspltib + vsrq) by recognising patterns in the build vector.
16220 /// The LXVKQ instruction loads a VSX vector with a special quadword value based
16221 /// on an immediate value. If UIM=0b10000, then LXVKQ loads VSR[32×TX+T] with
16222 /// value 0x8000_0000_0000_0000_0000_0000_0000_0000.
16223/// This can be used to inline the build vector constants that have the
16224/// following patterns:
16225///
16226/// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern)
16227/// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern)
16228 /// The MSB pattern can be loaded directly using LXVKQ, while the LSB pattern
16229 /// is loaded using a combination of splatting and right-shift instructions.
16230
16231SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
16232 SelectionDAG &DAG) const {
16233
16234 assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) &&
16235 "Expected a BuildVectorSDNode in combineBVLoadsSpecialValue");
16236
16237 // This transformation is only supported if we are loading either a byte,
16238 // halfword, word, or doubleword.
16239 EVT VT = Op.getValueType();
16240 if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
16241 VT == MVT::v2i64))
16242 return SDValue();
16243
16244 LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector ("
16245 << VT.getEVTString() << "): ";
16246 Op->dump());
16247
16248 unsigned NumElems = VT.getVectorNumElements();
16249 unsigned ElemBits = VT.getScalarSizeInBits();
16250
16251 bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
16252
16253 // Check for non-constant operands in the build vector.
16254 for (const SDValue &Operand : Op.getNode()->op_values()) {
16255 if (!isa<ConstantSDNode>(Operand))
16256 return SDValue();
16257 }
16258
16259 // Assemble build vector operands as a 128-bit register value
16260 // We need to reconstruct what the 128-bit register pattern would be
16261 // that produces this vector when interpreted with the current endianness
16262 APInt FullVal = APInt::getZero(128);
16263
16264 for (unsigned Index = 0; Index < NumElems; ++Index) {
16265 auto *C = cast<ConstantSDNode>(Op.getOperand(Index));
16266
16267 // Get element value as raw bits (zero-extended)
16268 uint64_t ElemValue = C->getZExtValue();
16269
16270 // Mask to element size to ensure we only get the relevant bits
16271 if (ElemBits < 64)
16272 ElemValue &= ((1ULL << ElemBits) - 1);
16273
16274 // Calculate bit position for this element in the 128-bit register
16275 unsigned BitPos =
16276 (IsLittleEndian) ? (Index * ElemBits) : (128 - (Index + 1) * ElemBits);
16277
16278 // Create APInt for the element value and shift it to correct position
16279 APInt ElemAPInt(128, ElemValue);
16280 ElemAPInt <<= BitPos;
16281
16282 // Place the element value at the correct bit position
16283 FullVal |= ElemAPInt;
16284 }
16285
16286 if (FullVal.isZero() || FullVal.isAllOnes())
16287 return SDValue();
16288
16289 if (auto UIMOpt = getPatternInfo(FullVal)) {
16290 const auto &[Uim, ShiftAmount] = *UIMOpt;
16291 SDLoc Dl(Op);
16292
16293 // Generate LXVKQ instruction if the shift amount is zero.
16294 if (ShiftAmount == 0) {
16295 SDValue UimVal = DAG.getTargetConstant(Uim, Dl, MVT::i32);
16296 SDValue LxvkqInstr =
16297 SDValue(DAG.getMachineNode(PPC::LXVKQ, Dl, VT, UimVal), 0);
16298 LLVM_DEBUG(llvm::dbgs()
16299 << "combineBVLoadsSpecialValue: Instruction Emitted ";
16300 LxvkqInstr.dump());
16301 return LxvkqInstr;
16302 }
16303
16304 assert(ShiftAmount == 127 && "Unexpected lxvkq shift amount value");
16305
16306 // The right shifted pattern can be constructed using a combination of
16307 // XXSPLTIB and VSRQ instructions. VSRQ uses the shift amount from the lower
16308 // 7 bits of byte 15. This can be specified using XXSPLTIB with immediate
16309 // value 255.
16310 SDValue ShiftAmountVec =
16311 SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32,
16312 DAG.getTargetConstant(255, Dl, MVT::i32)),
16313 0);
16314 // Generate appropriate right shift instruction
16315 SDValue ShiftVec = SDValue(
16316 DAG.getMachineNode(PPC::VSRQ, Dl, VT, ShiftAmountVec, ShiftAmountVec),
16317 0);
16318 LLVM_DEBUG(llvm::dbgs()
16319 << "\n combineBVLoadsSpecialValue: Instruction Emitted ";
16320 ShiftVec.dump());
16321 return ShiftVec;
16322 }
16323 // No patterns matched for build vectors.
16324 return SDValue();
16325}
16326
16327/// Reduce the number of loads when building a vector.
16328///
16329/// Building a vector out of multiple loads can be converted to a load
16330/// of the vector type if the loads are consecutive. If the loads are
16331/// consecutive but in descending order, a shuffle is added at the end
16332/// to reorder the vector.
16333 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
16334 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16335 "Should be called with a BUILD_VECTOR node");
16336
16337 SDLoc dl(N);
16338
16339 // Return early for non-byte-sized types, as they can't be consecutive.
16340 if (!N->getValueType(0).getVectorElementType().isByteSized())
16341 return SDValue();
16342
16343 bool InputsAreConsecutiveLoads = true;
16344 bool InputsAreReverseConsecutive = true;
16345 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
16346 SDValue FirstInput = N->getOperand(0);
16347 bool IsRoundOfExtLoad = false;
16348 LoadSDNode *FirstLoad = nullptr;
16349
16350 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
16351 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
16352 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
16353 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
16354 }
16355 // Not a build vector of (possibly fp_rounded) loads.
16356 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
16357 N->getNumOperands() == 1)
16358 return SDValue();
16359
16360 if (!IsRoundOfExtLoad)
16361 FirstLoad = cast<LoadSDNode>(FirstInput);
16362
16364 InputLoads.push_back(FirstLoad);
16365 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
16366 // If any inputs are fp_round(extload), they all must be.
16367 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
16368 return SDValue();
16369
16370 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
16371 N->getOperand(i);
16372 if (NextInput.getOpcode() != ISD::LOAD)
16373 return SDValue();
16374
16375 SDValue PreviousInput =
16376 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
16377 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
16378 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
16379
16380 // If any inputs are fp_round(extload), they all must be.
16381 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
16382 return SDValue();
16383
16384 // We only care about regular loads. The PPC-specific load intrinsics
16385 // will not lead to a merge opportunity.
16386 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
16387 InputsAreConsecutiveLoads = false;
16388 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
16389 InputsAreReverseConsecutive = false;
16390
16391 // Exit early if the loads are neither consecutive nor reverse consecutive.
16392 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
16393 return SDValue();
16394 InputLoads.push_back(LD2);
16395 }
16396
16397 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
16398 "The loads cannot be both consecutive and reverse consecutive.");
16399
16400 SDValue WideLoad;
16401 SDValue ReturnSDVal;
16402 if (InputsAreConsecutiveLoads) {
16403 assert(FirstLoad && "Input needs to be a LoadSDNode.");
16404 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
16405 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
16406 FirstLoad->getAlign());
16407 ReturnSDVal = WideLoad;
16408 } else if (InputsAreReverseConsecutive) {
16409 LoadSDNode *LastLoad = InputLoads.back();
16410 assert(LastLoad && "Input needs to be a LoadSDNode.");
16411 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
16412 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
16413 LastLoad->getAlign());
16414 SmallVector<int, 16> Ops;
16415 for (int i = N->getNumOperands() - 1; i >= 0; i--)
16416 Ops.push_back(i);
16417
16418 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
16419 DAG.getUNDEF(N->getValueType(0)), Ops);
16420 } else
16421 return SDValue();
16422
16423 for (auto *LD : InputLoads)
16424 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
16425 return ReturnSDVal;
16426}
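
// Illustrative example (not from the source): a v4i32 build_vector fed by
// four consecutive i32 loads from A, A+4, A+8 and A+12 becomes one v4i32
// load from A; if the loads instead arrive in descending address order, the
// wide load is taken from the lowest address and a vector_shuffle <3,2,1,0>
// restores the requested element order.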
16427
16428// This function adds the required vector_shuffle needed to get
16429// the elements of the vector extract in the correct position
16430// as specified by the CorrectElems encoding.
16431 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
16432 SDValue Input, uint64_t Elems,
16433 uint64_t CorrectElems) {
16434 SDLoc dl(N);
16435
16436 unsigned NumElems = Input.getValueType().getVectorNumElements();
16437 SmallVector<int, 16> ShuffleMask(NumElems, -1);
16438
16439 // Knowing the element indices being extracted from the original
16440 // vector and the order in which they're being inserted, just put
16441 // them at element indices required for the instruction.
16442 for (unsigned i = 0; i < N->getNumOperands(); i++) {
16443 if (DAG.getDataLayout().isLittleEndian())
16444 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
16445 else
16446 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
16447 CorrectElems = CorrectElems >> 8;
16448 Elems = Elems >> 8;
16449 }
16450
16451 SDValue Shuffle =
16452 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
16453 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
16454
16455 EVT VT = N->getValueType(0);
16456 SDValue Conv = DAG.getBitcast(VT, Shuffle);
16457
16458 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
16459 Input.getValueType().getVectorElementType(),
16460 N->getNumOperands());
16461 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
16462 DAG.getValueType(ExtVT));
16463}
16464
16465// Look for build vector patterns where input operands come from sign
16466// extended vector_extract elements of specific indices. If the correct indices
16467// aren't used, add a vector shuffle to fix up the indices and create
16468// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
16469// during instruction selection.
16470 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
16471 // This array encodes the indices that the vector sign extend instructions
16472 // extract from when extending from one type to another for both BE and LE.
16473 // The right nibble of each byte corresponds to the LE indices,
16474 // and the left nibble of each byte corresponds to the BE indices.
16475 // For example: 0x3074B8FC byte->word
16476 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
16477 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
16478 // For example: 0x000070F8 byte->double word
16479 // For LE: the allowed indices are: 0x0,0x8
16480 // For BE: the allowed indices are: 0x7,0xF
16481 uint64_t TargetElems[] = {
16482 0x3074B8FC, // b->w
16483 0x000070F8, // b->d
16484 0x10325476, // h->w
16485 0x00003074, // h->d
16486 0x00001032, // w->d
16487 };
16488
16489 uint64_t Elems = 0;
16490 int Index;
16491 SDValue Input;
16492
16493 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
16494 if (!Op)
16495 return false;
16496 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
16497 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
16498 return false;
16499
16500 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
16501 // of the right width.
16502 SDValue Extract = Op.getOperand(0);
16503 if (Extract.getOpcode() == ISD::ANY_EXTEND)
16504 Extract = Extract.getOperand(0);
16505 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16506 return false;
16507
16508 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
16509 if (!ExtOp)
16510 return false;
16511
16512 Index = ExtOp->getZExtValue();
16513 if (Input && Input != Extract.getOperand(0))
16514 return false;
16515
16516 if (!Input)
16517 Input = Extract.getOperand(0);
16518
16519 Elems = Elems << 8;
16520 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
16521 Elems |= Index;
16522
16523 return true;
16524 };
16525
16526 // If the build vector operands aren't sign extended vector extracts,
16527 // of the same input vector, then return.
16528 for (unsigned i = 0; i < N->getNumOperands(); i++) {
16529 if (!isSExtOfVecExtract(N->getOperand(i))) {
16530 return SDValue();
16531 }
16532 }
16533
16534 // If the vector extract indices are not correct, add the appropriate
16535 // vector_shuffle.
16536 int TgtElemArrayIdx;
16537 int InputSize = Input.getValueType().getScalarSizeInBits();
16538 int OutputSize = N->getValueType(0).getScalarSizeInBits();
16539 if (InputSize + OutputSize == 40)
16540 TgtElemArrayIdx = 0;
16541 else if (InputSize + OutputSize == 72)
16542 TgtElemArrayIdx = 1;
16543 else if (InputSize + OutputSize == 48)
16544 TgtElemArrayIdx = 2;
16545 else if (InputSize + OutputSize == 80)
16546 TgtElemArrayIdx = 3;
16547 else if (InputSize + OutputSize == 96)
16548 TgtElemArrayIdx = 4;
16549 else
16550 return SDValue();
16551
16552 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
16553 CorrectElems = DAG.getDataLayout().isLittleEndian()
16554 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
16555 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
16556 if (Elems != CorrectElems) {
16557 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
16558 }
16559
16560 // Regular lowering will catch cases where a shuffle is not needed.
16561 return SDValue();
16562}
16563
16564// Look for the pattern of a load from a narrow width to i128, feeding
16565// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
16566// (LXVRZX). This node represents a zero extending load that will be matched
16567// to the Load VSX Vector Rightmost instructions.
16568 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
16569 SDLoc DL(N);
16570
16571 // This combine is only eligible for a BUILD_VECTOR of v1i128.
16572 if (N->getValueType(0) != MVT::v1i128)
16573 return SDValue();
16574
16575 SDValue Operand = N->getOperand(0);
16576 // Proceed with the transformation if the operand to the BUILD_VECTOR
16577 // is a load instruction.
16578 if (Operand.getOpcode() != ISD::LOAD)
16579 return SDValue();
16580
16581 auto *LD = cast<LoadSDNode>(Operand);
16582 EVT MemoryType = LD->getMemoryVT();
16583
16584 // This transformation is only valid if we are loading either a byte,
16585 // halfword, word, or doubleword.
16586 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
16587 MemoryType == MVT::i32 || MemoryType == MVT::i64;
16588
16589 // Ensure that the load from the narrow width is being zero extended to i128.
16590 if (!ValidLDType ||
16591 (LD->getExtensionType() != ISD::ZEXTLOAD &&
16592 LD->getExtensionType() != ISD::EXTLOAD))
16593 return SDValue();
16594
16595 SDValue LoadOps[] = {
16596 LD->getChain(), LD->getBasePtr(),
16597 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
16598
16599 return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
16600 DAG.getVTList(MVT::v1i128, MVT::Other),
16601 LoadOps, MemoryType, LD->getMemOperand());
16602}
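
// Illustrative DAG for the rewrite above (assuming an ISA 3.1 subtarget):
//   (v1i128 build_vector (i128 zextload<i32> ptr))
// becomes
//   (v1i128 PPCISD::LXVRZX chain, ptr, 32)
// which is matched to the corresponding Load VSX Vector Rightmost
// instruction (lxvrwx in this example).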
16603
16604SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
16605 DAGCombinerInfo &DCI) const {
16606 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16607 "Should be called with a BUILD_VECTOR node");
16608
16609 SelectionDAG &DAG = DCI.DAG;
16610 SDLoc dl(N);
16611
16612 if (!Subtarget.hasVSX())
16613 return SDValue();
16614
16615 // The target independent DAG combiner will leave a build_vector of
16616 // float-to-int conversions intact. We can generate MUCH better code for
16617 // a float-to-int conversion of a vector of floats.
16618 SDValue FirstInput = N->getOperand(0);
16619 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
16620 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
16621 if (Reduced)
16622 return Reduced;
16623 }
16624
16625 // If we're building a vector out of consecutive loads, just load that
16626 // vector type.
16627 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
16628 if (Reduced)
16629 return Reduced;
16630
16631 // If we're building a vector out of extended elements from another vector
16632 // we have P9 vector integer extend instructions. The code assumes legal
16633 // input types (i.e. it can't handle things like v4i16) so do not run before
16634 // legalization.
16635 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
16636 Reduced = combineBVOfVecSExt(N, DAG);
16637 if (Reduced)
16638 return Reduced;
16639 }
16640
16641 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
16642 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
16643 // is a load from <valid narrow width> to i128.
16644 if (Subtarget.isISA3_1()) {
16645 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
16646 if (BVOfZLoad)
16647 return BVOfZLoad;
16648 }
16649
16650 if (N->getValueType(0) != MVT::v2f64)
16651 return SDValue();
16652
16653 // Looking for:
16654 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
16655 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
16656 FirstInput.getOpcode() != ISD::UINT_TO_FP)
16657 return SDValue();
16658 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
16659 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
16660 return SDValue();
16661 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
16662 return SDValue();
16663
16664 SDValue Ext1 = FirstInput.getOperand(0);
16665 SDValue Ext2 = N->getOperand(1).getOperand(0);
16666 if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16667 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16668 return SDValue();
16669
16670 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
16671 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
16672 if (!Ext1Op || !Ext2Op)
16673 return SDValue();
16674 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
16675 Ext1.getOperand(0) != Ext2.getOperand(0))
16676 return SDValue();
16677
16678 int FirstElem = Ext1Op->getZExtValue();
16679 int SecondElem = Ext2Op->getZExtValue();
16680 int SubvecIdx;
16681 if (FirstElem == 0 && SecondElem == 1)
16682 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
16683 else if (FirstElem == 2 && SecondElem == 3)
16684 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
16685 else
16686 return SDValue();
16687
16688 SDValue SrcVec = Ext1.getOperand(0);
16689 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
16690 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
16691 return DAG.getNode(NodeType, dl, MVT::v2f64,
16692 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
16693}
16694
16695SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
16696 DAGCombinerInfo &DCI) const {
16697 assert((N->getOpcode() == ISD::SINT_TO_FP ||
16698 N->getOpcode() == ISD::UINT_TO_FP) &&
16699 "Need an int -> FP conversion node here");
16700
16701 if (useSoftFloat() || !Subtarget.has64BitSupport())
16702 return SDValue();
16703
16704 SelectionDAG &DAG = DCI.DAG;
16705 SDLoc dl(N);
16706 SDValue Op(N, 0);
16707
16708 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
16709 // from the hardware.
16710 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
16711 return SDValue();
16712 if (!Op.getOperand(0).getValueType().isSimple())
16713 return SDValue();
16714 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
16715 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
16716 return SDValue();
16717
16718 SDValue FirstOperand(Op.getOperand(0));
16719 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
16720 (FirstOperand.getValueType() == MVT::i8 ||
16721 FirstOperand.getValueType() == MVT::i16);
16722 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
16723 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
16724 bool DstDouble = Op.getValueType() == MVT::f64;
16725 unsigned ConvOp = Signed ?
16726 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
16727 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
16728 SDValue WidthConst =
16729 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
16730 dl, false);
16731 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
16732 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
16733 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
16734 DAG.getVTList(MVT::f64, MVT::Other),
16735 Ops, MVT::i8, LDN->getMemOperand());
16736 DAG.makeEquivalentMemoryOrdering(LDN, Ld);
16737
16738 // For signed conversion, we need to sign-extend the value in the VSR
16739 if (Signed) {
16740 SDValue ExtOps[] = { Ld, WidthConst };
16741 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
16742 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
16743 } else
16744 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
16745 }
16746
16747
16748 // For i32 intermediate values, unfortunately, the conversion functions
16749 // leave the upper 32 bits of the value undefined. Within the set of
16750 // scalar instructions, we have no method for zero- or sign-extending the
16751 // value. Thus, we cannot handle i32 intermediate values here.
16752 if (Op.getOperand(0).getValueType() == MVT::i32)
16753 return SDValue();
16754
16755 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
16756 "UINT_TO_FP is supported only with FPCVT");
16757
16758 // If we have FCFIDS, then use it when converting to single-precision.
16759 // Otherwise, convert to double-precision and then round.
16760 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
16761 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
16762 : PPCISD::FCFIDS)
16763 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
16764 : PPCISD::FCFID);
16765 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
16766 ? MVT::f32
16767 : MVT::f64;
16768
16769 // If we're converting from a float, to an int, and back to a float again,
16770 // then we don't need the store/load pair at all.
16771 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
16772 Subtarget.hasFPCVT()) ||
16773 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
16774 SDValue Src = Op.getOperand(0).getOperand(0);
16775 if (Src.getValueType() == MVT::f32) {
16776 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
16777 DCI.AddToWorklist(Src.getNode());
16778 } else if (Src.getValueType() != MVT::f64) {
16779 // Make sure that we don't pick up a ppc_fp128 source value.
16780 return SDValue();
16781 }
16782
16783 unsigned FCTOp =
16784 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
16785 PPCISD::FCTIDUZ;
16786
16787 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
16788 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
16789
16790 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
16791 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
16792 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
16793 DCI.AddToWorklist(FP.getNode());
16794 }
16795
16796 return FP;
16797 }
16798
16799 return SDValue();
16800}
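
// Illustrative source pattern for the round-trip elimination above (not from
// the source): with FPCVT available,
//   double f(double x) { return (double)(long long)x; }
// stays in VSX registers as fctidz followed by fcfid, with no store/load of
// the intermediate integer.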
16801
16802// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
16803// builtins) into loads with swaps.
16804 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
16805 DAGCombinerInfo &DCI) const {
16806 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
16807 // load combines.
16808 if (DCI.isBeforeLegalizeOps())
16809 return SDValue();
16810
16811 SelectionDAG &DAG = DCI.DAG;
16812 SDLoc dl(N);
16813 SDValue Chain;
16814 SDValue Base;
16815 MachineMemOperand *MMO;
16816
16817 switch (N->getOpcode()) {
16818 default:
16819 llvm_unreachable("Unexpected opcode for little endian VSX load");
16820 case ISD::LOAD: {
16821 LoadSDNode *LD = cast<LoadSDNode>(N);
16822 Chain = LD->getChain();
16823 Base = LD->getBasePtr();
16824 MMO = LD->getMemOperand();
16825 // If the MMO suggests this isn't a load of a full vector, leave
16826 // things alone. For a built-in, we have to make the change for
16827 // correctness, so if there is a size problem that will be a bug.
16828 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16829 return SDValue();
16830 break;
16831 }
16832 case ISD::INTRINSIC_W_CHAIN: {
16833 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
16834 Chain = Intrin->getChain();
16835 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
16836 // us what we want. Get operand 2 instead.
16837 Base = Intrin->getOperand(2);
16838 MMO = Intrin->getMemOperand();
16839 break;
16840 }
16841 }
16842
16843 MVT VecTy = N->getValueType(0).getSimpleVT();
16844
16845 SDValue LoadOps[] = { Chain, Base };
16846 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
16847 DAG.getVTList(MVT::v2f64, MVT::Other),
16848 LoadOps, MVT::v2f64, MMO);
16849
16850 DCI.AddToWorklist(Load.getNode());
16851 Chain = Load.getValue(1);
16852 SDValue Swap = DAG.getNode(
16853 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
16854 DCI.AddToWorklist(Swap.getNode());
16855
16856 // Add a bitcast if the resulting load type doesn't match v2f64.
16857 if (VecTy != MVT::v2f64) {
16858 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
16859 DCI.AddToWorklist(N.getNode());
16860 // Package {bitcast value, swap's chain} to match Load's shape.
16861 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
16862 N, Swap.getValue(1));
16863 }
16864
16865 return Swap;
16866}
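
// Illustrative result (little endian, not from the source): a naturally
// aligned v4i32 VSX load becomes lxvd2x followed by xxswapd (plus a bitcast
// from v2f64), preserving the element order the rest of the DAG expects.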
16867
16868// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
16869// builtins) into stores with swaps.
16870 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
16871 DAGCombinerInfo &DCI) const {
16872 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
16873 // store combines.
16874 if (DCI.isBeforeLegalizeOps())
16875 return SDValue();
16876
16877 SelectionDAG &DAG = DCI.DAG;
16878 SDLoc dl(N);
16879 SDValue Chain;
16880 SDValue Base;
16881 unsigned SrcOpnd;
16882 MachineMemOperand *MMO;
16883
16884 switch (N->getOpcode()) {
16885 default:
16886 llvm_unreachable("Unexpected opcode for little endian VSX store");
16887 case ISD::STORE: {
16888 StoreSDNode *ST = cast<StoreSDNode>(N);
16889 Chain = ST->getChain();
16890 Base = ST->getBasePtr();
16891 MMO = ST->getMemOperand();
16892 SrcOpnd = 1;
16893 // If the MMO suggests this isn't a store of a full vector, leave
16894 // things alone. For a built-in, we have to make the change for
16895 // correctness, so if there is a size problem that will be a bug.
16896 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16897 return SDValue();
16898 break;
16899 }
16900 case ISD::INTRINSIC_VOID: {
16901 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
16902 Chain = Intrin->getChain();
16903 // Intrin->getBasePtr() oddly does not get what we want.
16904 Base = Intrin->getOperand(3);
16905 MMO = Intrin->getMemOperand();
16906 SrcOpnd = 2;
16907 break;
16908 }
16909 }
16910
16911 SDValue Src = N->getOperand(SrcOpnd);
16912 MVT VecTy = Src.getValueType().getSimpleVT();
16913
16914 // All stores are done as v2f64 with a possible bitcast.
16915 if (VecTy != MVT::v2f64) {
16916 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
16917 DCI.AddToWorklist(Src.getNode());
16918 }
16919
16920 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
16921 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
16922 DCI.AddToWorklist(Swap.getNode());
16923 Chain = Swap.getValue(1);
16924 SDValue StoreOps[] = { Chain, Swap, Base };
16925 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
16926 DAG.getVTList(MVT::Other),
16927 StoreOps, VecTy, MMO);
16928 DCI.AddToWorklist(Store.getNode());
16929 return Store;
16930}
16931
16932// Handle DAG combine for STORE (FP_TO_INT F).
16933SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
16934 DAGCombinerInfo &DCI) const {
16935 SelectionDAG &DAG = DCI.DAG;
16936 SDLoc dl(N);
16937 unsigned Opcode = N->getOperand(1).getOpcode();
16938 (void)Opcode;
16939 bool Strict = N->getOperand(1)->isStrictFPOpcode();
16940
16941 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
16942 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
16943 && "Not a FP_TO_INT Instruction!");
16944
16945 SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
16946 EVT Op1VT = N->getOperand(1).getValueType();
16947 EVT ResVT = Val.getValueType();
16948
16949 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
16950 return SDValue();
16951
16952 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
16953 bool ValidTypeForStoreFltAsInt =
16954 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
16955 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
16956
16957 // TODO: Lower conversion from f128 on all VSX targets
16958 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
16959 return SDValue();
16960
16961 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
16962 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
16963 return SDValue();
16964
16965 Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
16966
16967 // Set number of bytes being converted.
16968 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
16969 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
16970 DAG.getIntPtrConstant(ByteSize, dl, false),
16971 DAG.getValueType(Op1VT)};
16972
16973 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
16974 DAG.getVTList(MVT::Other), Ops,
16975 cast<StoreSDNode>(N)->getMemoryVT(),
16976 cast<StoreSDNode>(N)->getMemOperand());
16977
16978 return Val;
16979}
16980
16981static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
16982 // Check that the source of the element keeps flipping
16983 // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
16984 bool PrevElemFromFirstVec = Mask[0] < NumElts;
16985 for (int i = 1, e = Mask.size(); i < e; i++) {
16986 if (PrevElemFromFirstVec && Mask[i] < NumElts)
16987 return false;
16988 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
16989 return false;
16990 PrevElemFromFirstVec = !PrevElemFromFirstVec;
16991 }
16992 return true;
16993}
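
// Example (illustrative): for two v4i32 inputs, the mask <4,1,6,3>
// alternates second-/first-vector sources and is accepted, while <4,5,6,3>
// is rejected.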
16994
16995static bool isSplatBV(SDValue Op) {
16996 if (Op.getOpcode() != ISD::BUILD_VECTOR)
16997 return false;
16998 SDValue FirstOp;
16999
17000 // Find first non-undef input.
17001 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
17002 FirstOp = Op.getOperand(i);
17003 if (!FirstOp.isUndef())
17004 break;
17005 }
17006
17007 // All inputs are undef or the same as the first non-undef input.
17008 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
17009 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
17010 return false;
17011 return true;
17012}
17013
17014 static SDValue isScalarToVec(SDValue Op) {
17015 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
17016 return Op;
17017 if (Op.getOpcode() != ISD::BITCAST)
17018 return SDValue();
17019 Op = Op.getOperand(0);
17020 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
17021 return Op;
17022 return SDValue();
17023}
17024
17025// Fix up the shuffle mask to account for the fact that the result of
17026// scalar_to_vector is not in lane zero. This just takes all values in
17027// the ranges specified by the min/max indices and adds the number of
17028// elements required to ensure each element comes from the respective
17029// position in the valid lane.
17030// On little endian, that's just the corresponding element in the other
17031// half of the vector. On big endian, it is in the same half but right
17032// justified rather than left justified in that half.
17033 static void fixupShuffleMaskForPermutedSToV(
17034 SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
17035 int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
17036 unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
17037 int LHSEltFixup =
17038 Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
17039 int RHSEltFixup =
17040 Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
17041 for (int I = 0, E = ShuffV.size(); I < E; ++I) {
17042 int Idx = ShuffV[I];
17043 if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
17044 ShuffV[I] += LHSEltFixup;
17045 else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
17046 ShuffV[I] += RHSEltFixup;
17047 }
17048}
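
// Illustrative fixup (assumed values, not from the source): for v8i16 with
// HalfVec = 4 on little endian, a mask index 1 that referred to the LHS
// scalar_to_vector is rewritten to 1 + 4 = 5, pointing at the lane the
// permuted scalar actually occupies.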
17049
17050// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
17051// the original is:
17052// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
17053// In such a case, just change the shuffle mask to extract the element
17054// from the permuted index.
17055 static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
17056 const PPCSubtarget &Subtarget) {
17057 SDLoc dl(OrigSToV);
17058 EVT VT = OrigSToV.getValueType();
17059 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
17060 "Expecting a SCALAR_TO_VECTOR here");
17061 SDValue Input = OrigSToV.getOperand(0);
17062
17063 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
17064 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
17065 SDValue OrigVector = Input.getOperand(0);
17066
17067 // Can't handle non-const element indices or different vector types
17068 // for the input to the extract and the output of the scalar_to_vector.
17069 if (Idx && VT == OrigVector.getValueType()) {
17070 unsigned NumElts = VT.getVectorNumElements();
17071 assert(
17072 NumElts > 1 &&
17073 "Cannot produce a permuted scalar_to_vector for one element vector");
17074 SmallVector<int, 16> NewMask(NumElts, -1);
17075 unsigned ResultInElt = NumElts / 2;
17076 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
17077 NewMask[ResultInElt] = Idx->getZExtValue();
17078 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
17079 }
17080 }
17081 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
17082 OrigSToV.getOperand(0));
17083}
17084
17085 static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
17086 int HalfVec, int LHSLastElementDefined,
17087 int RHSLastElementDefined) {
17088 for (int Index : ShuffV) {
17089 if (Index < 0) // Skip explicitly undefined mask indices.
17090 continue;
17091 // Handle first input vector of the vector_shuffle.
17092 if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
17093 (Index > LHSLastElementDefined))
17094 return false;
17095 // Handle second input vector of the vector_shuffle.
17096 if ((RHSLastElementDefined >= 0) &&
17097 (Index > HalfVec + RHSLastElementDefined))
17098 return false;
17099 }
17100 return true;
17101}
17102
17103static SDValue generateSToVPermutedForVecShuffle(
17104 int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
17105 int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
17106 SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
17107 EVT VecShuffOperandType = VecShuffOperand.getValueType();
17108 // Set up the values for the shuffle vector fixup.
17109 NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
17110 // The last element depends on whether the input comes from the LHS or RHS.
17111 //
17112 // For example:
17113 // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
17114 //
17115 // For the LHS: The last element that comes from the LHS is actually 0, not 3
17116 // because elements 1 and higher of a scalar_to_vector are undefined.
17117 // For the RHS: The last element that comes from the RHS is actually 5, not 7
17118 // because elements 1 and higher of a scalar_to_vector are undefined.
17119 // It is also not 4 because the original scalar_to_vector is wider and
17120 // actually contains two i32 elements.
17121 LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
17122 ? ScalarSize / ShuffleEltWidth - 1 + FirstElt
17123 : FirstElt;
17124 SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
17125 if (SToVPermuted.getValueType() != VecShuffOperandType)
17126 SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
17127 return SToVPermuted;
17128}
17129
17130// On little endian subtargets, combine shuffles such as:
17131// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
17132// into:
17133// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
17134// because the latter can be matched to a single instruction merge.
17135// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
17136// to put the value into element zero. Adjust the shuffle mask so that the
17137// vector can remain in permuted form (to prevent a swap prior to a shuffle).
17138// On big endian targets, this is still useful for SCALAR_TO_VECTOR
17139// nodes with elements smaller than doubleword because all the ways
17140// of getting scalar data into a vector register put the value in the
17141// rightmost element of the left half of the vector.
17142SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
17143 SelectionDAG &DAG) const {
17144 SDValue LHS = SVN->getOperand(0);
17145 SDValue RHS = SVN->getOperand(1);
17146 auto Mask = SVN->getMask();
17147 int NumElts = LHS.getValueType().getVectorNumElements();
17148 SDValue Res(SVN, 0);
17149 SDLoc dl(SVN);
17150 bool IsLittleEndian = Subtarget.isLittleEndian();
17151
17152 // On big endian targets this is only useful for subtargets with direct moves.
17153 // On little endian targets it would be useful for all subtargets with VSX.
17154 // However, adding special handling for LE subtargets without direct moves
17155 // would be wasted effort since the minimum arch for LE is ISA 2.07
17156 // (Power8), which includes direct moves.
17157 if (!Subtarget.hasDirectMove())
17158 return Res;
17159
17160 // If this is not a shuffle of a shuffle and the first element comes from
17161 // the second vector, canonicalize to the commuted form. This will make it
17162 // more likely to match one of the single instruction patterns.
17163 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
17164 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
17165 std::swap(LHS, RHS);
17166 Res = DAG.getCommutedVectorShuffle(*SVN);
17167
17168 if (!isa<ShuffleVectorSDNode>(Res))
17169 return Res;
17170
17171 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
17172 }
17173
17174 // Adjust the shuffle mask if either input vector comes from a
17175 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
17176 // form (to prevent the need for a swap).
17177 SmallVector<int, 16> ShuffV(Mask);
17178 SDValue SToVLHS = isScalarToVec(LHS);
17179 SDValue SToVRHS = isScalarToVec(RHS);
17180 if (SToVLHS || SToVRHS) {
17181 EVT VT = SVN->getValueType(0);
17182 uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
17183 int ShuffleNumElts = ShuffV.size();
17184 int HalfVec = ShuffleNumElts / 2;
17185 // The width of the "valid lane" (i.e. the lane that contains the value that
17186 // is vectorized) needs to be expressed in terms of the number of elements
17187 // of the shuffle. It is therefore the ratio of the values before and after
17188 // any bitcast, which will be set later on if the LHS or RHS are
17189 // SCALAR_TO_VECTOR nodes.
17190 unsigned LHSNumValidElts = HalfVec;
17191 unsigned RHSNumValidElts = HalfVec;
17192
17193 // Initially assume that neither input is permuted. These will be adjusted
17194 // accordingly if either input is. Note that -1 means that all elements
17195 // are undefined.
17196 int LHSFirstElt = 0;
17197 int RHSFirstElt = ShuffleNumElts;
17198 int LHSLastElt = -1;
17199 int RHSLastElt = -1;
17200
17201 // Get the permuted scalar to vector nodes for the source(s) that come from
17202 // ISD::SCALAR_TO_VECTOR.
17203 // On big endian systems, this only makes sense for element sizes smaller
17204 // than 64 bits since for 64-bit elements, all instructions already put
17205 // the value into element zero. Since scalar size of LHS and RHS may differ
17206 // after isScalarToVec, this should be checked using their own sizes.
17207 int LHSScalarSize = 0;
17208 int RHSScalarSize = 0;
17209 if (SToVLHS) {
17210 LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
17211 if (!IsLittleEndian && LHSScalarSize >= 64)
17212 return Res;
17213 }
17214 if (SToVRHS) {
17215 RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
17216 if (!IsLittleEndian && RHSScalarSize >= 64)
17217 return Res;
17218 }
17219 if (LHSScalarSize != 0)
17220 LHS = generateSToVPermutedForVecShuffle(
17221 LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
17222 LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
17223 if (RHSScalarSize != 0)
17224 RHS = generateSToVPermutedForVecShuffle(
17225 RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
17226 RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
17227
17228 if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
17229 return Res;
17230
17231 // Fix up the shuffle mask to reflect where the desired element actually is.
17232 // The minimum and maximum indices that correspond to element zero for both
17233 // the LHS and RHS are computed and will control which shuffle mask entries
17234 // are to be changed. For example, if the RHS is permuted, any shuffle mask
17235 // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
17236 fixupShuffleMaskForPermutedSToV(
17237 ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
17238 LHSNumValidElts, RHSNumValidElts, Subtarget);
17239 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
17240
17241 // We may have simplified away the shuffle. We won't be able to do anything
17242 // further with it here.
17243 if (!isa<ShuffleVectorSDNode>(Res))
17244 return Res;
17245 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
17246 }
17247
17248 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
17249 // The common case after we commuted the shuffle is that the RHS is a splat
17250 // and we have elements coming in from the splat at indices that are not
17251 // conducive to using a merge.
17252 // Example:
17253 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
17254 if (!isSplatBV(TheSplat))
17255 return Res;
17256
17257 // We are looking for a mask such that all even elements are from
17258 // one vector and all odd elements from the other.
17259 if (!isAlternatingShuffMask(Mask, NumElts))
17260 return Res;
17261
17262 // Adjust the mask so we are pulling in the same index from the splat
17263 // as the index from the interesting vector in consecutive elements.
17264 if (IsLittleEndian) {
17265 // Example (even elements from first vector):
17266 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
17267 if (Mask[0] < NumElts)
17268 for (int i = 1, e = Mask.size(); i < e; i += 2) {
17269 if (ShuffV[i] < 0)
17270 continue;
17271 // If element from non-splat is undef, pick first element from splat.
17272 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
17273 }
17274 // Example (odd elements from first vector):
17275 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
17276 else
17277 for (int i = 0, e = Mask.size(); i < e; i += 2) {
17278 if (ShuffV[i] < 0)
17279 continue;
17280 // If element from non-splat is undef, pick first element from splat.
17281 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
17282 }
17283 } else {
17284 // Example (even elements from first vector):
17285 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
17286 if (Mask[0] < NumElts)
17287 for (int i = 0, e = Mask.size(); i < e; i += 2) {
17288 if (ShuffV[i] < 0)
17289 continue;
17290 // If element from non-splat is undef, pick first element from splat.
17291 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
17292 }
17293 // Example (odd elements from first vector):
17294 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
17295 else
17296 for (int i = 1, e = Mask.size(); i < e; i += 2) {
17297 if (ShuffV[i] < 0)
17298 continue;
17299 // If element from non-splat is undef, pick first element from splat.
17300 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
17301 }
17302 }
17303
17304 // If the RHS has undefs, we need to remove them since we may have created
17305 // a shuffle that adds those instead of the splat value.
17306 SDValue SplatVal =
17307 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
17308 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
17309
17310 if (IsLittleEndian)
17311 RHS = TheSplat;
17312 else
17313 LHS = TheSplat;
17314 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
17315}
17316
17317SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
17318 LSBaseSDNode *LSBase,
17319 DAGCombinerInfo &DCI) const {
17320 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
17321 "Not a reverse memop pattern!");
17322
17323 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
17324 auto Mask = SVN->getMask();
17325 int i = 0;
17326 auto I = Mask.rbegin();
17327 auto E = Mask.rend();
17328
17329 for (; I != E; ++I) {
17330 if (*I != i)
17331 return false;
17332 i++;
17333 }
17334 return true;
17335 };
17336
17337 SelectionDAG &DAG = DCI.DAG;
17338 EVT VT = SVN->getValueType(0);
17339
17340 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
17341 return SDValue();
17342
17343 // Before Power9, the PPCVSXSwapRemoval pass adjusts the element order.
17344 // See the comment in PPCVSXSwapRemoval.cpp.
17345 // This combine conflicts with that pass, so we do not perform it here.
17346 if (!Subtarget.hasP9Vector())
17347 return SDValue();
17348
17349 if (!IsElementReverse(SVN))
17350 return SDValue();
17351
17352 if (LSBase->getOpcode() == ISD::LOAD) {
17353 // If result 0 of the load has any user other than the shufflevector
17354 // instruction, it is not profitable to replace the shufflevector with a
17355 // reverse load.
17356 for (SDUse &Use : LSBase->uses())
17357 if (Use.getResNo() == 0 &&
17358 Use.getUser()->getOpcode() != ISD::VECTOR_SHUFFLE)
17359 return SDValue();
17360
17361 SDLoc dl(LSBase);
17362 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
17363 return DAG.getMemIntrinsicNode(
17364 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
17365 LSBase->getMemoryVT(), LSBase->getMemOperand());
17366 }
17367
17368 if (LSBase->getOpcode() == ISD::STORE) {
17369 // If there are other uses of the shuffle, the swap cannot be avoided.
17370 // Forcing the use of an X-Form (since swapped stores only have
17371 // X-Forms) without removing the swap is unprofitable.
17372 if (!SVN->hasOneUse())
17373 return SDValue();
17374
17375 SDLoc dl(LSBase);
17376 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
17377 LSBase->getBasePtr()};
17378 return DAG.getMemIntrinsicNode(
17379 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
17380 LSBase->getMemoryVT(), LSBase->getMemOperand());
17381 }
17382
17383 llvm_unreachable("Expected a load or store node here");
17384}
17385
17386static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
17387 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
17388 if (IntrinsicID == Intrinsic::ppc_stdcx)
17389 StoreWidth = 8;
17390 else if (IntrinsicID == Intrinsic::ppc_stwcx)
17391 StoreWidth = 4;
17392 else if (IntrinsicID == Intrinsic::ppc_sthcx)
17393 StoreWidth = 2;
17394 else if (IntrinsicID == Intrinsic::ppc_stbcx)
17395 StoreWidth = 1;
17396 else
17397 return false;
17398 return true;
17399}
17400
17401static SDValue DAGCombineAddc(SDNode *N,
17402 llvm::PPCTargetLowering::DAGCombinerInfo &DCI) {
17403 if (N->getOpcode() == PPCISD::ADDC && N->hasAnyUseOfValue(1)) {
17404 // (ADDC (ADDE 0, 0, C), -1) -> C
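  // Editorial note (not part of the original source): (ADDE 0, 0, C)
  // materializes the incoming carry C as a 0/1 value, and adding -1 to that
  // value produces a carry-out of exactly C, so the carry result of this
  // ADDC can be replaced by C directly.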
17405 SDValue LHS = N->getOperand(0);
17406 SDValue RHS = N->getOperand(1);
17407 if (LHS->getOpcode() == PPCISD::ADDE &&
17408 isNullConstant(LHS->getOperand(0)) &&
17409 isNullConstant(LHS->getOperand(1)) && isAllOnesConstant(RHS)) {
17410 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
17411 }
17412 }
17413 return SDValue();
17414}
17415
17416/// Optimize the bitfloor(X) pattern for PowerPC.
17417/// Transforms: select_cc X, 0, 0, (srl MinSignedValue, (ctlz X)), seteq
17418/// Into: srl MinSignedValue, (ctlz X)
17419///
17420/// This is safe on PowerPC because the srw instruction returns 0 when the
17421/// shift amount equals the bit width, which matches the behavior we need for X=0.
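/// Editorial example (not part of the original source): for X = 5 (i32),
/// ctlz(5) = 29 and 0x80000000u >> 29 = 4, the largest power of two <= 5;
/// for X = 0 the shift amount is 32 and srw produces 0, as required.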
17422static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG) {
17423 if (N->getOpcode() != ISD::SELECT_CC)
17424 return SDValue();
17425
17426 // SELECT_CC operands: LHS, RHS, TrueVal, FalseVal, CC
17427 SDValue CmpLHS = N->getOperand(0);
17428 SDValue CmpRHS = N->getOperand(1);
17429 SDValue TrueVal = N->getOperand(2);
17430 SDValue FalseVal = N->getOperand(3);
17431 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
17432
17433 // Check if condition is (X == 0)
17434 if (CC != ISD::SETEQ || !isNullConstant(CmpRHS))
17435 return SDValue();
17436
17437 // Check if TrueVal is constant 0
17438 if (!isNullConstant(TrueVal))
17439 return SDValue();
17440
17441 // This combine is replacing a select_cc with a PPC srl, not an srl with a
17442 // PPC srl. If the original srl had multiple uses it would just remain in the
17443 // code. This is at most a performance consideration.
17444 if (FalseVal.getOpcode() != ISD::SRL || !FalseVal.hasOneUse())
17445 return SDValue();
17446
17447 SDValue ShiftVal = FalseVal.getOperand(0);
17448 SDValue ShiftAmt = FalseVal.getOperand(1);
17449
17450 // Check if ShiftVal is MinSignedValue
17451 auto *ShiftConst = dyn_cast<ConstantSDNode>(ShiftVal);
17452 if (!ShiftConst || !ShiftConst->getAPIntValue().isMinSignedValue())
17453 return SDValue();
17454
17455 SDValue CtlzArg;
17456 // Check if ShiftAmt is (ctlz CmpLHS) or (truncate (ctlz ...))
17457 if (ShiftAmt.getOpcode() != ISD::CTLZ) {
17458 // Look through truncate if present (for i64 ctlz truncated to i32 shift
17459 // amount)
17460 if (ShiftAmt.getOpcode() != ISD::TRUNCATE)
17461 return SDValue();
17462
17463 // Verify the truncate target type is appropriate for shift amount (i32, not
17464 // i1 or other)
17465 if (ShiftAmt.getValueType() != MVT::i32)
17466 return SDValue();
17467
17468 SDValue CtlzNode = ShiftAmt.getOperand(0);
17469
17470 if (CtlzNode.getOpcode() != ISD::CTLZ)
17471 return SDValue();
17472
17473 CtlzArg = CtlzNode.getOperand(0);
17474 } else {
17475 CtlzArg = ShiftAmt.getOperand(0);
17476 }
17477
17478 // Check if ctlz operates on the same value as the comparison
17479 if (CtlzArg != CmpLHS)
17480 return SDValue();
17481
17482 // Using PPCISD::SRL to ensure well-defined behavior.
17483 // On PowerPC, PPCISD::SRL guarantees that shift by bitwidth returns 0,
17484 // which is exactly what we need for the bitfloor(0) case.
17485 SDLoc DL(N);
17486 SDValue PPCSrl =
17487 DAG.getNode(PPCISD::SRL, DL, FalseVal.getValueType(), ShiftVal, ShiftAmt);
17488 return PPCSrl;
17489}
17490
17491// Optimize zero-extension of setcc when the compared value is known to be 0
17492// or 1.
17493//
17494// Pattern: zext(setcc(Value, 0, seteq/setne)) where Value is 0 or 1
17495// -> zext(xor(Value, 1)) for seteq
17496// -> zext(Value) for setne
17497//
17498// This optimization avoids the i32 -> i1 -> i32/i64 conversion sequence
17499// by keeping the value in its original i32 type throughout.
17500//
17501// Example:
17502// Before: zext(setcc(test_data_class(...), 0, seteq))
17503// // test_data_class returns 0 or 1 in i32
17504// // setcc converts i32 -> i1
17505// // zext converts i1 -> i64
17506// After: zext(xor(test_data_class(...), 1))
17507// // Stays in i32, then extends to i64
17508//
17509// This is beneficial because:
17510// 1. Eliminates the setcc instruction
17511// 2. Avoids i32 -> i1 truncation
17512// 3. Keeps computation in native integer width
17513
17514static SDValue combineZextSetccWithZero(SDNode *N, SelectionDAG &DAG) {
17515 // Check if this is a zero_extend
17516 if (N->getOpcode() != ISD::ZERO_EXTEND)
17517 return SDValue();
17518
17519 SDValue Src = N->getOperand(0);
17520
17521 // Check if the source is a setcc
17522 if (Src.getOpcode() != ISD::SETCC)
17523 return SDValue();
17524
17525 SDValue LHS = Src.getOperand(0);
17526 SDValue RHS = Src.getOperand(1);
17527 ISD::CondCode CC = cast<CondCodeSDNode>(Src.getOperand(2))->get();
17528
17530 return SDValue();
17531
17532 SDValue NonNullConstant = isNullConstant(RHS) ? LHS : RHS;
17533
17534 auto isZeroOrOne = [=](SDValue &V) {
17535 if (V.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17536 V.getConstantOperandVal(0) == Intrinsic::ppc_test_data_class)
17537 return true;
17538 return false;
17539 };
17540
17541 if (!isZeroOrOne(NonNullConstant))
17542 return SDValue();
17543
17544 // Check for the patterns zext(setcc(Value, 0, seteq)) and
17545 // zext(setcc(Value, 0, setne)).
17546 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
17547 // Replace with: zext(xor(Value, 1)) for seteq
17548 // or: zext(Value) for setne
17549 // This keeps the value in i32 instead of converting to i1
17550 SDLoc DL(N);
17551 EVT VType = N->getValueType(0);
17552 SDValue NewNonNullConstant = DAG.getZExtOrTrunc(NonNullConstant, DL, VType);
17553
17554 if (CC == ISD::SETNE)
17555 return NewNonNullConstant;
17556
17557 SDValue One = DAG.getConstant(1, DL, VType);
17558 return DAG.getNode(ISD::XOR, DL, VType, NewNonNullConstant, One);
17559 }
17560
17561 return SDValue();
17562}
17563
17564// Combine XOR patterns with SELECT_CC_I4/I8, for example:
17565// 1. XOR(SELECT_CC_I4(cond, 1, 0, cc), 1) -> SELECT_CC_I4(cond, 0, 1, cc)
17566// 2. XOR(ZEXT(SELECT_CC_I4(cond, 1, 0, cc)), 1) -> SELECT_CC_I4/I8(cond, 0,
17567//    1, cc)
17568// 3. XOR(ANYEXT(SELECT_CC_I4(cond, 1, 0, cc)), 1) -> SELECT_CC_I4/I8(cond,
17569//    0, 1, cc)
17570// 4. etc.
17571static SDValue combineXorSelectCC(SDNode *N, SelectionDAG &DAG) {
17572 assert(N->getOpcode() == ISD::XOR && "Expected XOR node");
17573
17574 EVT XorVT = N->getValueType(0);
17575 if ((XorVT != MVT::i32 && XorVT != MVT::i64))
17576 return SDValue();
17577
17578 SDValue LHS = N->getOperand(0);
17579 SDValue RHS = N->getOperand(1);
17580
17581 // Check for XOR with constant 1
17582 ConstantSDNode *XorConst = dyn_cast<ConstantSDNode>(RHS);
17583 if (!XorConst || !XorConst->isOne()) {
17584 XorConst = dyn_cast<ConstantSDNode>(LHS);
17585 if (!XorConst || !XorConst->isOne())
17586 return SDValue();
17587 // Swap so LHS is the SELECT_CC_I4 (or extension) and RHS is the constant
17588 std::swap(LHS, RHS);
17589 }
17590
17591 // Check if LHS has only one use
17592 if (!LHS.hasOneUse())
17593 return SDValue();
17594
17595 // Handle extensions: ZEXT, ANYEXT
17596 SDValue SelectNode = LHS;
17597
17598 if (LHS.getOpcode() == ISD::ZERO_EXTEND ||
17599 LHS.getOpcode() == ISD::ANY_EXTEND) {
17600 SelectNode = LHS.getOperand(0);
17601
17602 // Check if the extension input has only one use
17603 if (!SelectNode.hasOneUse())
17604 return SDValue();
17605 }
17606
17607 // Check if SelectNode is a MachineSDNode with SELECT_CC_I4/I8 opcode
17608 if (!SelectNode.isMachineOpcode())
17609 return SDValue();
17610
17611 unsigned MachineOpc = SelectNode.getMachineOpcode();
17612
17613 // Handle both SELECT_CC_I4 and SELECT_CC_I8
17614 if (MachineOpc != PPC::SELECT_CC_I4 && MachineOpc != PPC::SELECT_CC_I8)
17615 return SDValue();
17616
17617 // SELECT_CC_I4 operands: (cond, true_val, false_val, bropc)
17618 if (SelectNode.getNumOperands() != 4)
17619 return SDValue();
17620
17621 ConstantSDNode *ConstOp1 = dyn_cast<ConstantSDNode>(SelectNode.getOperand(1));
17622 ConstantSDNode *ConstOp2 = dyn_cast<ConstantSDNode>(SelectNode.getOperand(2));
17623
17624 if (!ConstOp1 || !ConstOp2)
17625 return SDValue();
17626
17627 // Only optimize if operands are {0, 1} or {1, 0}
17628 if (!((ConstOp1->isOne() && ConstOp2->isZero()) ||
17629 (ConstOp1->isZero() && ConstOp2->isOne())))
17630 return SDValue();
17631
17632 // Pattern matched! Create new SELECT_CC with swapped 0/1 operands to
17633 // eliminate XOR. If original was SELECT_CC(cond, 1, 0, pred), create
17634 // SELECT_CC(cond, 0, 1, pred). If original was SELECT_CC(cond, 0, 1, pred),
17635 // create SELECT_CC(cond, 1, 0, pred).
17636 SDLoc DL(N);
17637 MachineOpc = (XorVT == MVT::i32) ? PPC::SELECT_CC_I4 : PPC::SELECT_CC_I8;
17638
17639 bool ConstOp1IsOne = ConstOp1->isOne();
17640 return SDValue(
17641 DAG.getMachineNode(MachineOpc, DL, XorVT,
17642 {SelectNode.getOperand(0),
17643 DAG.getConstant(ConstOp1IsOne ? 0 : 1, DL, XorVT),
17644 DAG.getConstant(ConstOp1IsOne ? 1 : 0, DL, XorVT),
17645 SelectNode.getOperand(3)}),
17646 0);
17647}
17648
17649SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
17650 DAGCombinerInfo &DCI) const {
17651 SelectionDAG &DAG = DCI.DAG;
17652 SDLoc dl(N);
17653 switch (N->getOpcode()) {
17654 default: break;
17655 case ISD::ADD:
17656 return combineADD(N, DCI);
17657 case ISD::AND: {
17658 // We don't want (and (zext (shift...)), C) if C fits in the width of the
17659 // original input as that will prevent us from selecting optimal rotates.
17660 // This only matters if the input to the extend is i32 widened to i64.
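    // Editorial example (not part of the original source):
    //   (and (zext (srl i32 %x, 8)), 255)
    // is narrowed here to
    //   (zext (and (srl i32 %x, 8), 255))
    // so the 32-bit shift-plus-mask can later be selected as a single
    // rotate-and-mask instruction.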
17661 SDValue Op1 = N->getOperand(0);
17662 SDValue Op2 = N->getOperand(1);
17663 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
17664 Op1.getOpcode() != ISD::ANY_EXTEND) ||
17665 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
17666 Op1.getOperand(0).getValueType() != MVT::i32)
17667 break;
17668 SDValue NarrowOp = Op1.getOperand(0);
17669 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
17670 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
17671 break;
17672
17673 uint64_t Imm = Op2->getAsZExtVal();
17674 // Make sure that the constant is narrow enough to fit in the narrow type.
17675 if (!isUInt<32>(Imm))
17676 break;
17677 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
17678 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
17679 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
17680 }
17681 case ISD::XOR: {
17682 // Optimize XOR(ISEL(1,0,CR), 1) -> ISEL(0,1,CR)
17683 if (SDValue V = combineXorSelectCC(N, DAG))
17684 return V;
17685 break;
17686 }
17687 case ISD::SHL:
17688 return combineSHL(N, DCI);
17689 case ISD::SRA:
17690 return combineSRA(N, DCI);
17691 case ISD::SRL:
17692 return combineSRL(N, DCI);
17693 case ISD::MUL:
17694 return combineMUL(N, DCI);
17695 case ISD::FMA:
17696 case PPCISD::FNMSUB:
17697 return combineFMALike(N, DCI);
17698 case PPCISD::SHL:
17699 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
17700 return N->getOperand(0);
17701 break;
17702 case PPCISD::SRL:
17703 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
17704 return N->getOperand(0);
17705 break;
17706 case PPCISD::SRA:
17707 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
17708 if (C->isZero() || // 0 >>s V -> 0.
17709 C->isAllOnes()) // -1 >>s V -> -1.
17710 return N->getOperand(0);
17711 }
17712 break;
17713 case ISD::SIGN_EXTEND:
17714 if (SDValue SECC = combineSignExtendSetCC(N, DCI))
17715 return SECC;
17716 [[fallthrough]];
17717 case ISD::ZERO_EXTEND:
17718 if (SDValue RetV = combineZextSetccWithZero(N, DCI.DAG))
17719 return RetV;
17720 [[fallthrough]];
17721 case ISD::ANY_EXTEND:
17722 return DAGCombineExtBoolTrunc(N, DCI);
17723 case ISD::TRUNCATE:
17724 return combineTRUNCATE(N, DCI);
17725 case ISD::SETCC:
17726 if (SDValue CSCC = combineSetCC(N, DCI))
17727 return CSCC;
17728 [[fallthrough]];
17729 case ISD::SELECT_CC:
17730 if (SDValue V = combineSELECT_CCBitFloor(N, DAG))
17731 return V;
17732 return DAGCombineTruncBoolExt(N, DCI);
17733 case ISD::SINT_TO_FP:
17734 case ISD::UINT_TO_FP:
17735 return combineFPToIntToFP(N, DCI);
17736 case ISD::VECTOR_SHUFFLE:
17737 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
17738 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
17739 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
17740 }
17741 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
17742 case ISD::STORE: {
17743
17744 EVT Op1VT = N->getOperand(1).getValueType();
17745 unsigned Opcode = N->getOperand(1).getOpcode();
17746
17747 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
17748 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
17749 SDValue Val = combineStoreFPToInt(N, DCI);
17750 if (Val)
17751 return Val;
17752 }
17753
17754 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
17755 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
17756 SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
17757 if (Val)
17758 return Val;
17759 }
17760
17761 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
17762 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
17763 N->getOperand(1).getNode()->hasOneUse() &&
17764 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
17765 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
17766
17767 // STBRX can only handle simple types, and it makes no sense to store
17768 // fewer than two bytes in byte-reversed order.
17769 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
17770 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
17771 break;
17772
17773 SDValue BSwapOp = N->getOperand(1).getOperand(0);
17774 // Do an any-extend to 32-bits if this is a half-word input.
17775 if (BSwapOp.getValueType() == MVT::i16)
17776 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
17777
17778 // If the type of the BSWAP operand is wider than the stored memory
17779 // width, it needs to be shifted right before the STBRX.
17780 if (Op1VT.bitsGT(mVT)) {
17781 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
17782 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
17783 DAG.getConstant(Shift, dl, MVT::i32));
17784 // Need to truncate if this is a bswap of i64 stored as i32/i16.
17785 if (Op1VT == MVT::i64)
17786 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
17787 }
17788
17789 SDValue Ops[] = {
17790 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
17791 };
17792 return
17793 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
17794 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
17795 cast<StoreSDNode>(N)->getMemOperand());
17796 }
17797
17798 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
17799 // This increases the chance of CSEing the constant construction.
17800 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
17801 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
17802 // Need to sign-extend to 64 bits to handle negative values.
17803 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
17804 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
17805 MemVT.getSizeInBits());
17806 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
17807
17808 auto *ST = cast<StoreSDNode>(N);
17809 SDValue NewST = DAG.getStore(ST->getChain(), dl, Const64,
17810 ST->getBasePtr(), ST->getOffset(), MemVT,
17811 ST->getMemOperand(), ST->getAddressingMode(),
17812 /*IsTruncating=*/true);
17813 // Note we use CombineTo here to prevent DAGCombiner from visiting the
17814 // new store which will change the constant by removing non-demanded bits.
17815 return ST->isUnindexed()
17816 ? DCI.CombineTo(N, NewST, /*AddTo=*/false)
17817 : DCI.CombineTo(N, NewST, NewST.getValue(1), /*AddTo=*/false);
17818 }
17819
17820 // For little endian, VSX stores require generating xxswapd/stxvd2x.
17821 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17822 if (Op1VT.isSimple()) {
17823 MVT StoreVT = Op1VT.getSimpleVT();
17824 if (Subtarget.needsSwapsForVSXMemOps() &&
17825 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
17826 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
17827 return expandVSXStoreForLE(N, DCI);
17828 }
17829 break;
17830 }
17831 case ISD::LOAD: {
17832 LoadSDNode *LD = cast<LoadSDNode>(N);
17833 EVT VT = LD->getValueType(0);
17834
17835 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17836 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17837 if (VT.isSimple()) {
17838 MVT LoadVT = VT.getSimpleVT();
17839 if (Subtarget.needsSwapsForVSXMemOps() &&
17840 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
17841 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
17842 return expandVSXLoadForLE(N, DCI);
17843 }
17844
17845 // We sometimes end up with a 64-bit integer load, from which we extract
17846 // two single-precision floating-point numbers. This happens with
17847 // std::complex<float>, and other similar structures, because of the way we
17848 // canonicalize structure copies. However, if we lack direct moves,
17849 // then the final bitcasts from the extracted integer values to the
17850 // floating-point numbers turn into store/load pairs. Even with direct moves,
17851 // just loading the two floating-point numbers is likely better.
17852 auto ReplaceTwoFloatLoad = [&]() {
17853 if (VT != MVT::i64)
17854 return false;
17855
17856 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
17857 LD->isVolatile())
17858 return false;
17859
17860 // We're looking for a sequence like this:
17861 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
17862 // t16: i64 = srl t13, Constant:i32<32>
17863 // t17: i32 = truncate t16
17864 // t18: f32 = bitcast t17
17865 // t19: i32 = truncate t13
17866 // t20: f32 = bitcast t19
17867
17868 if (!LD->hasNUsesOfValue(2, 0))
17869 return false;
17870
17871 auto UI = LD->user_begin();
17872 while (UI.getUse().getResNo() != 0) ++UI;
17873 SDNode *Trunc = *UI++;
17874 while (UI.getUse().getResNo() != 0) ++UI;
17875 SDNode *RightShift = *UI;
17876 if (Trunc->getOpcode() != ISD::TRUNCATE)
17877 std::swap(Trunc, RightShift);
17878
17879 if (Trunc->getOpcode() != ISD::TRUNCATE ||
17880 Trunc->getValueType(0) != MVT::i32 ||
17881 !Trunc->hasOneUse())
17882 return false;
17883 if (RightShift->getOpcode() != ISD::SRL ||
17884 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
17885 RightShift->getConstantOperandVal(1) != 32 ||
17886 !RightShift->hasOneUse())
17887 return false;
17888
17889 SDNode *Trunc2 = *RightShift->user_begin();
17890 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
17891 Trunc2->getValueType(0) != MVT::i32 ||
17892 !Trunc2->hasOneUse())
17893 return false;
17894
17895 SDNode *Bitcast = *Trunc->user_begin();
17896 SDNode *Bitcast2 = *Trunc2->user_begin();
17897
17898 if (Bitcast->getOpcode() != ISD::BITCAST ||
17899 Bitcast->getValueType(0) != MVT::f32)
17900 return false;
17901 if (Bitcast2->getOpcode() != ISD::BITCAST ||
17902 Bitcast2->getValueType(0) != MVT::f32)
17903 return false;
17904
17905 if (Subtarget.isLittleEndian())
17906 std::swap(Bitcast, Bitcast2);
17907
17908 // Bitcast has the second float (in memory-layout order) and Bitcast2
17909 // has the first one.
17910
17911 SDValue BasePtr = LD->getBasePtr();
17912 if (LD->isIndexed()) {
17913 assert(LD->getAddressingMode() == ISD::PRE_INC &&
17914 "Non-pre-inc AM on PPC?");
17915 BasePtr =
17916 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
17917 LD->getOffset());
17918 }
17919
17920 auto MMOFlags =
17921 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
17922 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
17923 LD->getPointerInfo(), LD->getAlign(),
17924 MMOFlags, LD->getAAInfo());
17925 SDValue AddPtr =
17926 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
17927 BasePtr, DAG.getIntPtrConstant(4, dl));
17928 SDValue FloatLoad2 = DAG.getLoad(
17929 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
17930 LD->getPointerInfo().getWithOffset(4),
17931 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
17932
17933 if (LD->isIndexed()) {
17934 // Note that DAGCombine should re-form any pre-increment load(s) from
17935 // what is produced here if that makes sense.
17936 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
17937 }
17938
17939 DCI.CombineTo(Bitcast2, FloatLoad);
17940 DCI.CombineTo(Bitcast, FloatLoad2);
17941
17942 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
17943 SDValue(FloatLoad2.getNode(), 1));
17944 return true;
17945 };
17946
17947 if (ReplaceTwoFloatLoad())
17948 return SDValue(N, 0);
17949
17950 EVT MemVT = LD->getMemoryVT();
17951 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
17952 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
17953 if (LD->isUnindexed() && VT.isVector() &&
17954 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
17955 // P8 and later hardware should just use LOAD.
17956 !Subtarget.hasP8Vector() &&
17957 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
17958 VT == MVT::v4f32))) &&
17959 LD->getAlign() < ABIAlignment) {
17960 // This is a type-legal unaligned Altivec load.
17961 SDValue Chain = LD->getChain();
17962 SDValue Ptr = LD->getBasePtr();
17963 bool isLittleEndian = Subtarget.isLittleEndian();
17964
17965 // This implements the loading of unaligned vectors as described in
17966 // the venerable Apple Velocity Engine overview. Specifically:
17967 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
17968 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
17969 //
17970 // The general idea is to expand a sequence of one or more unaligned
17971 // loads into an alignment-based permutation-control instruction (lvsl
17972 // or lvsr), a series of regular vector loads (which always truncate
17973 // their input address to an aligned address), and a series of
17974 // permutations. The results of these permutations are the requested
17975 // loaded values. The trick is that the last "extra" load is not taken
17976 // from the address you might suspect (sizeof(vector) bytes after the
17977 // last requested load), but rather sizeof(vector) - 1 bytes after the
17978 // last requested vector. The point of this is to avoid a page fault if
17979 // the base address happened to be aligned. This works because if the
17980 // base address is aligned, then adding less than a full vector length
17981 // will cause the last vector in the sequence to be (re)loaded.
17982 // Otherwise, the next vector will be fetched as you might suspect was
17983 // necessary.
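    // Editorial sketch (not part of the original source) of the expansion
    // for a single unaligned 16-byte load, in pseudocode:
    //   permcntl = lvsl(addr)      // lvsr on little endian
    //   base     = lvx(addr)       // loads from the truncated (aligned) addr
    //   extra    = lvx(addr + 15)  // sizeof(vector) - 1 bytes later, or +16
    //                              // when a consecutive load exists (above)
    //   result   = vperm(base, extra, permcntl)  // operands swapped on LE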
17984
17985 // We might be able to reuse the permutation generation from
17986 // a different base address offset from this one by an aligned amount.
17987 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
17988 // optimization later.
17989 Intrinsic::ID Intr, IntrLD, IntrPerm;
17990 MVT PermCntlTy, PermTy, LDTy;
17991 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17992 : Intrinsic::ppc_altivec_lvsl;
17993 IntrLD = Intrinsic::ppc_altivec_lvx;
17994 IntrPerm = Intrinsic::ppc_altivec_vperm;
17995 PermCntlTy = MVT::v16i8;
17996 PermTy = MVT::v4i32;
17997 LDTy = MVT::v4i32;
17998
17999 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
18000
18001 // Create the new MMO for the new base load. It is like the original MMO,
18002 // but represents an area in memory almost twice the vector size centered
18003 // on the original address. If the address is unaligned, we might start
18004 // reading up to (sizeof(vector)-1) bytes below the address of the
18005 // original unaligned load.
18006 MachineFunction &MF = DAG.getMachineFunction();
18007 MachineMemOperand *BaseMMO =
18008 MF.getMachineMemOperand(LD->getMemOperand(),
18009 -(int64_t)MemVT.getStoreSize()+1,
18010 2*MemVT.getStoreSize()-1);
18011
18012 // Create the new base load.
18013 SDValue LDXIntID =
18014 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
18015 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
18016 SDValue BaseLoad =
18017 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
18018 DAG.getVTList(PermTy, MVT::Other),
18019 BaseLoadOps, LDTy, BaseMMO);
18020
18021 // Note that the value of IncOffset (which is provided to the next
18022 // load's pointer info offset value, and thus used to calculate the
18023 // alignment), and the value of IncValue (which is actually used to
18024 // increment the pointer value) are different! This is because we
18025 // require the next load to appear to be aligned, even though it
18026 // is actually offset from the base pointer by a lesser amount.
18027 int IncOffset = VT.getSizeInBits() / 8;
18028 int IncValue = IncOffset;
18029
18030 // Walk (both up and down) the chain looking for another load at the real
18031 // (aligned) offset (the alignment of the other load does not matter in
18032 // this case). If found, then do not use the offset reduction trick, as
18033 // that will prevent the loads from being later combined (as they would
18034 // otherwise be duplicates).
18035 if (!findConsecutiveLoad(LD, DAG))
18036 --IncValue;
18037
18039 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
18040 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
18041
18042 MachineMemOperand *ExtraMMO =
18043 MF.getMachineMemOperand(LD->getMemOperand(),
18044 1, 2*MemVT.getStoreSize()-1);
18045 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
18046 SDValue ExtraLoad =
18047 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
18048 DAG.getVTList(PermTy, MVT::Other),
18049 ExtraLoadOps, LDTy, ExtraMMO);
18050
18051 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
18052 BaseLoad.getValue(1), ExtraLoad.getValue(1));
18053
18054 // Because vperm has a big-endian bias, we must reverse the order
18055 // of the input vectors and complement the permute control vector
18056 // when generating little endian code. We have already handled the
18057 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
18058 // and ExtraLoad here.
18059 SDValue Perm;
18060 if (isLittleEndian)
18061 Perm = BuildIntrinsicOp(IntrPerm,
18062 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
18063 else
18064 Perm = BuildIntrinsicOp(IntrPerm,
18065 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
18066
18067 if (VT != PermTy)
18068 Perm = Subtarget.hasAltivec()
18069 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
18070 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
18071 DAG.getTargetConstant(1, dl, MVT::i64));
18072 // second argument is 1 because this rounding
18073 // is always exact.
18074
18075 // The output of the permutation is our loaded result, the TokenFactor is
18076 // our new chain.
18077 DCI.CombineTo(N, Perm, TF);
18078 return SDValue(N, 0);
18079 }
18080 }
18081 break;
18082 case ISD::INTRINSIC_WO_CHAIN: {
18083 bool isLittleEndian = Subtarget.isLittleEndian();
18084 unsigned IID = N->getConstantOperandVal(0);
18085 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
18086 : Intrinsic::ppc_altivec_lvsl);
18087 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
18088 SDValue Add = N->getOperand(1);
18089
18090 int Bits = 4 /* 16 byte alignment */;
18091
18092 if (DAG.MaskedValueIsZero(Add->getOperand(1),
18093 APInt::getAllOnes(Bits /* alignment */)
18094 .zext(Add.getScalarValueSizeInBits()))) {
18095 SDNode *BasePtr = Add->getOperand(0).getNode();
18096 for (SDNode *U : BasePtr->users()) {
18097 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
18098 U->getConstantOperandVal(0) == IID) {
18099 // We've found another LVSL/LVSR, and this address is an aligned
18100 // multiple of that one. The results will be the same, so use the
18101 // one we've just found instead.
18102
18103 return SDValue(U, 0);
18104 }
18105 }
18106 }
18107
18108 if (isa<ConstantSDNode>(Add->getOperand(1))) {
18109 SDNode *BasePtr = Add->getOperand(0).getNode();
18110 for (SDNode *U : BasePtr->users()) {
18111 if (U->getOpcode() == ISD::ADD &&
18112 isa<ConstantSDNode>(U->getOperand(1)) &&
18113 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
18114 (1ULL << Bits) ==
18115 0) {
18116 SDNode *OtherAdd = U;
18117 for (SDNode *V : OtherAdd->users()) {
18118 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
18119 V->getConstantOperandVal(0) == IID) {
18120 return SDValue(V, 0);
18121 }
18122 }
18123 }
18124 }
18125 }
18126 }
18127
18128 // Combine vmaxsw/h/b(a, a's negation) into abs(a).
18129 // This exposes the vabsduw/h/b opportunity downstream.
18130 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
18131 (IID == Intrinsic::ppc_altivec_vmaxsw ||
18132 IID == Intrinsic::ppc_altivec_vmaxsh ||
18133 IID == Intrinsic::ppc_altivec_vmaxsb)) {
18134 SDValue V1 = N->getOperand(1);
18135 SDValue V2 = N->getOperand(2);
18136 if ((V1.getSimpleValueType() == MVT::v4i32 ||
18137 V1.getSimpleValueType() == MVT::v8i16 ||
18138 V1.getSimpleValueType() == MVT::v16i8) &&
18139 V1.getSimpleValueType() == V2.getSimpleValueType()) {
18140 // (0-a, a)
18141 if (V1.getOpcode() == ISD::SUB &&
18142 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
18143 V1.getOperand(1) == V2) {
18144 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
18145 }
18146 // (a, 0-a)
18147 if (V2.getOpcode() == ISD::SUB &&
18148 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
18149 V2.getOperand(1) == V1) {
18150 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
18151 }
18152 // (x-y, y-x)
18153 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
18154 V1.getOperand(0) == V2.getOperand(1) &&
18155 V1.getOperand(1) == V2.getOperand(0)) {
18156 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
18157 }
18158 }
18159 }
18160 }
18161
18162 break;
18163 case ISD::INTRINSIC_W_CHAIN:
18164 switch (N->getConstantOperandVal(1)) {
18165 default:
18166 break;
18167 case Intrinsic::ppc_altivec_vsum4sbs:
18168 case Intrinsic::ppc_altivec_vsum4shs:
18169 case Intrinsic::ppc_altivec_vsum4ubs: {
18170 // These sum-across intrinsics only have a chain due to the side effect
18171 // that they may set the SAT bit. If we know the SAT bit will not be set
18172 // for some inputs, we can replace any uses of their chain with the
18173 // input chain.
18174 if (BuildVectorSDNode *BVN =
18175 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
18176 APInt APSplatBits, APSplatUndef;
18177 unsigned SplatBitSize;
18178 bool HasAnyUndefs;
18179 bool BVNIsConstantSplat = BVN->isConstantSplat(
18180 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
18181 !Subtarget.isLittleEndian());
18182 // If the constant splat vector is 0, the SAT bit will not be set.
18183 if (BVNIsConstantSplat && APSplatBits == 0)
18184 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
18185 }
18186 return SDValue();
18187 }
18188 case Intrinsic::ppc_vsx_lxvw4x:
18189 case Intrinsic::ppc_vsx_lxvd2x:
18190 // For little endian, VSX loads require generating lxvd2x/xxswapd.
18191 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
18192 if (Subtarget.needsSwapsForVSXMemOps())
18193 return expandVSXLoadForLE(N, DCI);
18194 break;
18195 }
18196 break;
18197 case ISD::INTRINSIC_VOID:
18198 // For little endian, VSX stores require generating xxswapd/stxvd2x.
18199 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
18200 if (Subtarget.needsSwapsForVSXMemOps()) {
18201 switch (N->getConstantOperandVal(1)) {
18202 default:
18203 break;
18204 case Intrinsic::ppc_vsx_stxvw4x:
18205 case Intrinsic::ppc_vsx_stxvd2x:
18206 return expandVSXStoreForLE(N, DCI);
18207 }
18208 }
18209 break;
18210 case ISD::BSWAP: {
18211 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
18212 // For subtargets without LDBRX, we can still do better than the default
18213 // expansion even for 64-bit BSWAP (LOAD).
18214 bool Is64BitBswapOn64BitTgt =
18215 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
18216 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
18217 N->getOperand(0).hasOneUse();
18218 if (IsSingleUseNormalLd &&
18219 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
18220 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
18221 SDValue Load = N->getOperand(0);
18222 LoadSDNode *LD = cast<LoadSDNode>(Load);
18223 // Create the byte-swapping load.
18224 SDValue Ops[] = {
18225 LD->getChain(), // Chain
18226 LD->getBasePtr(), // Ptr
18227 DAG.getValueType(N->getValueType(0)) // VT
18228 };
18229 SDValue BSLoad =
18230 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
18231 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
18232 MVT::i64 : MVT::i32, MVT::Other),
18233 Ops, LD->getMemoryVT(), LD->getMemOperand());
18234
18235 // If this is an i16 load, insert the truncate.
18236 SDValue ResVal = BSLoad;
18237 if (N->getValueType(0) == MVT::i16)
18238 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
18239
18240 // First, combine the bswap away. This makes the value produced by the
18241 // load dead.
18242 DCI.CombineTo(N, ResVal);
18243
18244 // Next, combine the load away; we give it a bogus result value but a
18245 // real chain result. The result value is dead because the bswap is dead.
18246 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
18247
18248 // Return N so it doesn't get rechecked!
18249 return SDValue(N, 0);
18250 }
18251 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
18252 // before legalization so that the BUILD_PAIR is handled correctly.
18253 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
18254 !IsSingleUseNormalLd)
18255 return SDValue();
18256 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
18257
18258 // Can't split volatile or atomic loads.
18259 if (!LD->isSimple())
18260 return SDValue();
18261 SDValue BasePtr = LD->getBasePtr();
18262 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
18263 LD->getPointerInfo(), LD->getAlign());
18264 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
18265 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
18266 DAG.getIntPtrConstant(4, dl));
18267 MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
18268 LD->getMemOperand(), 4, 4);
18269 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
18270 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
18271 SDValue Res;
18272 if (Subtarget.isLittleEndian())
18273 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
18274 else
18275 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
18276 SDValue TF =
18277 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
18278 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
18279 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
18280 return Res;
18281 }
18282 case PPCISD::VCMP:
18283 // If a VCMP_rec node already exists with exactly the same operands as this
18284 // node, use its result instead of this node (VCMP_rec computes both a CR6
18285 // and a normal output).
18286 //
18287 if (!N->getOperand(0).hasOneUse() &&
18288 !N->getOperand(1).hasOneUse() &&
18289 !N->getOperand(2).hasOneUse()) {
18290
18291 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
18292 SDNode *VCMPrecNode = nullptr;
18293
18294 SDNode *LHSN = N->getOperand(0).getNode();
18295 for (SDNode *User : LHSN->users())
18296 if (User->getOpcode() == PPCISD::VCMP_rec &&
18297 User->getOperand(1) == N->getOperand(1) &&
18298 User->getOperand(2) == N->getOperand(2) &&
18299 User->getOperand(0) == N->getOperand(0)) {
18300 VCMPrecNode = User;
18301 break;
18302 }
18303
18304 // If there is no VCMP_rec node, or if the flag value has a single use,
18305 // don't transform this.
18306 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
18307 break;
18308
18309 // Look at the (necessarily single) use of the flag value. If it has a
18310 // chain, this transformation is more complex. Note that multiple things
18311 // could use the value result, which we should ignore.
18312 SDNode *FlagUser = nullptr;
18313 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
18314 FlagUser == nullptr; ++UI) {
18315 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
18316 SDNode *User = UI->getUser();
18317 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
18318 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
18319 FlagUser = User;
18320 break;
18321 }
18322 }
18323 }
18324
18325 // If the user is a MFOCRF instruction, we know this is safe.
18326 // Otherwise we give up for right now.
18327 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
18328 return SDValue(VCMPrecNode, 0);
18329 }
18330 break;
18331 case ISD::BR_CC: {
18332 // If this is a branch on an altivec predicate comparison, lower this so
18333 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
18334 // lowering is done pre-legalize, because the legalizer lowers the predicate
18335 // compare down to code that is difficult to reassemble.
18336 // This code also handles branches that depend on the result of a store
18337 // conditional.
18338 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
18339 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
18340
18341 int CompareOpc;
18342 bool isDot;
18343
18344 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
18345 break;
18346
18347 // Since we are doing this pre-legalize, the RHS can be a constant of
18348 // arbitrary bitwidth which may cause issues when trying to get the value
18349 // from the underlying APInt.
18350 auto RHSAPInt = RHS->getAsAPIntVal();
18351 if (!RHSAPInt.isIntN(64))
18352 break;
18353
18354 unsigned Val = RHSAPInt.getZExtValue();
18355 auto isImpossibleCompare = [&]() {
18356 // If this is a comparison against something other than 0/1, then we know
18357 // that the condition is never/always true.
18358 if (Val != 0 && Val != 1) {
18359 if (CC == ISD::SETEQ) // Cond never true, remove branch.
18360 return N->getOperand(0);
18361 // Always !=, turn it into an unconditional branch.
18362 return DAG.getNode(ISD::BR, dl, MVT::Other,
18363 N->getOperand(0), N->getOperand(4));
18364 }
18365 return SDValue();
18366 };
18367 // Combine branches fed by store conditional instructions (st[bhwd]cx).
18368 unsigned StoreWidth = 0;
18369 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
18370 isStoreConditional(LHS, StoreWidth)) {
18371 if (SDValue Impossible = isImpossibleCompare())
18372 return Impossible;
18373 PPC::Predicate CompOpc;
18374 // eq 0 => ne
18375 // ne 0 => eq
18376 // eq 1 => eq
18377 // ne 1 => ne
18378 if (Val == 0)
18379 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
18380 else
18381 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
18382
18383 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
18384 DAG.getConstant(StoreWidth, dl, MVT::i32)};
18385 auto *MemNode = cast<MemSDNode>(LHS);
18386 SDValue ConstSt = DAG.getMemIntrinsicNode(
18387 PPCISD::STORE_COND, dl,
18388 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
18389 MemNode->getMemoryVT(), MemNode->getMemOperand());
18390
18391 SDValue InChain;
18392 // Unchain the branch from the original store conditional.
18393 if (N->getOperand(0) == LHS.getValue(1))
18394 InChain = LHS.getOperand(0);
18395 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
18396 SmallVector<SDValue, 4> InChains;
18397 SDValue InTF = N->getOperand(0);
18398 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
18399 if (InTF.getOperand(i) != LHS.getValue(1))
18400 InChains.push_back(InTF.getOperand(i));
18401 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
18402 }
18403
18404 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
18405 DAG.getConstant(CompOpc, dl, MVT::i32),
18406 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
18407 ConstSt.getValue(2));
18408 }
18409
18410 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
18411 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
18412 assert(isDot && "Can't compare against a vector result!");
18413
18414 if (SDValue Impossible = isImpossibleCompare())
18415 return Impossible;
18416
18417 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
18418 // Create the PPCISD altivec 'dot' comparison node.
18419 SDValue Ops[] = {
18420 LHS.getOperand(2), // LHS of compare
18421 LHS.getOperand(3), // RHS of compare
18422 DAG.getConstant(CompareOpc, dl, MVT::i32)
18423 };
18424 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
18425 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
18426
18427 // Unpack the result based on how the target uses it.
18428 PPC::Predicate CompOpc;
18429 switch (LHS.getConstantOperandVal(1)) {
18430 default: // Can't happen, don't crash on invalid number though.
18431 case 0: // Branch on the value of the EQ bit of CR6.
18432 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
18433 break;
18434 case 1: // Branch on the inverted value of the EQ bit of CR6.
18435 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
18436 break;
18437 case 2: // Branch on the value of the LT bit of CR6.
18438 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
18439 break;
18440 case 3: // Branch on the inverted value of the LT bit of CR6.
18441 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
18442 break;
18443 }
18444
18445 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
18446 DAG.getConstant(CompOpc, dl, MVT::i32),
18447 DAG.getRegister(PPC::CR6, MVT::i32),
18448 N->getOperand(4), CompNode.getValue(1));
18449 }
18450 break;
18451 }
18452 case ISD::BUILD_VECTOR:
18453 return DAGCombineBuildVector(N, DCI);
18454 case PPCISD::ADDC:
18455 return DAGCombineAddc(N, DCI);
18456
18457 case ISD::BITCAST:
18458 return DAGCombineBitcast(N, DCI);
18459 }
18460
18461 return SDValue();
18462}
18463
18464SDValue
18465PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
18466 SelectionDAG &DAG,
18467 SmallVectorImpl<SDNode *> &Created) const {
18468 // fold (sdiv X, pow2)
18469 EVT VT = N->getValueType(0);
18470 if (VT == MVT::i64 && !Subtarget.isPPC64())
18471 return SDValue();
18472 if ((VT != MVT::i32 && VT != MVT::i64) ||
18473 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
18474 return SDValue();
18475
18476 SDLoc DL(N);
18477 SDValue N0 = N->getOperand(0);
18478
18479 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
18480 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
18481 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
18482
18483 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
18484 Created.push_back(Op.getNode());
18485
18486 if (IsNegPow2) {
18487 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
18488 Created.push_back(Op.getNode());
18489 }
18490
18491 return Op;
18492}
18493
18494//===----------------------------------------------------------------------===//
18495// Inline Assembly Support
18496//===----------------------------------------------------------------------===//
18497
18498void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
18499 KnownBits &Known,
18500 const APInt &DemandedElts,
18501 const SelectionDAG &DAG,
18502 unsigned Depth) const {
18503 Known.resetAll();
18504 switch (Op.getOpcode()) {
18505 default: break;
18506 case PPCISD::LBRX: {
18507 // lhbrx is known to have the top bits cleared out.
18508 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
18509 Known.Zero = 0xFFFF0000;
18510 break;
18511 }
18512 case PPCISD::ADDE: {
18513 if (Op.getResNo() == 0) {
18514 // (0|1), _ = ADDE 0, 0, CARRY
18515 SDValue LHS = Op.getOperand(0);
18516 SDValue RHS = Op.getOperand(1);
18517 if (isNullConstant(LHS) && isNullConstant(RHS))
18518 Known.Zero = ~1ULL;
18519 }
18520 break;
18521 }
18522 case ISD::INTRINSIC_WO_CHAIN: {
18523 switch (Op.getConstantOperandVal(0)) {
18524 default: break;
18525 case Intrinsic::ppc_altivec_vcmpbfp_p:
18526 case Intrinsic::ppc_altivec_vcmpeqfp_p:
18527 case Intrinsic::ppc_altivec_vcmpequb_p:
18528 case Intrinsic::ppc_altivec_vcmpequh_p:
18529 case Intrinsic::ppc_altivec_vcmpequw_p:
18530 case Intrinsic::ppc_altivec_vcmpequd_p:
18531 case Intrinsic::ppc_altivec_vcmpequq_p:
18532 case Intrinsic::ppc_altivec_vcmpgefp_p:
18533 case Intrinsic::ppc_altivec_vcmpgtfp_p:
18534 case Intrinsic::ppc_altivec_vcmpgtsb_p:
18535 case Intrinsic::ppc_altivec_vcmpgtsh_p:
18536 case Intrinsic::ppc_altivec_vcmpgtsw_p:
18537 case Intrinsic::ppc_altivec_vcmpgtsd_p:
18538 case Intrinsic::ppc_altivec_vcmpgtsq_p:
18539 case Intrinsic::ppc_altivec_vcmpgtub_p:
18540 case Intrinsic::ppc_altivec_vcmpgtuh_p:
18541 case Intrinsic::ppc_altivec_vcmpgtuw_p:
18542 case Intrinsic::ppc_altivec_vcmpgtud_p:
18543 case Intrinsic::ppc_altivec_vcmpgtuq_p:
18544 Known.Zero = ~1U; // All bits but the low one are known to be zero.
18545 break;
18546 }
18547 break;
18548 }
18549 case ISD::INTRINSIC_W_CHAIN: {
18550 switch (Op.getConstantOperandVal(1)) {
18551 default:
18552 break;
18553 case Intrinsic::ppc_load2r:
18554 // Top bits are cleared for load2r (which is the same as lhbrx).
18555 Known.Zero = 0xFFFF0000;
18556 break;
18557 }
18558 break;
18559 }
18560 }
18561}
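// Worked reading of the cases above: lhbrx/load2r yield a byte-reversed
// 16-bit value zero-extended in a 32-bit register, so Known.Zero =
// 0xFFFF0000 marks bits 16-31 as known zero; the AltiVec compare-predicate
// intrinsics produce only 0 or 1, so all bits but the lowest are known
// zero (Known.Zero = ~1U).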
18562
18563Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
18564 switch (Subtarget.getCPUDirective()) {
18565 default: break;
18566 case PPC::DIR_970:
18567 case PPC::DIR_PWR4:
18568 case PPC::DIR_PWR5:
18569 case PPC::DIR_PWR5X:
18570 case PPC::DIR_PWR6:
18571 case PPC::DIR_PWR6X:
18572 case PPC::DIR_PWR7:
18573 case PPC::DIR_PWR8:
18574 case PPC::DIR_PWR9:
18575 case PPC::DIR_PWR10:
18576 case PPC::DIR_PWR11:
18577 case PPC::DIR_PWR_FUTURE: {
18578 if (!ML)
18579 break;
18580
18581 if (!DisableInnermostLoopAlign32) {
18582 // If the nested loop is an innermost loop, prefer a 32-byte alignment
18583 // so that we can decrease cache misses and branch-prediction misses.
18584 // Actual alignment of the loop will depend on the hotness check and other
18585 // logic in alignBlocks.
18586 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
18587 return Align(32);
18588 }
18589
18590 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
18591
18592 // For small loops (between 5 and 8 instructions), align to a 32-byte
18593 // boundary so that the entire loop fits in one instruction-cache line.
18594 uint64_t LoopSize = 0;
18595 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
18596 for (const MachineInstr &J : **I) {
18597 LoopSize += TII->getInstSizeInBytes(J);
18598 if (LoopSize > 32)
18599 break;
18600 }
18601
18602 if (LoopSize > 16 && LoopSize <= 32)
18603 return Align(32);
18604
18605 break;
18606 }
18607 }
18608
18609 return TargetLowering::getPrefLoopAlignment(ML);
18610}
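// Example (hypothetical loop sizes): a 7-instruction loop is 28 bytes, so
// LoopSize lands in (16, 32] and the loop gets Align(32), fitting in one
// 32-byte instruction-cache line; a 16-byte loop keeps the default
// alignment, where padding would likely cost more than it saves.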
18611
18612/// getConstraintType - Given a constraint, return the type of
18613/// constraint it is for this target.
18614PPCTargetLowering::ConstraintType
18615PPCTargetLowering::getConstraintType(StringRef Constraint) const {
18616 if (Constraint.size() == 1) {
18617 switch (Constraint[0]) {
18618 default: break;
18619 case 'b':
18620 case 'r':
18621 case 'f':
18622 case 'd':
18623 case 'v':
18624 case 'y':
18625 return C_RegisterClass;
18626 case 'Z':
18627 // FIXME: While Z does indicate a memory constraint, it specifically
18628 // indicates an r+r address (used in conjunction with the 'y' modifier
18629 // in the replacement string). Currently, we're forcing the base
18630 // register to be r0 in the asm printer (which is interpreted as zero)
18631 // and forming the complete address in the second register. This is
18632 // suboptimal.
18633 return C_Memory;
18634 }
18635 } else if (Constraint == "wc") { // individual CR bits.
18636 return C_RegisterClass;
18637 } else if (Constraint == "wa" || Constraint == "wd" ||
18638 Constraint == "wf" || Constraint == "ws" ||
18639 Constraint == "wi" || Constraint == "ww") {
18640 return C_RegisterClass; // VSX registers.
18641 }
18642 return TargetLowering::getConstraintType(Constraint);
18643}
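// Usage sketch (assumed user code, not from this file): these constraints
// appear in GCC-style inline assembly, e.g.
//   __asm__("lxvd2x %x0,%y1" : "=wa"(vec) : "Z"(*src));
// where "wa" requests any VSX register and "Z" an r+r (indexed) memory
// operand printed with the 'y' modifier; 'vec' and 'src' are hypothetical.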
18644
18645/// Examine constraint type and operand type and determine a weight value.
18646/// This object must already have been set up with the operand type
18647/// and the current alternative constraint selected.
18648TargetLowering::ConstraintWeight
18649PPCTargetLowering::getSingleConstraintMatchWeight(
18650 AsmOperandInfo &info, const char *constraint) const {
18651 ConstraintWeight weight = CW_Invalid;
18652 Value *CallOperandVal = info.CallOperandVal;
18653 // If we don't have a value, we can't do a match,
18654 // but allow it at the lowest weight.
18655 if (!CallOperandVal)
18656 return CW_Default;
18657 Type *type = CallOperandVal->getType();
18658
18659 // Look at the constraint type.
18660 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
18661 return CW_Register; // an individual CR bit.
18662 else if ((StringRef(constraint) == "wa" ||
18663 StringRef(constraint) == "wd" ||
18664 StringRef(constraint) == "wf") &&
18665 type->isVectorTy())
18666 return CW_Register;
18667 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
18668 return CW_Register; // holds 64-bit integer data.
18669 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
18670 return CW_Register;
18671 else if (StringRef(constraint) == "ww" && type->isFloatTy())
18672 return CW_Register;
18673
18674 switch (*constraint) {
18675 default:
18676 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
18677 break;
18678 case 'b':
18679 if (type->isIntegerTy())
18680 weight = CW_Register;
18681 break;
18682 case 'f':
18683 if (type->isFloatTy())
18684 weight = CW_Register;
18685 break;
18686 case 'd':
18687 if (type->isDoubleTy())
18688 weight = CW_Register;
18689 break;
18690 case 'v':
18691 if (type->isVectorTy())
18692 weight = CW_Register;
18693 break;
18694 case 'y':
18695 weight = CW_Register;
18696 break;
18697 case 'Z':
18698 weight = CW_Memory;
18699 break;
18700 }
18701 return weight;
18702}
18703
18704std::pair<unsigned, const TargetRegisterClass *>
18705PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
18706 StringRef Constraint,
18707 MVT VT) const {
18708 if (Constraint.size() == 1) {
18709 // GCC RS6000 Constraint Letters
18710 switch (Constraint[0]) {
18711 case 'b': // R1-R31
18712 if (VT == MVT::i64 && Subtarget.isPPC64())
18713 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
18714 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
18715 case 'r': // R0-R31
18716 if (VT == MVT::i64 && Subtarget.isPPC64())
18717 return std::make_pair(0U, &PPC::G8RCRegClass);
18718 return std::make_pair(0U, &PPC::GPRCRegClass);
18719 // 'd' and 'f' constraints are both defined to be "the floating point
18720 // registers", where one is for 32-bit and the other for 64-bit. We don't
18721 // really care which here, so just give them all the same reg classes.
18722 case 'd':
18723 case 'f':
18724 if (Subtarget.hasSPE()) {
18725 if (VT == MVT::f32 || VT == MVT::i32)
18726 return std::make_pair(0U, &PPC::GPRCRegClass);
18727 if (VT == MVT::f64 || VT == MVT::i64)
18728 return std::make_pair(0U, &PPC::SPERCRegClass);
18729 } else {
18730 if (VT == MVT::f32 || VT == MVT::i32)
18731 return std::make_pair(0U, &PPC::F4RCRegClass);
18732 if (VT == MVT::f64 || VT == MVT::i64)
18733 return std::make_pair(0U, &PPC::F8RCRegClass);
18734 }
18735 break;
18736 case 'v':
18737 if (Subtarget.hasAltivec() && VT.isVector())
18738 return std::make_pair(0U, &PPC::VRRCRegClass);
18739 else if (Subtarget.hasVSX())
18740 // Scalars in Altivec registers only make sense with VSX.
18741 return std::make_pair(0U, &PPC::VFRCRegClass);
18742 break;
18743 case 'y': // crrc
18744 return std::make_pair(0U, &PPC::CRRCRegClass);
18745 }
18746 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
18747 // An individual CR bit.
18748 return std::make_pair(0U, &PPC::CRBITRCRegClass);
18749 } else if ((Constraint == "wa" || Constraint == "wd" ||
18750 Constraint == "wf" || Constraint == "wi") &&
18751 Subtarget.hasVSX()) {
18752 // A VSX register for either a scalar (FP) or vector. There is no
18753 // support for single precision scalars on subtargets prior to Power8.
18754 if (VT.isVector())
18755 return std::make_pair(0U, &PPC::VSRCRegClass);
18756 if (VT == MVT::f32 && Subtarget.hasP8Vector())
18757 return std::make_pair(0U, &PPC::VSSRCRegClass);
18758 return std::make_pair(0U, &PPC::VSFRCRegClass);
18759 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
18760 if (VT == MVT::f32 && Subtarget.hasP8Vector())
18761 return std::make_pair(0U, &PPC::VSSRCRegClass);
18762 else
18763 return std::make_pair(0U, &PPC::VSFRCRegClass);
18764 } else if (Constraint == "lr") {
18765 if (VT == MVT::i64)
18766 return std::make_pair(0U, &PPC::LR8RCRegClass);
18767 else
18768 return std::make_pair(0U, &PPC::LRRCRegClass);
18769 }
18770
18771 // Handle special cases of physical registers that are not properly handled
18772 // by the base class.
18773 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
18774 // If we name a VSX register, we can't defer to the base class because it
18775 // will not recognize the correct register (their names will be VSL{0-31}
18776 // and V{0-31} so they won't match). So we match them here.
18777 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
18778 int VSNum = atoi(Constraint.data() + 3);
18779 assert(VSNum >= 0 && VSNum <= 63 &&
18780 "Attempted to access a vsr out of range");
18781 if (VSNum < 32)
18782 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
18783 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
18784 }
18785
18786 // For float registers, we can't defer to the base class as it will match
18787 // the SPILLTOVSRRC class.
18788 if (Constraint.size() > 3 && Constraint[1] == 'f') {
18789 int RegNum = atoi(Constraint.data() + 2);
18790 if (RegNum > 31 || RegNum < 0)
18791 report_fatal_error("Invalid floating point register number");
18792 if (VT == MVT::f32 || VT == MVT::i32)
18793 return Subtarget.hasSPE()
18794 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
18795 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
18796 if (VT == MVT::f64 || VT == MVT::i64)
18797 return Subtarget.hasSPE()
18798 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
18799 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
18800 }
18801 }
18802
18803 std::pair<unsigned, const TargetRegisterClass *> R =
18804 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
18805
18806 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
18807 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
18808 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
18809 // register.
18810 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
18811 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
18812 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
18813 PPC::GPRCRegClass.contains(R.first))
18814 return std::make_pair(TRI->getMatchingSuperReg(R.first,
18815 PPC::sub_32, &PPC::G8RCRegClass),
18816 &PPC::G8RCRegClass);
18817
18818 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
18819 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
18820 R.first = PPC::CR0;
18821 R.second = &PPC::CRRCRegClass;
18822 }
18823 // FIXME: This warning should ideally be emitted in the front end.
18824 const auto &TM = getTargetMachine();
18825 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
18826 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
18827 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
18828 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
18829 errs() << "warning: vector registers 20 to 32 are reserved in the "
18830 "default AIX AltiVec ABI and cannot be used\n";
18831 }
18832
18833 return R;
18834}
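// Illustration of the explicit-register path above: "{vs3}" resolves to
// PPC::VSL3 in the VSRC class, while "{vs34}" resolves to PPC::V2
// (34 - 32), since VSX registers 32-63 overlay the AltiVec V registers.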
18835
18836/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
18837/// vector. If it is invalid, don't add anything to Ops.
18838void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
18839 StringRef Constraint,
18840 std::vector<SDValue> &Ops,
18841 SelectionDAG &DAG) const {
18842 SDValue Result;
18843
18844 // Only support length 1 constraints.
18845 if (Constraint.size() > 1)
18846 return;
18847
18848 char Letter = Constraint[0];
18849 switch (Letter) {
18850 default: break;
18851 case 'I':
18852 case 'J':
18853 case 'K':
18854 case 'L':
18855 case 'M':
18856 case 'N':
18857 case 'O':
18858 case 'P': {
18859 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
18860 if (!CST) return; // Must be an immediate to match.
18861 SDLoc dl(Op);
18862 int64_t Value = CST->getSExtValue();
18863 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
18864 // numbers are printed as such.
18865 switch (Letter) {
18866 default: llvm_unreachable("Unknown constraint letter!");
18867 case 'I': // "I" is a signed 16-bit constant.
18868 if (isInt<16>(Value))
18869 Result = DAG.getTargetConstant(Value, dl, TCVT);
18870 break;
18871 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
18872 if (isShiftedUInt<16, 16>(Value))
18873 Result = DAG.getTargetConstant(Value, dl, TCVT);
18874 break;
18875 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
18876 if (isShiftedInt<16, 16>(Value))
18877 Result = DAG.getTargetConstant(Value, dl, TCVT);
18878 break;
18879 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
18880 if (isUInt<16>(Value))
18881 Result = DAG.getTargetConstant(Value, dl, TCVT);
18882 break;
18883 case 'M': // "M" is a constant that is greater than 31.
18884 if (Value > 31)
18885 Result = DAG.getTargetConstant(Value, dl, TCVT);
18886 break;
18887 case 'N': // "N" is a positive constant that is an exact power of two.
18888 if (Value > 0 && isPowerOf2_64(Value))
18889 Result = DAG.getTargetConstant(Value, dl, TCVT);
18890 break;
18891 case 'O': // "O" is the constant zero.
18892 if (Value == 0)
18893 Result = DAG.getTargetConstant(Value, dl, TCVT);
18894 break;
18895 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
18896 if (isInt<16>(-Value))
18897 Result = DAG.getTargetConstant(Value, dl, TCVT);
18898 break;
18899 }
18900 break;
18901 }
18902 }
18903
18904 if (Result.getNode()) {
18905 Ops.push_back(Result);
18906 return;
18907 }
18908
18909 // Handle standard constraint letters.
18910 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
18911}
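// Example (hypothetical operands): with constraint "I", a use such as
//   asm("addi %0,%1,%2" : "=r"(d) : "r"(s), "I"(12));
// matches because 12 is a signed 16-bit constant. A value like 70000 fails
// the isInt<16> check above, nothing is pushed to Ops, and the operand is
// treated as invalid.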
18912
18913void PPCTargetLowering::CollectTargetIntrinsicOperands(
18914 const CallInst &I, SmallVectorImpl<SDValue> &Ops,
18915 SelectionDAG &DAG) const {
18916 if (I.getNumOperands() <= 1)
18917 return;
18918 if (!isa<ConstantSDNode>(Ops[1].getNode()))
18919 return;
18920 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
18921 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
18922 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
18923 return;
18924
18925 if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
18926 Ops.push_back(DAG.getMDNode(MDN));
18927}
18928
18929// isLegalAddressingMode - Return true if the addressing mode represented
18930// by AM is legal for this target, for a load/store of the specified type.
18931bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
18932 const AddrMode &AM, Type *Ty,
18933 unsigned AS,
18934 Instruction *I) const {
18935 // Vector-type r+i form has been supported since Power9, as DQ form. We don't
18936 // check the DQ-form offset requirement (off % 16 == 0) here because, on
18937 // PowerPC, the imm form is preferred and the offset can be adjusted to use the
18938 // imm form later, in the PPCLoopInstrFormPrep pass. Also, in LSR, one LSRUse
18939 // uses its min and max offsets to check for a legal addressing mode, so we
18940 // should be a little aggressive and admit other offsets for that LSRUse.
18941 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
18942 return false;
18943
18944 // PPC allows a sign-extended 16-bit immediate field.
18945 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
18946 return false;
18947
18948 // No global is ever allowed as a base.
18949 if (AM.BaseGV)
18950 return false;
18951
18952 // PPC only support r+r,
18953 switch (AM.Scale) {
18954 case 0: // "r+i" or just "i", depending on HasBaseReg.
18955 break;
18956 case 1:
18957 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
18958 return false;
18959 // Otherwise we have r+r or r+i.
18960 break;
18961 case 2:
18962 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
18963 return false;
18964 // Allow 2*r as r+r.
18965 break;
18966 default:
18967 // No other scales are supported.
18968 return false;
18969 }
18970
18971 return true;
18972}
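// Examples of the checks above: {BaseReg, BaseOffs = 32, Scale = 0} is
// legal (r+i), {BaseReg, Scale = 1} is legal (r+r), but
// {BaseReg, BaseOffs = 8, Scale = 1} (r+r+i) is rejected, as is Scale = 2
// with a base register or offset, and any larger scale.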
18973
18974SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
18975 SelectionDAG &DAG) const {
18976 MachineFunction &MF = DAG.getMachineFunction();
18977 MachineFrameInfo &MFI = MF.getFrameInfo();
18978 MFI.setReturnAddressIsTaken(true);
18979
18980 SDLoc dl(Op);
18981 unsigned Depth = Op.getConstantOperandVal(0);
18982
18983 // Make sure the function does not optimize away the store of the RA to
18984 // the stack.
18985 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
18986 FuncInfo->setLRStoreRequired();
18987 auto PtrVT = getPointerTy(MF.getDataLayout());
18988
18989 if (Depth > 0) {
18990 // The link register (return address) is saved in the caller's frame
18991 // not the callee's stack frame. So we must get the caller's frame
18992 // address and load the return address at the LR offset from there.
18993 SDValue FrameAddr =
18994 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
18995 LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
18996 SDValue Offset =
18997 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
18998 Subtarget.getScalarIntVT());
18999 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
19000 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
19001 MachinePointerInfo());
19002 }
19003
19004 // Just load the return address off the stack.
19005 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
19006 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
19007 MachinePointerInfo());
19008}
19009
19010SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
19011 SelectionDAG &DAG) const {
19012 SDLoc dl(Op);
19013 unsigned Depth = Op.getConstantOperandVal(0);
19014
19015 MachineFunction &MF = DAG.getMachineFunction();
19016 MachineFrameInfo &MFI = MF.getFrameInfo();
19017 MFI.setFrameAddressIsTaken(true);
19018
19019 EVT PtrVT = getPointerTy(MF.getDataLayout());
19020 bool isPPC64 = PtrVT == MVT::i64;
19021
19022 // Naked functions never have a frame pointer, and so we use r1. For all
19023 // other functions, this decision must be delayed until PEI.
19024 unsigned FrameReg;
19025 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
19026 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
19027 else
19028 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
19029
19030 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
19031 PtrVT);
19032 while (Depth--)
19033 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
19034 FrameAddr, MachinePointerInfo());
19035 return FrameAddr;
19036}
19037
19038#define GET_REGISTER_MATCHER
19039#include "PPCGenAsmMatcher.inc"
19040
19041Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
19042 const MachineFunction &MF) const {
19043 bool IsPPC64 = Subtarget.isPPC64();
19044
19045 bool Is64Bit = IsPPC64 && VT == LLT::scalar(64);
19046 if (!Is64Bit && VT != LLT::scalar(32))
19047 report_fatal_error("Invalid register global variable type");
19048
19049 Register Reg = MatchRegisterName(RegName);
19050 if (!Reg)
19051 return Reg;
19052
19053 // FIXME: Unable to generate code for `-O2` but okay for `-O0`.
19054 // Need followup investigation as to why.
19055 if ((IsPPC64 && Reg == PPC::R2) || Reg == PPC::R0)
19056 report_fatal_error(Twine("Trying to reserve an invalid register \"" +
19057 StringRef(RegName) + "\"."));
19058
19059 // Convert GPR to GP8R register for 64bit.
19060 if (Is64Bit && StringRef(RegName).starts_with_insensitive("r"))
19061 Reg = Reg.id() - PPC::R0 + PPC::X0;
19062
19063 return Reg;
19064}
19065
19066bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
19067 // The 32-bit SVR4 ABI accesses everything as got-indirect.
19068 if (Subtarget.is32BitELFABI())
19069 return true;
19070
19071 // AIX accesses everything indirectly through the TOC, which is similar to
19072 // the GOT.
19073 if (Subtarget.isAIXABI())
19074 return true;
19075
19076 CodeModel::Model CModel = getTargetMachine().getCodeModel();
19077 // Under the small or large code model, module locals are accessed
19078 // indirectly by loading their address from .toc/.got.
19079 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
19080 return true;
19081
19082 // JumpTable and BlockAddress are accessed as got-indirect.
19083 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
19084 return true;
19085
19086 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
19087 return Subtarget.isGVIndirectSymbol(G->getGlobal());
19088
19089 return false;
19090}
19091
19092bool
19093PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
19094 // The PowerPC target isn't yet aware of offsets.
19095 return false;
19096}
19097
19098void PPCTargetLowering::getTgtMemIntrinsics(
19099 SmallVectorImpl<IntrinsicInfo> &Infos, const CallInst &I,
19100 MachineFunction &MF, unsigned Intrinsic) const {
19101 IntrinsicInfo Info;
19102 switch (Intrinsic) {
19103 case Intrinsic::ppc_atomicrmw_xchg_i128:
19104 case Intrinsic::ppc_atomicrmw_add_i128:
19105 case Intrinsic::ppc_atomicrmw_sub_i128:
19106 case Intrinsic::ppc_atomicrmw_nand_i128:
19107 case Intrinsic::ppc_atomicrmw_and_i128:
19108 case Intrinsic::ppc_atomicrmw_or_i128:
19109 case Intrinsic::ppc_atomicrmw_xor_i128:
19110 case Intrinsic::ppc_cmpxchg_i128:
19111 Info.opc = ISD::INTRINSIC_W_CHAIN;
19112 Info.memVT = MVT::i128;
19113 Info.ptrVal = I.getArgOperand(0);
19114 Info.offset = 0;
19115 Info.align = Align(16);
19116 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
19117 MachineMemOperand::MOVolatile;
19118 Infos.push_back(Info);
19119 return;
19120 case Intrinsic::ppc_atomic_load_i128:
19121 Info.opc = ISD::INTRINSIC_W_CHAIN;
19122 Info.memVT = MVT::i128;
19123 Info.ptrVal = I.getArgOperand(0);
19124 Info.offset = 0;
19125 Info.align = Align(16);
19126 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
19127 Infos.push_back(Info);
19128 return;
19129 case Intrinsic::ppc_atomic_store_i128:
19130 Info.opc = ISD::INTRINSIC_VOID;
19131 Info.memVT = MVT::i128;
19132 Info.ptrVal = I.getArgOperand(2);
19133 Info.offset = 0;
19134 Info.align = Align(16);
19135 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
19136 Infos.push_back(Info);
19137 return;
19138 case Intrinsic::ppc_altivec_lvx:
19139 case Intrinsic::ppc_altivec_lvxl:
19140 case Intrinsic::ppc_altivec_lvebx:
19141 case Intrinsic::ppc_altivec_lvehx:
19142 case Intrinsic::ppc_altivec_lvewx:
19143 case Intrinsic::ppc_vsx_lxvd2x:
19144 case Intrinsic::ppc_vsx_lxvw4x:
19145 case Intrinsic::ppc_vsx_lxvd2x_be:
19146 case Intrinsic::ppc_vsx_lxvw4x_be:
19147 case Intrinsic::ppc_vsx_lxvl:
19148 case Intrinsic::ppc_vsx_lxvll: {
19149 EVT VT;
19150 switch (Intrinsic) {
19151 case Intrinsic::ppc_altivec_lvebx:
19152 VT = MVT::i8;
19153 break;
19154 case Intrinsic::ppc_altivec_lvehx:
19155 VT = MVT::i16;
19156 break;
19157 case Intrinsic::ppc_altivec_lvewx:
19158 VT = MVT::i32;
19159 break;
19160 case Intrinsic::ppc_vsx_lxvd2x:
19161 case Intrinsic::ppc_vsx_lxvd2x_be:
19162 VT = MVT::v2f64;
19163 break;
19164 default:
19165 VT = MVT::v4i32;
19166 break;
19167 }
19168
19169 Info.opc = ISD::INTRINSIC_W_CHAIN;
19170 Info.memVT = VT;
19171 Info.ptrVal = I.getArgOperand(0);
19172 Info.offset = -VT.getStoreSize()+1;
19173 Info.size = 2*VT.getStoreSize()-1;
19174 Info.align = Align(1);
19175 Info.flags = MachineMemOperand::MOLoad;
19176 Infos.push_back(Info);
19177 return;
19178 }
19179 case Intrinsic::ppc_altivec_stvx:
19180 case Intrinsic::ppc_altivec_stvxl:
19181 case Intrinsic::ppc_altivec_stvebx:
19182 case Intrinsic::ppc_altivec_stvehx:
19183 case Intrinsic::ppc_altivec_stvewx:
19184 case Intrinsic::ppc_vsx_stxvd2x:
19185 case Intrinsic::ppc_vsx_stxvw4x:
19186 case Intrinsic::ppc_vsx_stxvd2x_be:
19187 case Intrinsic::ppc_vsx_stxvw4x_be:
19188 case Intrinsic::ppc_vsx_stxvl:
19189 case Intrinsic::ppc_vsx_stxvll: {
19190 EVT VT;
19191 switch (Intrinsic) {
19192 case Intrinsic::ppc_altivec_stvebx:
19193 VT = MVT::i8;
19194 break;
19195 case Intrinsic::ppc_altivec_stvehx:
19196 VT = MVT::i16;
19197 break;
19198 case Intrinsic::ppc_altivec_stvewx:
19199 VT = MVT::i32;
19200 break;
19201 case Intrinsic::ppc_vsx_stxvd2x:
19202 case Intrinsic::ppc_vsx_stxvd2x_be:
19203 VT = MVT::v2f64;
19204 break;
19205 default:
19206 VT = MVT::v4i32;
19207 break;
19208 }
19209
19210 Info.opc = ISD::INTRINSIC_VOID;
19211 Info.memVT = VT;
19212 Info.ptrVal = I.getArgOperand(1);
19213 Info.offset = -VT.getStoreSize()+1;
19214 Info.size = 2*VT.getStoreSize()-1;
19215 Info.align = Align(1);
19216 Info.flags = MachineMemOperand::MOStore;
19217 Infos.push_back(Info);
19218 return;
19219 }
19220 case Intrinsic::ppc_stdcx:
19221 case Intrinsic::ppc_stwcx:
19222 case Intrinsic::ppc_sthcx:
19223 case Intrinsic::ppc_stbcx: {
19224 EVT VT;
19225 auto Alignment = Align(8);
19226 switch (Intrinsic) {
19227 case Intrinsic::ppc_stdcx:
19228 VT = MVT::i64;
19229 break;
19230 case Intrinsic::ppc_stwcx:
19231 VT = MVT::i32;
19232 Alignment = Align(4);
19233 break;
19234 case Intrinsic::ppc_sthcx:
19235 VT = MVT::i16;
19236 Alignment = Align(2);
19237 break;
19238 case Intrinsic::ppc_stbcx:
19239 VT = MVT::i8;
19240 Alignment = Align(1);
19241 break;
19242 }
19243 Info.opc = ISD::INTRINSIC_W_CHAIN;
19244 Info.memVT = VT;
19245 Info.ptrVal = I.getArgOperand(0);
19246 Info.offset = 0;
19247 Info.align = Alignment;
19248 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
19249 Infos.push_back(Info);
19250 return;
19251 }
19252 default:
19253 break;
19254 }
19255}
19256
19257/// It returns EVT::Other if the type should be determined using generic
19258/// target-independent logic.
19259EVT PPCTargetLowering::getOptimalMemOpType(
19260 LLVMContext &Context, const MemOp &Op,
19261 const AttributeList &FuncAttributes) const {
19262 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
19263 // We should use Altivec/VSX loads and stores when available. For unaligned
19264 // addresses, unaligned VSX loads are only fast starting with the P8.
19265 if (Subtarget.hasAltivec() && Op.size() >= 16) {
19266 if (Op.isMemset() && Subtarget.hasVSX()) {
19267 uint64_t TailSize = Op.size() % 16;
19268 // For memset lowering, EXTRACT_VECTOR_ELT tries to return a constant
19269 // element if the vector element type matches the tail store. For tail
19270 // size 3/4 the tail store is i32, so v4i32 cannot be used; pick v8i16.
19271 if (TailSize > 2 && TailSize <= 4) {
19272 return MVT::v8i16;
19273 }
19274 return MVT::v4i32;
19275 }
19276 if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
19277 return MVT::v4i32;
19278 }
19279 }
19280
19281 if (Subtarget.isPPC64()) {
19282 return MVT::i64;
19283 }
19284
19285 return MVT::i32;
19286}
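// Example: a VSX memset of 35 bytes has TailSize = 35 % 16 = 3, so v8i16
// is returned instead of v4i32, keeping the i32 tail store from matching
// the vector element type; a 32-byte memset simply gets v4i32.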
19287
19288/// Returns true if it is beneficial to convert a load of a constant
19289/// to just the constant itself.
19290bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
19291 Type *Ty) const {
19292 assert(Ty->isIntegerTy());
19293
19294 unsigned BitSize = Ty->getPrimitiveSizeInBits();
19295 return !(BitSize == 0 || BitSize > 64);
19296}
19297
19298bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
19299 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
19300 return false;
19301 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
19302 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
19303 return NumBits1 == 64 && NumBits2 == 32;
19304}
19305
19306bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
19307 if (!VT1.isInteger() || !VT2.isInteger())
19308 return false;
19309 unsigned NumBits1 = VT1.getSizeInBits();
19310 unsigned NumBits2 = VT2.getSizeInBits();
19311 return NumBits1 == 64 && NumBits2 == 32;
19312}
19313
19314bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
19315 // Generally speaking, zexts are not free, but they are free when they can be
19316 // folded with other operations.
19317 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
19318 EVT MemVT = LD->getMemoryVT();
19319 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
19320 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
19321 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
19322 LD->getExtensionType() == ISD::ZEXTLOAD))
19323 return true;
19324 }
19325
19326 // FIXME: Add other cases...
19327 // - 32-bit shifts with a zext to i64
19328 // - zext after ctlz, bswap, etc.
19329 // - zext after and by a constant mask
19330
19331 return TargetLowering::isZExtFree(Val, VT2);
19332}
19333
19334bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
19335 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
19336 "invalid fpext types");
19337 // Extending to float128 is not free.
19338 if (DestVT == MVT::f128)
19339 return false;
19340 return true;
19341}
19342
19343bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
19344 return isInt<16>(Imm) || isUInt<16>(Imm);
19345}
19346
19347bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
19348 return isInt<16>(Imm) || isUInt<16>(Imm);
19349}
19350
19351bool PPCTargetLowering::allowsMisalignedMemoryAccesses(
19352 EVT VT, unsigned, Align, MachineMemOperand::Flags,
19353 unsigned *Fast) const {
19354 if (DisablePPCUnaligned)
19355 return false;
19356
19357 // PowerPC supports unaligned memory access for simple non-vector types.
19358 // Although accessing unaligned addresses is not as efficient as accessing
19359 // aligned addresses, it is generally more efficient than manual expansion,
19360 // and generally only traps for software emulation when crossing page
19361 // boundaries.
19362
19363 if (!VT.isSimple())
19364 return false;
19365
19366 if (VT.isFloatingPoint() && !VT.isVector() &&
19367 !Subtarget.allowsUnalignedFPAccess())
19368 return false;
19369
19370 if (VT.getSimpleVT().isVector()) {
19371 if (Subtarget.hasVSX()) {
19372 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
19373 VT != MVT::v4f32 && VT != MVT::v4i32)
19374 return false;
19375 } else {
19376 return false;
19377 }
19378 }
19379
19380 if (VT == MVT::ppcf128)
19381 return false;
19382
19383 if (Fast)
19384 *Fast = 1;
19385
19386 return true;
19387}
19388
19389bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
19390 SDValue C) const {
19391 // Check integral scalar types.
19392 if (!VT.isScalarInteger())
19393 return false;
19394 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
19395 if (!ConstNode->getAPIntValue().isSignedIntN(64))
19396 return false;
19397 // This transformation will generate >= 2 operations. But the following
19398 // cases will generate <= 2 instructions during ISel, so exclude them:
19399 // 1. If the constant multiplier fits in 16 bits, it can be handled by
19400 // one HW instruction, i.e. MULLI.
19401 // 2. If the multiplier fits in 16 bits after shifting out its trailing
19402 // zeros, one extra shift is needed, i.e. MULLI and RLDICR.
19403 int64_t Imm = ConstNode->getSExtValue();
19404 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
19405 Imm >>= Shift;
19406 if (isInt<16>(Imm))
19407 return false;
19408 uint64_t UImm = static_cast<uint64_t>(Imm);
19409 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
19410 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
19411 return true;
19412 }
19413 return false;
19414}
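// Worked example: for (mul x, 65538), the shifted multiplier is 32769,
// which does not fit in 16 bits, and 32769 - 1 == 2^15, so this returns
// true and the multiply decomposes into shift-and-add. For (mul x, 10),
// the shifted multiplier 5 fits in 16 bits, so a single MULLI (plus one
// shift) wins and this returns false.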
19415
19416bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
19417 EVT VT) const {
19418 return isFMAFasterThanFMulAndFAdd(
19419 MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
19420}
19421
19422bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
19423 Type *Ty) const {
19424 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
19425 return false;
19426 switch (Ty->getScalarType()->getTypeID()) {
19427 case Type::FloatTyID:
19428 case Type::DoubleTyID:
19429 return true;
19430 case Type::FP128TyID:
19431 return Subtarget.hasP9Vector();
19432 default:
19433 return false;
19434 }
19435}
19436
19437// FIXME: add more patterns which are not profitable to hoist.
19438bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
19439 if (!I->hasOneUse())
19440 return true;
19441
19442 Instruction *User = I->user_back();
19443 assert(User && "A single use instruction with no uses.");
19444
19445 switch (I->getOpcode()) {
19446 case Instruction::FMul: {
19447 // Don't break FMA, PowerPC prefers FMA.
19448 if (User->getOpcode() != Instruction::FSub &&
19449 User->getOpcode() != Instruction::FAdd)
19450 return true;
19451
19452 const TargetOptions &Options = getTargetMachine().Options;
19453 const Function *F = I->getFunction();
19454 const DataLayout &DL = F->getDataLayout();
19455 Type *Ty = User->getOperand(0)->getType();
19456 bool AllowContract = I->getFastMathFlags().allowContract() &&
19457 User->getFastMathFlags().allowContract();
19458
19459 return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
19460 isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
19461 (AllowContract || Options.AllowFPOpFusion == FPOpFusion::Fast));
19462 }
19463 case Instruction::Load: {
19464 // Don't break the "store (load float*)" pattern; this pattern will be
19465 // combined to "store (load int32)" in a later InstCombine pass (see
19466 // combineLoadToOperationType). On PowerPC, loading a floating-point
19467 // value takes more cycles than loading a 32-bit integer.
19468 LoadInst *LI = cast<LoadInst>(I);
19469 // For the loads that combineLoadToOperationType does nothing, like
19470 // ordered load, it should be profitable to hoist them.
19471 // For swifterror load, it can only be used for pointer to pointer type, so
19472 // later type check should get rid of this case.
19473 if (!LI->isUnordered())
19474 return true;
19475
19476 if (User->getOpcode() != Instruction::Store)
19477 return true;
19478
19479 if (I->getType()->getTypeID() != Type::FloatTyID)
19480 return true;
19481
19482 return false;
19483 }
19484 default:
19485 return true;
19486 }
19487 return true;
19488}
19489
19490const MCPhysReg *
19491PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
19492 // LR is a callee-save register, but we must treat it as clobbered by any call
19493 // site. Hence we include LR in the scratch registers, which are in turn added
19494 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
19495 // to CTR, which is used by any indirect call.
19496 static const MCPhysReg ScratchRegs[] = {
19497 PPC::X12, PPC::LR8, PPC::CTR8, 0
19498 };
19499
19500 return ScratchRegs;
19501}
19502
19503Register PPCTargetLowering::getExceptionPointerRegister(
19504 const Constant *PersonalityFn) const {
19505 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
19506}
19507
19508Register PPCTargetLowering::getExceptionSelectorRegister(
19509 const Constant *PersonalityFn) const {
19510 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
19511}
19512
19513bool
19514PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
19515 EVT VT , unsigned DefinedValues) const {
19516 if (VT == MVT::v2i64)
19517 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
19518
19519 if (Subtarget.hasVSX())
19520 return true;
19521
19522 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
19523}
19524
19525Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
19526 if (DisableILPPref || Subtarget.enableMachineScheduler())
19527 return TargetLowering::getSchedulingPreference(N);
19528
19529 return Sched::ILP;
19530}
19531
19532// Create a fast isel object.
19533FastISel *PPCTargetLowering::createFastISel(
19534 FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo,
19535 const LibcallLoweringInfo *LibcallLowering) const {
19536 return PPC::createFastISel(FuncInfo, LibInfo, LibcallLowering);
19537}
19538
19539// 'Inverted' means the FMA opcode after negating one multiplicand.
19540// For example, (fma -a b c) = (fnmsub a b c)
19541static unsigned invertFMAOpcode(unsigned Opc) {
19542 switch (Opc) {
19543 default:
19544 llvm_unreachable("Invalid FMA opcode for PowerPC!");
19545 case ISD::FMA:
19546 return PPCISD::FNMSUB;
19547 case PPCISD::FNMSUB:
19548 return ISD::FMA;
19549 }
19550}
19551
19552SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
19553 bool LegalOps, bool OptForSize,
19554 NegatibleCost &Cost,
19555 unsigned Depth) const {
19556 if (Depth > SelectionDAG::MaxRecursionDepth)
19557 return SDValue();
19558
19559 unsigned Opc = Op.getOpcode();
19560 EVT VT = Op.getValueType();
19561 SDNodeFlags Flags = Op.getNode()->getFlags();
19562
19563 switch (Opc) {
19564 case PPCISD::FNMSUB:
19565 if (!Op.hasOneUse() || !isTypeLegal(VT))
19566 break;
19567
19568 SDValue N0 = Op.getOperand(0);
19569 SDValue N1 = Op.getOperand(1);
19570 SDValue N2 = Op.getOperand(2);
19571 SDLoc Loc(Op);
19572
19573 NegatibleCost N2Cost = NegatibleCost::Expensive;
19574 SDValue NegN2 =
19575 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
19576
19577 if (!NegN2)
19578 return SDValue();
19579
19580 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
19581 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
19582 // These transformations may change sign of zeroes. For example,
19583 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
19584 if (Flags.hasNoSignedZeros()) {
19585 // Try and choose the cheaper one to negate.
19586 NegatibleCost N0Cost = NegatibleCost::Expensive;
19587 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
19588 N0Cost, Depth + 1);
19589
19590 NegatibleCost N1Cost = NegatibleCost::Expensive;
19591 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
19592 N1Cost, Depth + 1);
19593
19594 if (NegN0 && N0Cost <= N1Cost) {
19595 Cost = std::min(N0Cost, N2Cost);
19596 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
19597 } else if (NegN1) {
19598 Cost = std::min(N1Cost, N2Cost);
19599 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
19600 }
19601 }
19602
19603 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
19604 if (isOperationLegal(ISD::FMA, VT)) {
19605 Cost = N2Cost;
19606 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
19607 }
19608
19609 break;
19610 }
19611
19612 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
19613 Cost, Depth);
19614}
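// Numeric check of the nsz identity used above (illustrative values):
// with a = 2, b = 3, c = 1, fnmsub(a, b, c) = -(a*b - c) = -5, so its
// negation is 5, and fma(a, b, -c) = 2*3 + (-1) = 5 agrees; hence
// (fneg (fnmsub a b c)) can be rewritten as (fma a b (fneg c)) when FMA
// is legal for the type.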
19615
19616// Override to enable LOAD_STACK_GUARD lowering on Linux.
19617bool PPCTargetLowering::useLoadStackGuardNode(const Module &M) const {
19618 if (M.getStackProtectorGuard() == "tls" || Subtarget.isTargetLinux())
19619 return true;
19620 return TargetLowering::useLoadStackGuardNode(M);
19621}
19622
19623bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
19624 bool ForCodeSize) const {
19625 if (!VT.isSimple() || !Subtarget.hasVSX())
19626 return false;
19627
19628 switch(VT.getSimpleVT().SimpleTy) {
19629 default:
19630 // For FP types that are currently not supported by PPC backend, return
19631 // false. Examples: f16, f80.
19632 return false;
19633 case MVT::f32:
19634 case MVT::f64: {
19635 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
19636 // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
19637 return true;
19638 }
19639 bool IsExact;
19640 APSInt IntResult(16, false);
19641 // The rounding mode doesn't really matter because we only care about floats
19642 // that can be converted to integers exactly.
19643 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
19644 // For exact values in the range [-16, 15] we can materialize the float.
19645 if (IsExact && IntResult <= 15 && IntResult >= -16)
19646 return true;
19647 return Imm.isZero();
19648 }
19649 case MVT::ppcf128:
19650 return Imm.isPosZero();
19651 }
19652}
19653
19654// For vector shift operation op, fold
19655// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
19656static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
19657 SelectionDAG &DAG) {
19658 SDValue N0 = N->getOperand(0);
19659 SDValue N1 = N->getOperand(1);
19660 EVT VT = N0.getValueType();
19661 unsigned OpSizeInBits = VT.getScalarSizeInBits();
19662 unsigned Opcode = N->getOpcode();
19663 unsigned TargetOpcode;
19664
19665 switch (Opcode) {
19666 default:
19667 llvm_unreachable("Unexpected shift operation");
19668 case ISD::SHL:
19669 TargetOpcode = PPCISD::SHL;
19670 break;
19671 case ISD::SRL:
19672 TargetOpcode = PPCISD::SRL;
19673 break;
19674 case ISD::SRA:
19675 TargetOpcode = PPCISD::SRA;
19676 break;
19677 }
19678
19679 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
19680 N1->getOpcode() == ISD::AND)
19681 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
19682 if (Mask->getZExtValue() == OpSizeInBits - 1)
19683 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
19684
19685 return SDValue();
19686}
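// Example: for v4i32, OpSizeInBits is 32, so a shift amount of the form
// (and y, 31) is redundant; vslw/vsrw/vsraw already use only the low five
// bits of each element, and the AND is stripped in favor of the
// corresponding PPCISD shift node.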
19687
19688SDValue PPCTargetLowering::combineVectorShift(SDNode *N,
19689 DAGCombinerInfo &DCI) const {
19690 EVT VT = N->getValueType(0);
19691 assert(VT.isVector() && "Vector type expected.");
19692
19693 unsigned Opc = N->getOpcode();
19694 assert((Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) &&
19695 "Unexpected opcode.");
19696
19697 if (!isOperationLegal(Opc, VT))
19698 return SDValue();
19699
19700 EVT EltTy = VT.getScalarType();
19701 unsigned EltBits = EltTy.getSizeInBits();
19702 if (EltTy != MVT::i64 && EltTy != MVT::i32)
19703 return SDValue();
19704
19705 SDValue N1 = N->getOperand(1);
19706 uint64_t SplatBits = 0;
19707 bool AddSplatCase = false;
19708 unsigned OpcN1 = N1.getOpcode();
19709 if (OpcN1 == PPCISD::VADD_SPLAT &&
19710 N1.getConstantOperandVal(1) == (uint64_t)(EltBits / 8)) {
19711 AddSplatCase = true;
19712 SplatBits = N1.getConstantOperandVal(0);
19713 }
19714
19715 if (!AddSplatCase) {
19716 if (OpcN1 != ISD::BUILD_VECTOR)
19717 return SDValue();
19718
19719 unsigned SplatBitSize;
19720 bool HasAnyUndefs;
19721 APInt APSplatBits, APSplatUndef;
19722 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(N1);
19723 bool BVNIsConstantSplat =
19724 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
19725 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
19726 if (!BVNIsConstantSplat || SplatBitSize != EltBits)
19727 return SDValue();
19728 SplatBits = APSplatBits.getZExtValue();
19729 }
19730
19731 SDLoc DL(N);
19732 SDValue N0 = N->getOperand(0);
19733 // PPC vector shifts by word/double look at only the low 5/6 bits of the
19734 // shift vector, which means the max value is 31/63. A shift vector of all
19735 // 1s will be truncated to 31/63, which is useful as vspltiw is limited to
19736 // -16 to 15 range.
19737 if (SplatBits == (EltBits - 1)) {
19738 unsigned NewOpc;
19739 switch (Opc) {
19740 case ISD::SHL:
19741 NewOpc = PPCISD::SHL;
19742 break;
19743 case ISD::SRL:
19744 NewOpc = PPCISD::SRL;
19745 break;
19746 case ISD::SRA:
19747 NewOpc = PPCISD::SRA;
19748 break;
19749 }
19750 SDValue SplatOnes = getCanonicalConstSplat(255, 1, VT, DCI.DAG, DL);
19751 return DCI.DAG.getNode(NewOpc, DL, VT, N0, SplatOnes);
19752 }
19753
19754 if (Opc != ISD::SHL || !isOperationLegal(ISD::ADD, VT))
19755 return SDValue();
19756
19757 // For 64-bit there is no splat immediate so we want to catch shift by 1 here
19758 // before the BUILD_VECTOR is replaced by a load.
19759 if (EltTy != MVT::i64 || SplatBits != 1)
19760 return SDValue();
19761
19762 return DCI.DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
19763}
19764
19765SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
19766 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19767 return Value;
19768
19769 if (N->getValueType(0).isVector())
19770 return combineVectorShift(N, DCI);
19771
19772 SDValue N0 = N->getOperand(0);
19773 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
19774 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
19775 N0.getOpcode() != ISD::SIGN_EXTEND ||
19776 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
19777 N->getValueType(0) != MVT::i64)
19778 return SDValue();
19779
19780 // We can't save an operation here if the value is already extended, and
19781 // the existing shift is easier to combine.
19782 SDValue ExtsSrc = N0.getOperand(0);
19783 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
19784 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
19785 return SDValue();
19786
19787 SDLoc DL(N0);
19788 SDValue ShiftBy = SDValue(CN1, 0);
19789 // We want the shift amount to be i32 on the extswli, but the shift could
19790 // have an i64.
19791 if (ShiftBy.getValueType() == MVT::i64)
19792 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
19793
19794 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
19795 ShiftBy);
19796}
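// Example: on a 64-bit ISA 3.0 target, (shl (sext i32 %a to i64), 3)
// becomes (EXTSWSLI %a, 3), i.e. a single extswsli that sign-extends and
// shifts at once, instead of separate extsw and sldi instructions.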
19797
19798SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
19799 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19800 return Value;
19801
19802 if (N->getValueType(0).isVector())
19803 return combineVectorShift(N, DCI);
19804
19805 return SDValue();
19806}
19807
19808SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
19809 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19810 return Value;
19811
19812 if (N->getValueType(0).isVector())
19813 return combineVectorShift(N, DCI);
19814
19815 return SDValue();
19816}
19817
19818// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
19819// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
19820// When C is zero, the equation (addi Z, -C) can be simplified to Z
19821// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
19822static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
19823 const PPCSubtarget &Subtarget) {
19824 if (!Subtarget.isPPC64())
19825 return SDValue();
19826
19827 SDValue LHS = N->getOperand(0);
19828 SDValue RHS = N->getOperand(1);
19829
19830 auto isZextOfCompareWithConstant = [](SDValue Op) {
19831 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
19832 Op.getValueType() != MVT::i64)
19833 return false;
19834
19835 SDValue Cmp = Op.getOperand(0);
19836 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
19837 Cmp.getOperand(0).getValueType() != MVT::i64)
19838 return false;
19839
19840 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
19841 int64_t NegConstant = 0 - Constant->getSExtValue();
19842 // Due to the limitations of the addi instruction,
19843 // -C is required to be [-32768, 32767].
19844 return isInt<16>(NegConstant);
19845 }
19846
19847 return false;
19848 };
19849
19850 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
19851 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
19852
19853 // If there is a pattern, canonicalize a zext operand to the RHS.
19854 if (LHSHasPattern && !RHSHasPattern)
19855 std::swap(LHS, RHS);
19856 else if (!LHSHasPattern && !RHSHasPattern)
19857 return SDValue();
19858
19859 SDLoc DL(N);
19860 EVT CarryType = Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
19861 SDVTList VTs = DAG.getVTList(MVT::i64, CarryType);
19862 SDValue Cmp = RHS.getOperand(0);
19863 SDValue Z = Cmp.getOperand(0);
19864 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
19865 int64_t NegConstant = 0 - Constant->getSExtValue();
19866
19867 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
19868 default: break;
19869 case ISD::SETNE: {
19870 // when C == 0
19871 // --> addze X, (addic Z, -1).carry
19872 // /
19873 // add X, (zext(setne Z, C))--
19874 // \ when -32768 <= -C <= 32767 && C != 0
19875 // --> addze X, (addic (addi Z, -C), -1).carry
19876 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
19877 DAG.getConstant(NegConstant, DL, MVT::i64));
19878 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
19879 SDValue Addc =
19880 DAG.getNode(ISD::UADDO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
19881 AddOrZ, DAG.getAllOnesConstant(DL, MVT::i64),
19882 DAG.getConstant(0, DL, CarryType));
19883 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
19884 DAG.getConstant(0, DL, MVT::i64),
19885 SDValue(Addc.getNode(), 1));
19886 }
19887 case ISD::SETEQ: {
19888 // when C == 0
19889 // --> addze X, (subfic Z, 0).carry
19890 // /
19891 // add X, (zext(sete Z, C))--
19892 // \ when -32768 <= -C <= 32767 && C != 0
19893 // --> addze X, (subfic (addi Z, -C), 0).carry
19894 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
19895 DAG.getConstant(NegConstant, DL, MVT::i64));
19896 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
19897 SDValue Subc =
19898 DAG.getNode(ISD::USUBO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
19899 DAG.getConstant(0, DL, MVT::i64), AddOrZ,
19900 DAG.getConstant(0, DL, CarryType));
19901 SDValue Invert = DAG.getNode(ISD::XOR, DL, CarryType, Subc.getValue(1),
19902 DAG.getConstant(1UL, DL, CarryType));
19903 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
19904 DAG.getConstant(0, DL, MVT::i64), Invert);
19905 }
19906 }
19907
19908 return SDValue();
19909}
19910
19911// Transform
19912// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
19913// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
19914// In this case both C1 and C2 must be known constants.
19915// C1+C2 must fit into a 34 bit signed integer.
19917 const PPCSubtarget &Subtarget) {
19918 if (!Subtarget.isUsingPCRelativeCalls())
19919 return SDValue();
19920
19921 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
19922 // If we find that node try to cast the Global Address and the Constant.
19923 SDValue LHS = N->getOperand(0);
19924 SDValue RHS = N->getOperand(1);
19925
19926 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19927 std::swap(LHS, RHS);
19928
19929 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19930 return SDValue();
19931
19932 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
19933 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
19934 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(RHS);
19935
19936 // Check that both casts succeeded.
19937 if (!GSDN || !ConstNode)
19938 return SDValue();
19939
19940 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
19941 SDLoc DL(GSDN);
19942
19943 // The signed int offset needs to fit in 34 bits.
19944 if (!isInt<34>(NewOffset))
19945 return SDValue();
19946
19947 // The new global address is a copy of the old global address except
19948 // that it has the updated Offset.
19949 SDValue GA =
19950 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
19951 NewOffset, GSDN->getTargetFlags());
19952 SDValue MatPCRel =
19953 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
19954 return MatPCRel;
19955}
19956
19957// Transform (add X, (build_vector (T 1), (T 1), ...)) -> (sub X, (XXLEQVOnes))
19958// XXLEQVOnes creates an all-1s vector (0xFFFFFFFF...) efficiently via xxleqv
19959// Mathematical identity: X + 1 = X - (-1)
19960// Applies to v4i32, v2i64, v8i16, v16i8 where all elements are constant 1
19961// Requirement: VSX feature for efficient xxleqv generation
19962static SDValue combineADDToSUB(SDNode *N, SelectionDAG &DAG,
19963 const PPCSubtarget &Subtarget) {
19964
19965 EVT VT = N->getValueType(0);
19966 if (!Subtarget.hasVSX())
19967 return SDValue();
19968
19969 // Handle v2i64, v4i32, v8i16 and v16i8 types
19970 if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
19971 VT == MVT::v2i64))
19972 return SDValue();
19973
19974 SDValue LHS = N->getOperand(0);
19975 SDValue RHS = N->getOperand(1);
19976
19977 // Check if RHS is BUILD_VECTOR
19978 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
19979 return SDValue();
19980
19981 // Check if all the elements are 1
19982 unsigned NumOfEles = RHS.getNumOperands();
19983 for (unsigned i = 0; i < NumOfEles; ++i) {
19984 auto *CN = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
19985 if (!CN || CN->getSExtValue() != 1)
19986 return SDValue();
19987 }
19988 SDLoc DL(N);
19989
19990 SDValue MinusOne = DAG.getConstant(APInt::getAllOnes(32), DL, MVT::i32);
19991 SmallVector<SDValue, 4> Ops(4, MinusOne);
19992 SDValue AllOnesVec = DAG.getBuildVector(MVT::v4i32, DL, Ops);
19993
19994 // Bitcast to the target vector type
19995 SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT, AllOnesVec);
19996
19997 return DAG.getNode(ISD::SUB, DL, VT, LHS, Bitcast);
19998}
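// Example: (add v4i32 %x, splat(1)) is rewritten as (sub %x, splat(-1));
// the all-ones vector comes from xxleqv (x == x), avoiding a constant-pool
// load for element types without a convenient splat-immediate form.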
19999
20000SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
20001 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
20002 return Value;
20003
20004 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
20005 return Value;
20006
20007 if (auto Value = combineADDToSUB(N, DCI.DAG, Subtarget))
20008 return Value;
20009 return SDValue();
20010}
20011
20012// Detect TRUNCATE operations on bitcasts of float128 values.
20013// What we are looking for here is the situation where we extract a subset
20014// of bits from a 128 bit float.
20015// This can be of two forms:
20016// 1) BITCAST of f128 feeding TRUNCATE
20017// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
20018// The reason this is required is because we do not have a legal i128 type
20019// and so we want to prevent having to store the f128 and then reload part
20020// of it.
20021SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
20022 DAGCombinerInfo &DCI) const {
20023 // If we are using CRBits then try that first.
20024 if (Subtarget.useCRBits()) {
20025 // Check if CRBits did anything and return that if it did.
20026 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
20027 return CRTruncValue;
20028 }
20029
20030 SDLoc dl(N);
20031 SDValue Op0 = N->getOperand(0);
20032
20033 // Looking for a truncate of i128 to i64.
20034 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
20035 return SDValue();
20036
20037 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
20038
20039 // SRL feeding TRUNCATE.
20040 if (Op0.getOpcode() == ISD::SRL) {
20041 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
20042 // The right shift has to be by 64 bits.
20043 if (!ConstNode || ConstNode->getZExtValue() != 64)
20044 return SDValue();
20045
20046 // Switch the element number to extract.
20047 EltToExtract = EltToExtract ? 0 : 1;
20048 // Update Op0 past the SRL.
20049 Op0 = Op0.getOperand(0);
20050 }
20051
20052 // BITCAST feeding a TRUNCATE possibly via SRL.
20053 if (Op0.getOpcode() == ISD::BITCAST &&
20054 Op0.getValueType() == MVT::i128 &&
20055 Op0.getOperand(0).getValueType() == MVT::f128) {
20056 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
20057 return DCI.DAG.getNode(
20058 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
20059 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
20060 }
20061 return SDValue();
20062}
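// Example (big-endian): (trunc (srl (bitcast f128 %f to i128), 64) to i64)
// becomes (extract_vector_elt (bitcast %f to v2i64), 0), reading the high
// doubleword directly out of the VSX register instead of storing the f128
// and reloading half of it.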
20063
20064SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
20065 SelectionDAG &DAG = DCI.DAG;
20066
20067 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
20068 if (!ConstOpOrElement)
20069 return SDValue();
20070
20071 // An imul is usually smaller than the alternative sequence for legal type.
20072 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
20073 isOperationLegal(ISD::MUL, N->getValueType(0)))
20074 return SDValue();
20075
20076 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
20077 switch (this->Subtarget.getCPUDirective()) {
20078 default:
20079 // TODO: enhance the condition for subtarget before pwr8
20080 return false;
20081 case PPC::DIR_PWR8:
20082 // type mul add shl
20083 // scalar 4 1 1
20084 // vector 7 2 2
20085 return true;
20086 case PPC::DIR_PWR9:
20087 case PPC::DIR_PWR10:
20088 case PPC::DIR_PWR11:
20089 case PPC::DIR_PWR_FUTURE:
20090 // type mul add shl
20091 // scalar 5 2 2
20092 // vector 7 2 2
20093
20094 // The cycle ratios of the related operations are shown in the table
20095 // above. Because mul is 5 (scalar) / 7 (vector) and add/sub/shl are all
20096 // 2 for both scalar and vector types, 2-instruction patterns
20097 // (add/sub + shl) cost 4 and are always profitable; but for the
20098 // 3-instruction pattern (mul x, -(2^N + 1)) => -(add (shl x, N), x),
20099 // sub + add + shl cost 6, so we should only do it for vector types.
20100 return IsAddOne && IsNeg ? VT.isVector() : true;
20101 }
20102 };
20103
20104 EVT VT = N->getValueType(0);
20105 SDLoc DL(N);
20106
20107 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
20108 bool IsNeg = MulAmt.isNegative();
20109 APInt MulAmtAbs = MulAmt.abs();
20110
20111 if ((MulAmtAbs - 1).isPowerOf2()) {
20112 // (mul x, 2^N + 1) => (add (shl x, N), x)
20113 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
20114
20115 if (!IsProfitable(IsNeg, true, VT))
20116 return SDValue();
20117
20118 SDValue Op0 = N->getOperand(0);
20119 SDValue Op1 =
20120 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
20121 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
20122 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
20123
20124 if (!IsNeg)
20125 return Res;
20126
20127 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
20128 } else if ((MulAmtAbs + 1).isPowerOf2()) {
20129 // (mul x, 2^N - 1) => (sub (shl x, N), x)
20130 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
20131
20132 if (!IsProfitable(IsNeg, false, VT))
20133 return SDValue();
20134
20135 SDValue Op0 = N->getOperand(0);
20136 SDValue Op1 =
20137 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
20138 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
20139
20140 if (!IsNeg)
20141 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
20142 else
20143 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
20144
20145 } else {
20146 return SDValue();
20147 }
20148}
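// Worked example: (mul x, 5) has MulAmtAbs - 1 == 4 == 2^2, so it becomes
// (add (shl x, 2), x); (mul x, -3) has MulAmtAbs + 1 == 4, so it becomes
// (sub x, (shl x, 2)), i.e. x - 4x = -3x, subject to the per-CPU
// profitability check above.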
20149
20150// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
20151// in combiner since we need to check SD flags and other subtarget features.
20152SDValue PPCTargetLowering::combineFMALike(SDNode *N,
20153 DAGCombinerInfo &DCI) const {
20154 SDValue N0 = N->getOperand(0);
20155 SDValue N1 = N->getOperand(1);
20156 SDValue N2 = N->getOperand(2);
20157 SDNodeFlags Flags = N->getFlags();
20158 EVT VT = N->getValueType(0);
20159 SelectionDAG &DAG = DCI.DAG;
20160 unsigned Opc = N->getOpcode();
20161 bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
20162 bool LegalOps = !DCI.isBeforeLegalizeOps();
20163 SDLoc Loc(N);
20164
20165 if (!isOperationLegal(ISD::FMA, VT))
20166 return SDValue();
20167
20168 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
20169 // since (fnmsub a b c)=-0 while c-ab=+0.
20170 if (!Flags.hasNoSignedZeros())
20171 return SDValue();
20172
20173 // (fma (fneg a) b c) => (fnmsub a b c)
20174 // (fnmsub (fneg a) b c) => (fma a b c)
20175 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
20176 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
20177
20178 // (fma a (fneg b) c) => (fnmsub a b c)
20179 // (fnmsub a (fneg b) c) => (fma a b c)
20180 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
20181 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
20182
20183 return SDValue();
20184}
20185
20186bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
20187 // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
20188 if (!Subtarget.is64BitELFABI())
20189 return false;
20190
20191 // If not a tail call then no need to proceed.
20192 if (!CI->isTailCall())
20193 return false;
20194
20195 // If sibling calls have been disabled and tail-calls aren't guaranteed
20196 // there is no reason to duplicate.
20197 auto &TM = getTargetMachine();
20198 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
20199 return false;
20200
20201 // Can't tail call a function called indirectly, or if it has variadic args.
20202 const Function *Callee = CI->getCalledFunction();
20203 if (!Callee || Callee->isVarArg())
20204 return false;
20205
20206 // Make sure the callee and caller calling conventions are eligible for tco.
20207 const Function *Caller = CI->getParent()->getParent();
20208 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
20209 CI->getCallingConv()))
20210 return false;
20211
20212 // If the function is local then we have a good chance at tail-calling it
20213 return getTargetMachine().shouldAssumeDSOLocal(Callee);
20214}
20215
20216bool PPCTargetLowering::
20217isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
20218 const Value *Mask = AndI.getOperand(1);
20219 // If the mask is suitable for andi. or andis. we should sink the and.
20220 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
20221 // Can't handle constants wider than 64-bits.
20222 if (CI->getBitWidth() > 64)
20223 return false;
20224 int64_t ConstVal = CI->getZExtValue();
20225 return isUInt<16>(ConstVal) ||
20226 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
20227 }
20228
20229 // For non-constant masks, we can always use the record-form and.
20230 return true;
20231}
20232
20233/// getAddrModeForFlags - Based on the set of address flags, select the most
20234/// optimal instruction format to match by.
20235PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
20236 // This is not a node we should be handling here.
20237 if (Flags == PPC::MOF_None)
20238 return PPC::AM_None;
20239 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
20240 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
20241 if ((Flags & FlagSet) == FlagSet)
20242 return PPC::AM_DForm;
20243 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
20244 if ((Flags & FlagSet) == FlagSet)
20245 return PPC::AM_DSForm;
20246 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
20247 if ((Flags & FlagSet) == FlagSet)
20248 return PPC::AM_DQForm;
20249 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
20250 if ((Flags & FlagSet) == FlagSet)
20251 return PPC::AM_PrefixDForm;
20252 // If no other forms are selected, return an X-Form as it is the most
20253 // general addressing mode.
20254 return PPC::AM_XForm;
20255}
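// Roughly: a word load of (add %r, 16) carries reg+imm16 flags and matches
// one of the D-Form buckets above, whereas a pure reg+reg address matches
// none of the D/DS/DQ/prefixed buckets and falls through to X-Form.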
20256
20257/// Set alignment flags based on whether or not the Frame Index is aligned.
20258/// Utilized when computing flags for address computation when selecting
20259/// load and store instructions.
20260static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
20261 SelectionDAG &DAG) {
20262 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
20263 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
20264 if (!FI)
20265 return;
20266 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
20267 unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
20268 // If this is (add $FI, $S16Imm), the alignment flags are already set
20269 // based on the immediate. We just need to clear the alignment flags
20270 // if the FI alignment is weaker.
20271 if ((FrameIndexAlign % 4) != 0)
20272 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
20273 if ((FrameIndexAlign % 16) != 0)
20274 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
20275 // If the address is a plain FrameIndex, set alignment flags based on
20276 // FI alignment.
20277 if (!IsAdd) {
20278 if ((FrameIndexAlign % 4) == 0)
20279 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
20280 if ((FrameIndexAlign % 16) == 0)
20281 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
20282 }
20283}
20284
20285/// Given a node, compute flags that are used for address computation when
20286/// selecting load and store instructions. The flags computed are stored in
20287/// FlagSet. This function takes into account whether the node is a constant,
20288 /// an ADD or OR, or neither, and computes the address flags accordingly.
20289static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
20290 SelectionDAG &DAG) {
20291 // Set the alignment flags for the node depending on if the node is
20292 // 4-byte or 16-byte aligned.
20293 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
20294 if ((Imm & 0x3) == 0)
20295 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
20296 if ((Imm & 0xf) == 0)
20297 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
20298 };
20299
20300 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
20301 // All 32-bit constants can be computed as LIS + Disp.
20302 const APInt &ConstImm = CN->getAPIntValue();
20303 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
20304 FlagSet |= PPC::MOF_AddrIsSImm32;
20305 SetAlignFlagsForImm(ConstImm.getZExtValue());
20306 setAlignFlagsForFI(N, FlagSet, DAG);
20307 }
20308 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
20309 FlagSet |= PPC::MOF_RPlusSImm34;
20310 else // Let constant materialization handle large constants.
20311 FlagSet |= PPC::MOF_NotAddNorCst;
20312 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
20313 // This address can be represented as an addition of:
20314 // - Register + Imm16 (possibly a multiple of 4/16)
20315 // - Register + Imm34
20316 // - Register + PPCISD::Lo
20317 // - Register + Register
20318 // In any case, we won't have to match this as Base + Zero.
20319 SDValue RHS = N.getOperand(1);
20320 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
20321 const APInt &ConstImm = CN->getAPIntValue();
20322 if (ConstImm.isSignedIntN(16)) {
20323 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
20324 SetAlignFlagsForImm(ConstImm.getZExtValue());
20325 setAlignFlagsForFI(N, FlagSet, DAG);
20326 }
20327 if (ConstImm.isSignedIntN(34))
20328 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
20329 else
20330 FlagSet |= PPC::MOF_RPlusR; // Register.
20331 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
20332 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
20333 else
20334 FlagSet |= PPC::MOF_RPlusR;
20335 } else { // The address computation is not a constant or an addition.
20336 setAlignFlagsForFI(N, FlagSet, DAG);
20337 FlagSet |= PPC::MOF_NotAddNorCst;
20338 }
20339}
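// Worked example: for N = (add %r, 16) the classification above yields
// MOF_RPlusSImm16 plus both alignment flags (16 is a multiple of 4 and of
// 16), and MOF_RPlusSImm34, since 16 also fits in a signed 34-bit immediate.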
20340
20341static bool isPCRelNode(SDValue N) {
20342 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
20343 isValidPCRelNode<ConstantPoolSDNode>(N) ||
20344 isValidPCRelNode<GlobalAddressSDNode>(N) ||
20345 isValidPCRelNode<JumpTableSDNode>(N) ||
20346 isValidPCRelNode<BlockAddressSDNode>(N));
20347 }
20348
20349 /// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
20350/// the address flags of the load/store instruction that is to be matched.
20351unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
20352 SelectionDAG &DAG) const {
20353 unsigned FlagSet = PPC::MOF_None;
20354
20355 // Compute subtarget flags.
20356 if (!Subtarget.hasP9Vector())
20357 FlagSet |= PPC::MOF_SubtargetBeforeP9;
20358 else
20359 FlagSet |= PPC::MOF_SubtargetP9;
20360
20361 if (Subtarget.hasPrefixInstrs())
20362 FlagSet |= PPC::MOF_SubtargetP10;
20363
20364 if (Subtarget.hasSPE())
20365 FlagSet |= PPC::MOF_SubtargetSPE;
20366
20367 // Check if we have a PCRel node and return early.
20368 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
20369 return FlagSet;
20370
20371 // If the node is the paired load/store intrinsics, compute flags for
20372 // address computation and return early.
20373 unsigned ParentOp = Parent->getOpcode();
20374 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
20375 (ParentOp == ISD::INTRINSIC_VOID))) {
20376 unsigned ID = Parent->getConstantOperandVal(1);
20377 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
20378 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
20379 ? Parent->getOperand(2)
20380 : Parent->getOperand(3);
20381 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
20382 FlagSet |= PPC::MOF_Vector;
20383 return FlagSet;
20384 }
20385 }
20386
20387 // Mark this as something we don't want to handle here if it is atomic
20388 // or a pre-increment instruction.
20389 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
20390 if (LSB->isIndexed())
20391 return PPC::MOF_None;
20392
20393 // Compute in-memory type flags. This is based on if there are scalars,
20394 // floats or vectors.
20395 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
20396 assert(MN && "Parent should be a MemSDNode!");
20397 EVT MemVT = MN->getMemoryVT();
20398 unsigned Size = MemVT.getSizeInBits();
20399 if (MemVT.isScalarInteger()) {
20400 assert(Size <= 128 &&
20401 "Not expecting scalar integers larger than 16 bytes!");
20402 if (Size < 32)
20403 FlagSet |= PPC::MOF_SubWordInt;
20404 else if (Size == 32)
20405 FlagSet |= PPC::MOF_WordInt;
20406 else
20407 FlagSet |= PPC::MOF_DoubleWordInt;
20408 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
20409 if (Size == 128)
20410 FlagSet |= PPC::MOF_Vector;
20411 else if (Size == 256) {
20412 assert(Subtarget.pairedVectorMemops() &&
20413 "256-bit vectors are only available when paired vector memops is "
20414 "enabled!");
20415 FlagSet |= PPC::MOF_Vector;
20416 } else
20417 llvm_unreachable("Not expecting illegal vectors!");
20418 } else { // Floating point type: can be scalar, f128 or vector types.
20419 if (Size == 32 || Size == 64)
20420 FlagSet |= PPC::MOF_ScalarFloat;
20421 else if (MemVT == MVT::f128 || MemVT.isVector())
20422 FlagSet |= PPC::MOF_Vector;
20423 else
20424 llvm_unreachable("Not expecting illegal scalar floats!");
20425 }
20426
20427 // Compute flags for address computation.
20428 computeFlagsForAddressComputation(N, FlagSet, DAG);
20429
20430 // Compute type extension flags.
20431 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
20432 switch (LN->getExtensionType()) {
20433 case ISD::SEXTLOAD:
20434 FlagSet |= PPC::MOF_SExt;
20435 break;
20436 case ISD::EXTLOAD:
20437 case ISD::ZEXTLOAD:
20438 FlagSet |= PPC::MOF_ZExt;
20439 break;
20440 case ISD::NON_EXTLOAD:
20441 FlagSet |= PPC::MOF_NoExt;
20442 break;
20443 }
20444 } else
20445 FlagSet |= PPC::MOF_NoExt;
20446
20447 // For integers, no extension is the same as zero extension.
20448 // We set the extension mode to zero extension so we don't have
20449 // to add separate entries in AddrModesMap for loads and stores.
20450 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
20451 FlagSet |= PPC::MOF_ZExt;
20452 FlagSet &= ~PPC::MOF_NoExt;
20453 }
20454
20455 // If we don't have prefixed instructions, 34-bit constants should be
20456 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
20457 bool IsNonP1034BitConst =
20458 ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
20459 FlagSet) == PPC::MOF_RPlusSImm34;
20460 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
20461 IsNonP1034BitConst)
20462 FlagSet |= PPC::MOF_NotAddNorCst;
20463
20464 return FlagSet;
20465}
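// E.g. a sign-extending 16-bit load on a P10 subtarget would collect
// MOF_SubtargetP9 | MOF_SubtargetP10, MOF_SubWordInt and MOF_SExt plus the
// address-computation flags, and those bits then drive getAddrModeForFlags.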
20466
20467/// SelectForceXFormMode - Given the specified address, force it to be
20468/// represented as an indexed [r+r] operation (an XForm instruction).
20469 PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
20470 SDValue &Base,
20471 SelectionDAG &DAG) const {
20472
20473 PPC::AddrMode Mode = PPC::AM_XForm;
20474 int16_t ForceXFormImm = 0;
20475 if (provablyDisjointOr(DAG, N) &&
20476 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
20477 Disp = N.getOperand(0);
20478 Base = N.getOperand(1);
20479 return Mode;
20480 }
20481
20482 // If the address is the result of an add, we will utilize the fact that the
20483 // address calculation includes an implicit add. However, we can reduce
20484 // register pressure if we do not materialize a constant just for use as the
20485 // index register. We only get rid of the add if it is not an add of a
20486 // value and a 16-bit signed constant and both have a single use.
20487 if (N.getOpcode() == ISD::ADD &&
20488 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
20489 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
20490 Disp = N.getOperand(0);
20491 Base = N.getOperand(1);
20492 return Mode;
20493 }
20494
20495 // Otherwise, use R0 as the base register.
20496 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20497 N.getValueType());
20498 Base = N;
20499
20500 return Mode;
20501}
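// Example: N = (add %base, %idx) with a non-immediate %idx is emitted as the
// [r+r] pair Disp = %base, Base = %idx; any other node N instead becomes the
// r0-based pair Disp = ZERO/ZERO8, Base = N.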
20502
20503 bool PPCTargetLowering::splitValueIntoRegisterParts(
20504 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
20505 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
20506 EVT ValVT = Val.getValueType();
20507 // If we are splitting a scalar integer into f64 parts (i.e. so they
20508 // can be placed into VFRC registers), we need to zero extend and
20509 // bitcast the values. This will ensure the value is placed into a
20510 // VSR using direct moves or stack operations as needed.
20511 if (PartVT == MVT::f64 &&
20512 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
20513 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
20514 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
20515 Parts[0] = Val;
20516 return true;
20517 }
20518 return false;
20519}
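// E.g. an i32 value headed for an f64 part is zero-extended to i64 and then
// bitcast to f64, so the integer bits land unchanged in a VSR via a direct
// move or a stack round-trip.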
20520
20521SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
20522 SelectionDAG &DAG) const {
20523 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20524 TargetLowering::CallLoweringInfo CLI(DAG);
20525 EVT RetVT = Op.getValueType();
20526 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
20527 SDValue Callee =
20528 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
20529 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, false);
20530 TargetLowering::ArgListTy Args;
20531 for (const SDValue &N : Op->op_values()) {
20532 EVT ArgVT = N.getValueType();
20533 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
20534 TargetLowering::ArgListEntry Entry(N, ArgTy);
20535 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgTy, SignExtend);
20536 Entry.IsZExt = !Entry.IsSExt;
20537 Args.push_back(Entry);
20538 }
20539
20540 SDValue InChain = DAG.getEntryNode();
20541 SDValue TCChain = InChain;
20542 const Function &F = DAG.getMachineFunction().getFunction();
20543 bool isTailCall =
20544 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
20545 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
20546 if (isTailCall)
20547 InChain = TCChain;
20548 CLI.setDebugLoc(SDLoc(Op))
20549 .setChain(InChain)
20550 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
20551 .setTailCall(isTailCall)
20552 .setSExtResult(SignExtend)
20553 .setZExtResult(!SignExtend)
20554 .setIsPostTypeLegalization(true);
20555 return TLI.LowerCallTo(CLI).first;
20556}
20557
20558SDValue PPCTargetLowering::lowerLibCallBasedOnType(
20559 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
20560 SelectionDAG &DAG) const {
20561 if (Op.getValueType() == MVT::f32)
20562 return lowerToLibCall(LibCallFloatName, Op, DAG);
20563
20564 if (Op.getValueType() == MVT::f64)
20565 return lowerToLibCall(LibCallDoubleName, Op, DAG);
20566
20567 return SDValue();
20568}
20569
20570bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
20571 SDNodeFlags Flags = Op.getNode()->getFlags();
20572 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
20573 Flags.hasNoNaNs() && Flags.hasNoInfs();
20574}
20575
20576bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
20577 return Op.getNode()->getFlags().hasApproximateFuncs();
20578}
20579
20580bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
20581 return getTargetMachine().Options.PPCGenScalarMASSEntries;
20582 }
20583
20584SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
20585 const char *LibCallFloatName,
20586 const char *LibCallDoubleNameFinite,
20587 const char *LibCallFloatNameFinite,
20588 SDValue Op,
20589 SelectionDAG &DAG) const {
20590 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
20591 return SDValue();
20592
20593 if (!isLowringToMASSFiniteSafe(Op))
20594 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
20595 DAG);
20596
20597 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
20598 LibCallDoubleNameFinite, Op, DAG);
20599}
20600
20601SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
20602 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
20603 "__xl_powf_finite", Op, DAG);
20604}
20605
20606SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
20607 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
20608 "__xl_sinf_finite", Op, DAG);
20609}
20610
20611SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
20612 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
20613 "__xl_cosf_finite", Op, DAG);
20614}
20615
20616SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
20617 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
20618 "__xl_logf_finite", Op, DAG);
20619}
20620
20621SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
20622 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
20623 "__xl_log10f_finite", Op, DAG);
20624}
20625
20626SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
20627 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
20628 "__xl_expf_finite", Op, DAG);
20629}
20630
20631// If we happen to match to an aligned D-Form, check if the Frame Index is
20632// adequately aligned. If it is not, reset the mode to match to X-Form.
20633 static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
20634 PPC::AddrMode &Mode) {
20635 if (!isa<FrameIndexSDNode>(N))
20636 return;
20637 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
20638 (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
20639 Mode = PPC::AM_XForm;
20640 }
20641
20642 /// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
20643/// compute the address flags of the node, get the optimal address mode based
20644/// on the flags, and set the Base and Disp based on the address mode.
20645 PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
20646 SDValue N, SDValue &Disp,
20647 SDValue &Base,
20648 SelectionDAG &DAG,
20649 MaybeAlign Align) const {
20650 SDLoc DL(Parent);
20651
20652 // Compute the address flags.
20653 unsigned Flags = computeMOFlags(Parent, N, DAG);
20654
20655 // Get the optimal address mode based on the Flags.
20656 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
20657
20658 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
20659 // Select an X-Form load if it is not.
20660 setXFormForUnalignedFI(N, Flags, Mode);
20661
20662 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
20663 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
20664 assert(Subtarget.isUsingPCRelativeCalls() &&
20665 "Must be using PC-Relative calls when a valid PC-Relative node is "
20666 "present!");
20667 Mode = PPC::AM_PCRel;
20668 }
20669
20670 // Set Base and Disp accordingly depending on the address mode.
20671 switch (Mode) {
20672 case PPC::AM_DForm:
20673 case PPC::AM_DSForm:
20674 case PPC::AM_DQForm: {
20675 // This is a register plus a 16-bit immediate. The base will be the
20676 // register and the displacement will be the immediate unless it
20677 // isn't sufficiently aligned.
20678 if (Flags & PPC::MOF_RPlusSImm16) {
20679 SDValue Op0 = N.getOperand(0);
20680 SDValue Op1 = N.getOperand(1);
20681 int16_t Imm = Op1->getAsZExtVal();
20682 if (!Align || isAligned(*Align, Imm)) {
20683 Disp = DAG.getSignedTargetConstant(Imm, DL, N.getValueType());
20684 Base = Op0;
20685 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
20686 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20687 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
20688 }
20689 break;
20690 }
20691 }
20692 // This is a register plus the @lo relocation. The base is the register
20693 // and the displacement is the global address.
20694 else if (Flags & PPC::MOF_RPlusLo) {
20695 Disp = N.getOperand(1).getOperand(0); // The global address.
20696 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
20697 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
20698 Disp.getOpcode() == ISD::TargetConstantPool ||
20699 Disp.getOpcode() == ISD::TargetJumpTable);
20700 Base = N.getOperand(0);
20701 break;
20702 }
20703 // This is a constant address at most 32 bits. The base will be
20704 // zero or load-immediate-shifted and the displacement will be
20705 // the low 16 bits of the address.
20706 else if (Flags & PPC::MOF_AddrIsSImm32) {
20707 auto *CN = cast<ConstantSDNode>(N);
20708 EVT CNType = CN->getValueType(0);
20709 uint64_t CNImm = CN->getZExtValue();
20710 // If this address fits entirely in a 16-bit sext immediate field, codegen
20711 // this as "d, 0".
20712 int16_t Imm;
20713 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
20714 Disp = DAG.getSignedTargetConstant(Imm, DL, CNType);
20715 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20716 CNType);
20717 break;
20718 }
20719 // Handle 32-bit sext immediate with LIS + Addr mode.
20720 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
20721 (!Align || isAligned(*Align, CNImm))) {
20722 int32_t Addr = (int32_t)CNImm;
20723 // Otherwise, break this down into LIS + Disp.
20724 Disp = DAG.getSignedTargetConstant((int16_t)Addr, DL, MVT::i32);
20725 Base = DAG.getSignedTargetConstant((Addr - (int16_t)Addr) >> 16, DL,
20726 MVT::i32);
20727 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
20728 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
20729 break;
20730 }
20731 }
20732 // Otherwise, the PPC::MOF_NotAddNorCst flag is set. Load/Store is Non-foldable.
20733 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
20734 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
20735 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20736 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
20737 } else
20738 Base = N;
20739 break;
20740 }
20741 case PPC::AM_PrefixDForm: {
20742 int64_t Imm34 = 0;
20743 unsigned Opcode = N.getOpcode();
20744 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
20745 (isIntS34Immediate(N.getOperand(1), Imm34))) {
20746 // N is an Add/OR node, and its operand is a 34-bit signed immediate.
20747 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
20748 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
20749 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20750 else
20751 Base = N.getOperand(0);
20752 } else if (isIntS34Immediate(N, Imm34)) {
20753 // The address is a 34-bit signed immediate.
20754 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
20755 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
20756 }
20757 break;
20758 }
20759 case PPC::AM_PCRel: {
20760 // When selecting PC-Relative instructions, "Base" is not utilized as
20761 // we select the address as [PC+imm].
20762 Disp = N;
20763 break;
20764 }
20765 case PPC::AM_None:
20766 break;
20767 default: { // By default, X-Form is always available to be selected.
20768 // When a frame index is not aligned, we also match by XForm.
20769 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
20770 Base = FI ? N : N.getOperand(1);
20771 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20772 N.getValueType())
20773 : N.getOperand(0);
20774 break;
20775 }
20776 }
20777 return Mode;
20778}
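// End-to-end example: a 4-byte-aligned word load of (add %r, 20) computes
// reg+imm16 flags, getAddrModeForFlags picks a D-Form, and the switch above
// yields Base = %r, Disp = 20; an insufficiently aligned frame index would
// instead have been demoted to X-Form by setXFormForUnalignedFI.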
20779
20780 CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
20781 bool Return,
20782 bool IsVarArg) const {
20783 switch (CC) {
20784 case CallingConv::Cold:
20785 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
20786 default:
20787 return CC_PPC64_ELF;
20788 }
20789}
20790
20791 bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
20792 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
20793}
20794
20795 TargetLowering::AtomicExpansionKind
20796 PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
20797 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20798 if (shouldInlineQuadwordAtomics() && Size == 128)
20799 return AtomicExpansionKind::MaskedIntrinsic;
20800
20801 switch (AI->getOperation()) {
20802 case AtomicRMWInst::UIncWrap:
20803 case AtomicRMWInst::UDecWrap:
20804 case AtomicRMWInst::USubCond:
20805 case AtomicRMWInst::USubSat:
20806 return AtomicExpansionKind::CmpXChg;
20807 default:
20808 return TargetLowering::shouldExpandAtomicRMWInIR(AI);
20809 }
20810
20811 llvm_unreachable("unreachable atomicrmw operation");
20812}
20813
20814 TargetLowering::AtomicExpansionKind
20815 PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
20816 unsigned Size =
20817 AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
20818 if (shouldInlineQuadwordAtomics() && Size == 128)
20819 return AtomicExpansionKind::MaskedIntrinsic;
20820 return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
20821 }
20822
20823 static Intrinsic::ID
20824 getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
20825 switch (BinOp) {
20826 default:
20827 llvm_unreachable("Unexpected AtomicRMW BinOp");
20828 case AtomicRMWInst::Xchg:
20829 return Intrinsic::ppc_atomicrmw_xchg_i128;
20830 case AtomicRMWInst::Add:
20831 return Intrinsic::ppc_atomicrmw_add_i128;
20832 case AtomicRMWInst::Sub:
20833 return Intrinsic::ppc_atomicrmw_sub_i128;
20834 case AtomicRMWInst::And:
20835 return Intrinsic::ppc_atomicrmw_and_i128;
20836 case AtomicRMWInst::Or:
20837 return Intrinsic::ppc_atomicrmw_or_i128;
20838 case AtomicRMWInst::Xor:
20839 return Intrinsic::ppc_atomicrmw_xor_i128;
20840 case AtomicRMWInst::Nand:
20841 return Intrinsic::ppc_atomicrmw_nand_i128;
20842 }
20843}
20844
20845 Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
20846 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20847 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20848 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
20849 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
20850 Type *ValTy = Incr->getType();
20851 assert(ValTy->getPrimitiveSizeInBits() == 128);
20852 Type *Int64Ty = Type::getInt64Ty(M->getContext());
20853 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
20854 Value *IncrHi =
20855 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
20856 Value *LoHi = Builder.CreateIntrinsic(
20857 getIntrinsicForAtomicRMWBinOp128(AI->getOperation()), {},
20858 {AlignedAddr, IncrLo, IncrHi});
20859 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
20860 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
20861 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
20862 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
20863 return Builder.CreateOr(
20864 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
20865}
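// Sketch of the IR this emits for a 128-bit atomicrmw add (value names are
// illustrative):
//   %incr_lo = trunc i128 %incr to i64
//   %shifted = lshr i128 %incr, 64
//   %incr_hi = trunc i128 %shifted to i64
//   %lohi    = call { i64, i64 }
//       @llvm.ppc.atomicrmw.add.i128(ptr %addr, i64 %incr_lo, i64 %incr_hi)
// after which the two halves are zext'ed, shifted, and or'ed back to i128.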
20866
20867 Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
20868 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20869 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20870 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
20871 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
20872 Type *ValTy = CmpVal->getType();
20873 assert(ValTy->getPrimitiveSizeInBits() == 128);
20874 Function *IntCmpXchg =
20875 Intrinsic::getOrInsertDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
20876 Type *Int64Ty = Type::getInt64Ty(M->getContext());
20877 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
20878 Value *CmpHi =
20879 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
20880 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
20881 Value *NewHi =
20882 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
20883 emitLeadingFence(Builder, CI, Ord);
20884 Value *LoHi =
20885 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
20886 emitTrailingFence(Builder, CI, Ord);
20887 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
20888 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
20889 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
20890 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
20891 return Builder.CreateOr(
20892 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
20893}
20894
20896 return Subtarget.useCRBits();
20897}
20898
20899/// Shuffle masks for vectors of bits are not legal as such vectors are
20900/// reserved for MMA/DM.
20901bool PPCTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const {
20902 if (VT.getScalarType() == MVT::i1)
20903 return false;
20904 return TargetLowering::isShuffleMaskLegal(Mask, VT);
20905}
20906
20907// Optimize the following patterns using vbpermq/vbpermd:
20908// i16 = bitcast(v16i1 truncate(v16i8))
20909// i8 = bitcast(v8i1 truncate(v8i16))
20910// i8 = bitcast(v8i1 truncate(v8i8))
20911SDValue PPCTargetLowering::DAGCombineBitcast(SDNode *N,
20912 DAGCombinerInfo &DCI) const {
20913 SDValue Op0 = N->getOperand(0);
20914 if (Op0.getOpcode() != ISD::TRUNCATE)
20915 return SDValue();
20916 SDValue Src = Op0.getOperand(0);
20917 EVT ResVT = N->getValueType(0);
20918 EVT TruncResVT = Op0.getValueType();
20919 EVT SrcVT = Src.getValueType();
20920 SDLoc dl(N);
20921 SelectionDAG &DAG = DCI.DAG;
20922 bool IsLittleEndian = Subtarget.isLittleEndian();
20923
20924 if (ResVT != MVT::i16 && ResVT != MVT::i8)
20925 return SDValue();
20926 SDValue VBPerm =
20927 GenerateVBPERM(DAG, dl, Src, SrcVT, TruncResVT, IsLittleEndian);
20928 if (!VBPerm)
20929 return SDValue();
20930 SDValue ForExtract = DAG.getBitcast(MVT::v4i32, VBPerm);
20931 SDValue Extracted =
20932 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, ForExtract,
20933 DAG.getIntPtrConstant(IsLittleEndian ? 2 : 1, dl));
20934 return DAG.getNode(ISD::TRUNCATE, dl, ResVT, Extracted);
20935}
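// Target pattern, for example:
//   t1: v16i1 = truncate t0(v16i8)
//   t2: i16   = bitcast t1
// becomes a vbpermq of t0 (via GenerateVBPERM below), a bitcast to v4i32,
// an extract of the lane holding the gathered bits, and a truncate to i16.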
20936
20937SDValue PPCTargetLowering::GenerateVBPERM(SelectionDAG &DAG, SDLoc dl,
20938 SDValue Src, EVT SrcVT, EVT ResVT,
20939 bool IsLE) const {
20940 bool IsV16i8 = (ResVT == MVT::v16i1 && SrcVT == MVT::v16i8);
20941 bool IsV8i16 = (ResVT == MVT::v8i1 && SrcVT == MVT::v8i16);
20942 bool IsV8i8 = (ResVT == MVT::v8i1 && SrcVT == MVT::v8i8);
20943
20944 if (!IsV16i8 && !IsV8i16 && !IsV8i8)
20945 return SDValue();
20946
20947 if (IsV8i8) {
20948 Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i8,
20949 DAG.getUNDEF(MVT::v16i8), Src,
20950 DAG.getIntPtrConstant(0, dl));
20951 }
20952 SmallVector<int, 16> BitIndices(16, 128);
20953 unsigned NumElts = SrcVT.getVectorNumElements();
20954 unsigned EltSize = SrcVT.getScalarType().getSizeInBits();
20955 for (int Idx = 0, End = SrcVT.getVectorNumElements(); Idx < End; Idx++) {
20956 BitIndices[Idx] = EltSize * (NumElts - Idx) - 1;
20957 if (IsV8i8 && IsLE)
20958 BitIndices[Idx] += 64;
20959 }
20960 if (!IsLE)
20961 std::reverse(BitIndices.begin(), BitIndices.end());
20962 SmallVector<SDValue, 16> BVOps;
20963 for (auto Idx : BitIndices)
20964 BVOps.push_back(DAG.getConstant(Idx, dl, MVT::i8));
20965 SDValue VRB = DAG.getBuildVector(MVT::v16i8, dl, BVOps);
20966 return DAG.getNode(
20967 ISD::INTRINSIC_WO_CHAIN, dl, MVT::v16i8,
20968 DAG.getConstant(Intrinsic::ppc_altivec_vbpermq, dl, MVT::i32),
20969 DAG.getBitcast(MVT::v16i8, Src), VRB);
20970}
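// The index formula above (EltSize * (NumElts - Idx) - 1, in vbpermq's
// big-endian bit numbering) selects the lowest bit of each source element,
// i.e. exactly the bit a truncate-to-i1 would keep, and vbpermq packs those
// bits into a single field that the caller then extracts.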
static MCRegister MatchRegisterName(StringRef Name)
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall, std::optional< CallLowering::PtrAuthInfo > &PAI, MachineRegisterInfo &MRI)
return SDValue()
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isLoad(int Opcode)
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
Atomic ordering constants.
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv users
Definition IVUsers.cpp:48
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
lazy value info
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static bool isConstantOrUndef(const SDValue Op)
MachineInstr unsigned OpIdx
#define P(N)
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static SDValue DAGCombineAddc(SDNode *N, llvm::PPCTargetLowering::DAGCombinerInfo &DCI)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
std::tuple< uint32_t, uint8_t > LXVKQPattern
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static bool isShuffleMaskInRange(const SmallVectorImpl< int > &ShuffV, int HalfVec, int LHSLastElementDefined, int RHSLastElementDefined)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static MachineBasicBlock * emitAtomicCmpSwapSoftware(MachineInstr &MI, MachineBasicBlock *BB, const TargetInstrInfo *TII, const PPCSubtarget &Subtarget)
Emit software-emulated atomic compare-and-swap for I8/I16 without hardware partword atomic support.
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model, SelectionDAG &DAG, const TargetMachine &TM)
updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings, and then apply the update...
static bool IsSelect(unsigned Opcode, bool CheckOnlyCC=false)
Check if the opcode is a SELECT or SELECT_CC variant.
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
static void prepareOutOfLineGlueCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static SDValue generateSToVPermutedForVecShuffle(int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts, int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
constexpr uint64_t AIXSmallTlsPolicySizeLimit
static bool canConvertToVcmpequb(SDValue &LHS, SDValue &RHS)
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static cl::opt< unsigned > PPCGatherAllAliasesMaxDepth("ppc-gather-alias-max-depth", cl::init(18), cl::Hidden, cl::desc("max depth when checking alias info in GatherAllAliases()"))
static bool IsSelectCC(unsigned Opcode)
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag, EVT CarryType, SelectionDAG &DAG, const PPCSubtarget &STI)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue combineXorSelectCC(SDNode *N, SelectionDAG &DAG)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static cl::opt< unsigned > PPCMinimumBitTestCmps("ppc-min-bit-test-cmps", cl::init(3), cl::Hidden, cl::desc("Set minimum of largest number of comparisons to use bit test for " "switch on PPC."))
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSFirstElt, int LHSLastElt, int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts, unsigned RHSNumValidElts, const PPCSubtarget &Subtarget)
static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left, SelectionDAG &DAG)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static std::optional< LXVKQPattern > getPatternInfo(const APInt &FullVal)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static SDValue ConvertSETCCToXori(SDNode *N, SelectionDAG &DAG)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG)
Optimize the bitfloor(X) pattern for PowerPC.
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN, bool IsLittleEndian)
static MachineBasicBlock * emitSelect(MachineInstr &MI, MachineBasicBlock *BB, const TargetInstrInfo *TII, const PPCSubtarget &Subtarget)
Emit SELECT instruction, using ISEL if available, otherwise use branch-based control flow.
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static SDValue combineADDToSUB(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static bool canConvertSETCCToXori(SDNode *N)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value, SelectionDAG &DAG, const PPCSubtarget &STI)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static void createAtomicLoopBlocks(MachineFunction *F, MachineBasicBlock *BB, MachineBasicBlock *&loop1MBB, MachineBasicBlock *&loop2MBB, MachineBasicBlock *&exitMBB, MachineInstr &MI, MachineFunction::iterator It)
Helper function to create basic blocks for atomic compare-and-swap.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue combineZextSetccWithZero(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
static MachineBasicBlock * emitAtomicCmpSwapHardware(MachineInstr &MI, MachineBasicBlock *BB, const TargetInstrInfo *TII, const PPCSubtarget &Subtarget)
Emit hardware-supported atomic compare-and-swap for I32/I64 and I8/I16 with partword atomic support.
SDValue convertTwoLoadsAndCmpToVCMPEQUB(SelectionDAG &DAG, SDNode *N, const SDLoc &DL)
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static void signExtendOperandIfUnknown(MachineInstr &MI, MachineBasicBlock *BB, unsigned OpIdx, bool IsByte, const PPCInstrInfo *TII)
static cl::opt< unsigned > PPCAIXTLSModelOptUseIEForLDLimit("ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden, cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a " "function to use initial-exec"))
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableP10StoreForward("disable-p10-store-forward", cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden, cl::init(false))
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
if(PassOpts->AAPipeline)
pre isel intrinsic Pre ISel Intrinsic Lowering
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
SI optimize exec mask operations pre RA
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:483
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG, const SparcSubtarget *Subtarget)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & PPCDoubleDouble()
Definition APFloat.h:299
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5899
bool isDenormal() const
Definition APFloat.h:1539
APInt bitcastToAPInt() const
Definition APFloat.h:1430
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1429
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1353
APInt abs() const
Get the absolute value.
Definition APInt.h:1818
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1419
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:398
bool getBoolValue() const
Convert APInt to a boolean value.
Definition APInt.h:472
double bitsToDouble() const
Converts APInt bits to a double.
Definition APInt.h:1745
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:483
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
BinOp getOperation() const
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const BlockAddress * getBlockAddress() const
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
CallingConv::ID getCallingConv() const
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:217
LLVM_ABI unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
LLVM_ABI IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
LLVM_ABI TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
A debug info location.
Definition DebugLoc.h:123
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:239
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:714
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition Function.cpp:362
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:763
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:775
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
arg_iterator arg_begin()
Definition Function.h:868
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
size_t arg_size() const
Definition Function.h:901
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:216
const Argument * const_arg_iterator
Definition Function.h:74
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:229
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
const GlobalValue * getGlobal() const
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:659
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
void setThreadLocalMode(ThreadLocalMode Val)
bool hasHiddenVisibility() const
LLVM_ABI StringRef getSection() const
Definition Globals.cpp:200
Module * getParent()
Get the module that this global value is contained inside of...
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition Globals.cpp:141
bool hasComdat() const
Type * getValueType() const
bool hasProtectedVisibility() const
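These GlobalValue predicates feed classification decisions such as choosing direct versus indirect (e.g. TOC-based) access. A hedged sketch, assuming GV is a const GlobalValue &:

// Classifying a global before choosing an access pattern.
bool Preemptible = !GV.isStrongDefinitionForLinker();
bool IsTLS = GV.isThreadLocal();
bool NonDefaultVis = GV.hasHiddenVisibility() || GV.hasProtectedVisibility();
Type *ValTy = GV.getValueType();  // the type of the global object itself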
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
LLVM_ABI bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Base class for LoadSDNode and StoreSDNode.
Tracks which library functions to use for a particular subtarget.
An instruction for reading from memory.
bool isUnordered() const
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Metadata node.
Definition Metadata.h:1080
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
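MVT is a thin wrapper around SimpleValueType, so these accessors are cheap and appear throughout legality checks. A small sketch:

// Hedged sketch: inspecting and deriving machine value types.
MVT VT = MVT::v4i32;
if (VT.isVector() && VT.isInteger()) {
  unsigned NumElts = VT.getVectorNumElements();           // 4
  unsigned EltBits = (unsigned)VT.getScalarSizeInBits();  // 32
  MVT WideEltVT = MVT::getIntegerVT(2 * EltBits);         // MVT::i64
}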
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
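Together these MachineBasicBlock operations form the block-splitting idiom that custom inserters (see EmitInstrWithCustomInserter below) rely on. A hedged sketch, assuming MI is the instruction being expanded and BB is its block:

// Split BB after MI and rewire the CFG (custom-inserter idiom).
MachineFunction *MF = BB->getParent();
MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock(BB->getBasicBlock());
MF->insert(std::next(BB->getIterator()), ContBB);
ContBB->splice(ContBB->begin(), BB,
               std::next(MachineBasicBlock::iterator(MI)), BB->end());
ContBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(ContBB);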
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
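CreateStackObject allocates objects the target places itself, while CreateFixedObject describes incoming slots at known offsets from the stack pointer. A hedged sketch, assuming MF is the current MachineFunction:

// Creating and querying stack objects.
MachineFrameInfo &MFI = MF.getFrameInfo();
int TmpFI = MFI.CreateStackObject(/*Size=*/16, Align(16), /*isSpillSlot=*/false);
int ArgFI = MFI.CreateFixedObject(/*Size=*/8, /*SPOffset=*/48, /*IsImmutable=*/true);
Align ObjAlign = MFI.getObjectAlign(TmpFI);
int64_t ObjSize = MFI.getObjectSize(TmpFI);
// getObjectOffset(TmpFI) only becomes meaningful after frame layout runs.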
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
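These methods chain off BuildMI to assemble one instruction operand by operand. A hedged sketch; the opcodes are real PPC ones, but BB, MI, DL, TII, the registers, FI, and MMO are placeholders:

// BuildMI chaining: an add-immediate and a frame-index load.
BuildMI(*BB, MI, DL, TII->get(PPC::ADDI), DestReg)
    .addReg(SrcReg)
    .addImm(16);
BuildMI(*BB, MI, DL, TII->get(PPC::LWZ), LoadReg)
    .addImm(0)
    .addFrameIndex(FI)
    .addMemOperand(MMO);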
Representation of each machine instruction.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLVM_ABI Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
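Virtual-register bookkeeping during custom insertion typically looks like the following hedged sketch (PPC::GPRCRegClass is PPC's 32-bit GPR class; MF is the current MachineFunction):

// Virtual register creation and use-count queries.
MachineRegisterInfo &MRI = MF.getRegInfo();
Register Tmp = MRI.createVirtualRegister(&PPC::GPRCRegClass);
const TargetRegisterClass *RC = MRI.getRegClass(Tmp);
if (MRI.hasOneNonDBGUse(Tmp)) { /* safe to fold into the single user */ }
if (MRI.use_empty(Tmp)) { /* dead; nothing to rewrite */ }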
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register (64-bit SVR4 ABI only).
PPCFunctionInfo - This class is derived from MachineFunction and contains private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
void setMinReservedArea(unsigned size)
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
static bool hasPCRelFlag(unsigned TF)
bool is32BitELFABI() const
unsigned descriptorTOCAnchorOffset() const
MVT getScalarIntVT() const
bool isAIXABI() const
MCRegister getGlueCodeDescriptorRegister() const
const PPCFrameLowering * getFrameLowering() const override
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
MCRegister getEnvironmentPointerRegister() const
bool isSVR4ABI() const
bool isLittleEndian() const
MCRegister getTOCPointerRegister() const
MCRegister getStackPointerRegister() const
bool is64BitELFABI() const
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
const PPCRegisterInfo * getRegisterInfo() const override
unsigned descriptorEnvironmentPointerOffset() const
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
bool hasMultipleConditionRegisters(EVT VT) const override
Does the target have multiple (allocatable) condition registers that can be used to store the results...
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified address, check to see if it can be more efficiently repre...
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified address, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
void getTgtMemIntrinsic(SmallVectorImpl< IntrinsicInfo > &Infos, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo, const LibcallLoweringInfo *LibcallLowering) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
bool useLoadStackGuardNode(const Module &M) const override
Override to support customized stack guard loading.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode), compute the address flags of...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
LLVM_ABI void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
unsigned getNumOperands() const
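These SDNode/SDValue accessors are the vocabulary of every DAG combine: check the opcode, peel operands, and read constants directly. A hedged sketch in the shape of a PerformDAGCombine case:

// Matching (add X, C) on a single-use node.
if (N->getOpcode() == ISD::ADD && N->hasOneUse()) {
  SDValue X = N->getOperand(0);
  if (isa<ConstantSDNode>(N->getOperand(1))) {
    uint64_t C = N->getConstantOperandVal(1);
    EVT VT = N->getValueType(0);
    // ... build and return a cheaper replacement ...
  }
}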
static SectionKind getMetadata()
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
LLVM_ABI Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
LLVM_ABI bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
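Most lowering routines reduce to a handful of these SelectionDAG factory calls. A hedged sketch of the usual shape, assuming Op and DAG come from a LowerOperation-style entry point:

// Building replacement nodes in a lowering hook.
SDLoc dl(Op);
EVT VT = Op.getValueType();
SDValue One = DAG.getConstant(1, dl, VT);
SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, Op.getOperand(0), One);
SDValue Inv = DAG.getNOT(dl, Sum, VT);      // materializes XOR Sum, -1
return DAG.getMergeValues({Sum, Inv}, dl);  // when two results are produced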
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
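The bool returned by insert doubles as a visited test, which is the standard idiom for DAG and CFG walks:

// Hedged sketch: visited-set pattern in a graph walk.
SmallPtrSet<const SDNode *, 16> Visited;
if (!Visited.insert(N).second)
  return;                         // already processed this node
assert(Visited.count(N) == 1);    // membership query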
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:138
Class to represent struct types.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
void setMinimumBitTestCmps(unsigned Val)
Set the minimum of the largest number of comparisons needed to generate a bit test.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
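These setters are exercised once from a target's TargetLowering constructor to describe what is legal; anything not marked Legal is rewritten during legalization. A condensed, hedged sketch of that idiom (the particular choices are illustrative, not PPC's actual table; Subtarget is assumed in scope):

// Constructor-time legality table, in the style of PPCTargetLowering().
addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
setOperationAction(ISD::SELECT, MVT::i32, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i1, Promote);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTargetDAGCombine({ISD::ADD, ISD::SHL});
setSchedulingPreference(Sched::ILP);
computeRegisterProperties(Subtarget.getRegisterInfo());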
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
TargetLowering(const TargetLowering &)=delete
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool useLoadStackGuardNode(const Module &M) const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode, SDNodeFlags Flags={}) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
bool getFunctionSections() const
Return true if functions should be emitted into their own section, corresponding to -ffunction-sectio...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:314
LLVM_ABI bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
Definition Type.cpp:184
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:155
@ FloatTyID
32-bit floating point type
Definition Type.h:59
@ DoubleTyID
64-bit floating point type
Definition Type.h:60
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition Type.h:62
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:286
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:328
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:158
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition Type.h:275
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:823
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ TargetConstantPool
Definition ISDOpcodes.h:189
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:168
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:783
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:857
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:884
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:997
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:438
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SET_ROUNDING
Set rounding mode.
Definition ISDOpcodes.h:979
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:848
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:485
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ TargetExternalSymbol
Definition ISDOpcodes.h:190
@ BR
Control flow instructions. These all have token chains.
@ TargetJumpTable
Definition ISDOpcodes.h:188
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ BR_JT
BR_JT - Jumptable branch.
@ FCANONICALIZE
Returns the platform-specific canonical encoding of a floating-point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of a floating-point class property, as defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:800
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition ISDOpcodes.h:185
@ GET_ROUNDING
Returns the current rounding mode: -1 = Undefined, 0 = Round to 0, 1 = Round to nearest (ties to even), 2 = Round to ...
Definition ISDOpcodes.h:974
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:854
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:815
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values, following the IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:892
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:982
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:809
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:328
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:484
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:150
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:478
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:500
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:930
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:505
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:427
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:963
@ INLINEASM
INLINEASM - Represents an inline asm block.
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:458
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:162
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:860
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:837
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ TargetGlobalTLSAddress
Definition ISDOpcodes.h:186
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
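For illustration, a minimal usage sketch (the intrinsic and the Module pointer M are arbitrary choices, not taken from this file):

#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/Module.h"

// Fetch (or create) the declaration of llvm.ppc.altivec.vperm in module M;
// non-overloaded intrinsics need no OverloadTys argument.
llvm::Function *VPerm =
    llvm::Intrinsic::getOrInsertDeclaration(M, llvm::Intrinsic::ppc_altivec_vperm);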
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition NVPTX.h:155
@ MO_TLSLDM_FLAG
MO_TLSLDM_FLAG - on AIX the ML relocation type is only valid for a reference to a TOC symbol from the...
Definition PPC.h:148
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition PPC.h:196
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition PPC.h:199
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition PPC.h:174
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition PPC.h:205
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition PPC.h:156
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition PPC.h:123
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition PPC.h:152
@ MO_TLS_PCREL_FLAG
MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition PPC.h:202
@ MO_TPREL_HA
Definition PPC.h:181
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition PPC.h:115
@ MO_TLS
Symbol for VK_TLS fixup attached to an ADD instruction.
Definition PPC.h:190
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition PPC.h:142
@ MO_TPREL_LO
Definition PPC.h:180
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition PPC.h:177
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition PPC.h:168
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition PPC.h:193
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition PPC.h:137
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition PPC.h:162
@ MO_HA
Definition PPC.h:178
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition PPC.h:119
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
Define some predicates that are used for node matching.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo, const LibcallLoweringInfo *LibcallLowering)
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VMRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
Invariant opcodes: All instruction sets have these as their low opcodes.
@ XMC_PR
Program Code.
Definition XCOFF.h:106
@ XTY_ER
External reference.
Definition XCOFF.h:242
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
iterator end() const
Definition BasicBlock.h:89
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:557
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
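A minimal usage sketch over a plain std::vector (any range works; the analogous any_of appears below):

#include "llvm/ADT/STLExtras.h"
#include <vector>

std::vector<int> Vals = {2, 4, 6};
bool AllEven = llvm::all_of(Vals, [](int V) { return V % 2 == 0; }); // true
bool AnyOdd  = llvm::any_of(Vals, [](int V) { return V % 2 != 0; }); // false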
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
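A hedged sketch of the usual builder pattern; MBB, DL, and TII are assumed to be in scope, and the opcode and registers are illustrative only:

// Emit "r3 = ADDI r4, 16" at the end of MBB (illustrative operands).
BuildMI(MBB, MBB.end(), DL, TII->get(PPC::ADDI), PPC::R3)
    .addReg(PPC::R4)
    .addImm(16);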
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
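A small usage sketch of the signed-fit check; the template parameter selects the bit width:

llvm::isInt<16>(32767);  // true:  the signed 16-bit maximum
llvm::isInt<16>(32768);  // false: one past it
llvm::isInt<16>(-32768); // true:  the signed 16-bit minimum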
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:284
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
bool isa_and_nonnull(const Y &Val)
Definition Casting.h:676
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant, stopping at the first 1.
Definition bit.h:204
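For example:

llvm::countr_zero(8u); // 3: 0b1000 has three trailing zeros
llvm::countr_zero(1u); // 0
llvm::countr_zero(0u); // 32: the full width of the (unsigned) argument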
unsigned M1(unsigned Val)
Definition VE.h:377
bool isReleaseOrStronger(AtomicOrdering AO)
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
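For example:

llvm::isPowerOf2_32(64u);  // true
llvm::isPowerOf2_32(0u);   // false: zero is excluded
llvm::has_single_bit(64u); // the equivalent C++20-style test shown above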
bool convertToNonDenormSingle(APInt &ArgAPInt)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
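A small sketch combining alignTo with the isAligned predicate above:

#include "llvm/Support/Alignment.h"

llvm::Align A(16);
llvm::alignTo(40, A);   // 48: the next multiple of 16 that can hold 40 bytes
llvm::isAligned(A, 48); // true
llvm::isAligned(A, 40); // false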
constexpr RegState getDefRegState(bool B)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
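For example, splitting a 64-bit constant into the two halves that Hi_32/Lo_32 return:

llvm::Hi_32(0x123456789ABCDEF0ULL); // 0x12345678
llvm::Lo_32(0x123456789ABCDEF0ULL); // 0x9ABCDEF0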
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:129
@ Success
The lock was released successfully.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
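For example, combining errs() with the format() helper above:

llvm::errs() << llvm::format("flags = 0x%08x\n", 0xC0DEu);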
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests whether the value of the given node can be accurately represented as a sign ...
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2011
DWARFExpression::Operation Op
bool isPhysRegUsedAfter(Register Reg, MachineBasicBlock::iterator MBI)
Check if physical register Reg is used after MBI.
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition MathExtras.h:554
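A hedged sketch of how these helpers pair up when decoding immediate fields; the widths are illustrative, not a specific PPC encoding:

llvm::SignExtend32<16>(0xFFFFu);    // -1: all-ones 16-bit field
llvm::SignExtend32<16>(0x7FFFu);    // 32767
llvm::isShiftedInt<16, 2>(0x1FFFC); // true:  signed 16 bits, shifted left by 2
llvm::isShiftedInt<16, 2>(0x1FFFE); // false: not a multiple of 4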
constexpr unsigned BitWidth
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
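A minimal standalone sketch of the same test (not the LLVM implementation; the MB/ME mask-boundary outputs are omitted):

#include "llvm/ADT/bit.h"

static bool isRunOfOnesSketch(unsigned Val) {
  if (Val == 0)
    return false;                 // no 1s at all
  Val >>= llvm::countr_zero(Val); // drop the trailing 0s
  return (Val & (Val + 1)) == 0;  // remaining bits form 0b0..01..1
}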
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
Definition DWP.h:32
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:347
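For example:

llvm::bit_floor(20u); // 16, the largest power of two not above 20
llvm::bit_floor(0u);  // 0 by definition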
constexpr bool isShiftedUInt(uint64_t x)
Checks if an unsigned integer is an N bit number shifted left by S.
Definition MathExtras.h:198
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:292
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
LLVM_ABI std::string getEVTString() const
This function returns value type as a string, e.g. "i32".
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition ValueTypes.h:150
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:469
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
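A small sketch exercising several of the EVT queries above; v4i32 is the usual Altivec/VSX word-vector type:

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"

llvm::LLVMContext Ctx;
llvm::EVT VT = llvm::EVT::getVectorVT(Ctx, llvm::MVT::i32, 4); // v4i32
VT.isVector();             // true
VT.getVectorNumElements(); // 4
VT.getScalarSizeInBits();  // 32
VT.getSizeInBits();        // 128 (as a TypeSize)
VT.getVectorElementType(); // i32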
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
bool isConstant() const
Returns true if we know the value of all bits.
Definition KnownBits.h:54
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:72
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition KnownBits.h:58
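A hedged sketch of the bookkeeping these KnownBits accessors expose:

#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"

llvm::KnownBits Known(8);
Known.One  = llvm::APInt(8, 0xF0); // high nibble known to be 1
Known.Zero = llvm::APInt(8, 0x0F); // low nibble known to be 0
Known.isConstant();                // true: every bit is accounted for
Known.getConstant();               // the APInt 0xF0
Known.resetAll();                  // back to "nothing known"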
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
void setNoFPExcept(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setTailCall(bool Value=true)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to the makeLibCall function.