SystemZISelLowering.cpp
1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
18#include "llvm/ADT/SmallSet.h"
24#include "llvm/IR/GlobalAlias.h"
26#include "llvm/IR/Intrinsics.h"
27#include "llvm/IR/IntrinsicsS390.h"
32#include <cctype>
33#include <optional>
34
35using namespace llvm;
36
37#define DEBUG_TYPE "systemz-lower"
38
39// Temporarily let this be disabled by default until all known problems
40// related to argument extensions are fixed.
42 "argext-abi-check", cl::init(false),
43 cl::desc("Verify that narrow int args are properly extended per the "
44 "SystemZ ABI."));
45
46namespace {
47// Represents information about a comparison.
48struct Comparison {
49 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
50 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
51 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
52
53 // The operands to the comparison.
54 SDValue Op0, Op1;
55
56 // Chain if this is a strict floating-point comparison.
57 SDValue Chain;
58
59 // The opcode that should be used to compare Op0 and Op1.
60 unsigned Opcode;
61
62 // A SystemZICMP value. Only used for integer comparisons.
63 unsigned ICmpType;
64
65 // The mask of CC values that Opcode can produce.
66 unsigned CCValid;
67
68 // The mask of CC values for which the original condition is true.
69 unsigned CCMask;
70};
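// For example, a signed integer equality test lowered to a COMPARE-style
// instruction would use CCValid = SystemZ::CCMASK_ICMP (only CC values 0, 1
// and 2 are possible) and CCMask = SystemZ::CCMASK_CMP_EQ (the condition
// holds for CC 0 only).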
71} // end anonymous namespace
72
73// Classify VT as either 32 or 64 bit.
74static bool is32Bit(EVT VT) {
75 switch (VT.getSimpleVT().SimpleTy) {
76 case MVT::i32:
77 return true;
78 case MVT::i64:
79 return false;
80 default:
81 llvm_unreachable("Unsupported type");
82 }
83}
84
85// Return a version of MachineOperand that can be safely used before the
86// final use.
87static MachineOperand earlyUseOperand(MachineOperand Op) {
88 if (Op.isReg())
89 Op.setIsKill(false);
90 return Op;
91}
92
93SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
94 const SystemZSubtarget &STI)
95 : TargetLowering(TM, STI), Subtarget(STI) {
96 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
97
98 auto *Regs = STI.getSpecialRegisters();
99
100 // Set up the register classes.
101 if (Subtarget.hasHighWord())
102 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
103 else
104 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
105 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
106 if (!useSoftFloat()) {
107 if (Subtarget.hasVector()) {
108 addRegisterClass(MVT::f16, &SystemZ::VR16BitRegClass);
109 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
110 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
111 } else {
112 addRegisterClass(MVT::f16, &SystemZ::FP16BitRegClass);
113 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
114 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
115 }
116 if (Subtarget.hasVectorEnhancements1())
117 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
118 else
119 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
120
121 if (Subtarget.hasVector()) {
122 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
123 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
124 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
125 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
126 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
127 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
128 }
129
130 if (Subtarget.hasVector())
131 addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass);
132 }
133
134 // Compute derived properties from the register classes
135 computeRegisterProperties(Subtarget.getRegisterInfo());
136
137 // Set up special registers.
138 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
139
140 // TODO: It may be better to default to latency-oriented scheduling, however
141 // LLVM's current latency-oriented scheduler can't handle physreg definitions
142 // such as SystemZ has with CC, so set this to the register-pressure
143 // scheduler, because it can.
144 setSchedulingPreference(Sched::RegPressure);
145
148
150
151 // Instructions are strings of 2-byte aligned 2-byte values.
152 setMinFunctionAlignment(Align(2));
153 // For performance reasons we prefer 16-byte alignment.
154 setPrefFunctionAlignment(Align(16));
155
156 // Handle operations that are handled in a similar way for all types.
157 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
158 I <= MVT::LAST_FP_VALUETYPE;
159 ++I) {
161 if (isTypeLegal(VT)) {
162 // Lower SET_CC into an IPM-based sequence.
166
167 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
169
170 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
173 }
174 }
175
176 // Expand jump table branches as address arithmetic followed by an
177 // indirect jump.
179
180 // Expand BRCOND into a BR_CC (see above).
182
183 // Handle integer types except i128.
184 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
185 I <= MVT::LAST_INTEGER_VALUETYPE;
186 ++I) {
188 if (isTypeLegal(VT) && VT != MVT::i128) {
190
191 // Expand individual DIV and REMs into DIVREMs.
198
199 // Support addition/subtraction with overflow.
202
203 // Support addition/subtraction with carry.
206
207 // Support carry in as value rather than glue.
210
211 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
212 // available, or if the operand is constant.
214
215 // Use POPCNT on z196 and above.
216 if (Subtarget.hasPopulationCount())
218 else
220
221 // No special instructions for these.
224
225 // Use *MUL_LOHI where possible instead of MULH*.
230
231 // The fp<=>i32/i64 conversions are all Legal except for f16 and for
232 // unsigned on z10 (only z196 and above have native support for
233 // unsigned conversions).
240 // Handle unsigned 32-bit input types as signed 64-bit types on z10.
241 auto OpAction =
242 (!Subtarget.hasFPExtension() && VT == MVT::i32) ? Promote : Custom;
243 setOperationAction(Op, VT, OpAction);
244 }
245 }
246 }
247
248 // Handle i128 if legal.
249 if (isTypeLegal(MVT::i128)) {
250 // No special instructions for these.
257
258 // We may be able to use VSLDB/VSLD/VSRD for these.
261
262 // No special instructions for these before z17.
263 if (!Subtarget.hasVectorEnhancements3()) {
273 } else {
274 // Even if we do have a legal 128-bit multiply, we do not
275 // want 64-bit multiply-high operations to use it.
278 }
279
280 // Support addition/subtraction with carry.
285
286 // Use VPOPCT and add up partial results.
288
289 // Additional instructions available with z17.
290 if (Subtarget.hasVectorEnhancements3()) {
291 setOperationAction(ISD::ABS, MVT::i128, Legal);
292
294 MVT::i128, Legal);
295 }
296 }
297
298 // These need custom handling in order to handle the f16 conversions.
307
308 // Type legalization will convert 8- and 16-bit atomic operations into
309 // forms that operate on i32s (but still keeping the original memory VT).
310 // Lower them into full i32 operations.
322
323 // Whether or not i128 is a legal type, we need to custom lower
324 // the atomic operations in order to exploit SystemZ instructions.
329
330 // Mark sign/zero extending atomic loads as legal, which will make
331 // DAGCombiner fold extensions into atomic loads if possible.
333 {MVT::i8, MVT::i16, MVT::i32}, Legal);
335 {MVT::i8, MVT::i16}, Legal);
337 MVT::i8, Legal);
338
339 // We can use the CC result of compare-and-swap to implement
340 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
344
346
347 // Traps are legal, as we will convert them to "j .+2".
348 setOperationAction(ISD::TRAP, MVT::Other, Legal);
349
350 // We have native support for a 64-bit CTLZ, via FLOGR.
354
355 // On z17 we have native support for a 64-bit CTTZ.
356 if (Subtarget.hasMiscellaneousExtensions4()) {
360 }
361
362 // On z15 we have native support for a 64-bit CTPOP.
363 if (Subtarget.hasMiscellaneousExtensions3()) {
366 }
367
368 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
370
371 // Expand 128 bit shifts without using a libcall.
375
376 // Also expand 256 bit shifts if i128 is a legal type.
377 if (isTypeLegal(MVT::i128)) {
381 }
382
383 // Handle bitcast from fp128 to i128.
384 if (!isTypeLegal(MVT::i128))
386
387 // We have native instructions for i8, i16 and i32 extensions, but not i1.
389 for (MVT VT : MVT::integer_valuetypes()) {
393 }
394
395 // Handle the various types of symbolic address.
401
402 // We need to handle dynamic allocations specially because of the
403 // 160-byte area at the bottom of the stack.
406
409
410 // Handle prefetches with PFD or PFDRL.
412
413 // Handle readcyclecounter with STCKF.
415
417 // Assume by default that all vector operations need to be expanded.
418 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
419 if (getOperationAction(Opcode, VT) == Legal)
420 setOperationAction(Opcode, VT, Expand);
421
422 // Likewise all truncating stores and extending loads.
423 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
424 setTruncStoreAction(VT, InnerVT, Expand);
427 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
428 }
429
430 if (isTypeLegal(VT)) {
431 // These operations are legal for anything that can be stored in a
432 // vector register, even if there is no native support for the format
433 // as such. In particular, we can do these for v4f32 even though there
434 // are no specific instructions for that format.
440
441 // Likewise, except that we need to replace the nodes with something
442 // more specific.
445 }
446 }
447
448 // Handle integer vector types.
450 if (isTypeLegal(VT)) {
451 // These operations have direct equivalents.
456 if (VT != MVT::v2i64 || Subtarget.hasVectorEnhancements3()) {
460 }
461 if (Subtarget.hasVectorEnhancements3() &&
462 VT != MVT::v16i8 && VT != MVT::v8i16) {
467 }
472 if (Subtarget.hasVectorEnhancements1())
474 else
478
479 // Convert a GPR scalar to a vector by inserting it into element 0.
481
482 // Use a series of unpacks for extensions.
485
486 // Detect shifts/rotates by a scalar amount and convert them into
487 // V*_BY_SCALAR.
492
493 // Add ISD::VECREDUCE_ADD as custom in order to implement
494 // it with VZERO+VSUM
496
497 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
498 // and inverting the result as necessary.
500
502 Legal);
503 }
504 }
505
506 if (Subtarget.hasVector()) {
507 // There should be no need to check for float types other than v2f64
508 // since <2 x f32> isn't a legal type.
517
526 }
527
528 if (Subtarget.hasVectorEnhancements2()) {
537
546 }
547
548 // Handle floating-point types.
549 if (!useSoftFloat()) {
550 // Promote all f16 operations to float, with some exceptions below.
551 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
552 setOperationAction(Opc, MVT::f16, Promote);
554 for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
555 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
556 setTruncStoreAction(VT, MVT::f16, Expand);
557 }
559 setOperationAction(Op, MVT::f16, Subtarget.hasVector() ? Legal : Custom);
564 for (auto Op : {ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN})
565 setOperationAction(Op, MVT::f16, Legal);
566 }
567
568 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
569 I <= MVT::LAST_FP_VALUETYPE;
570 ++I) {
572 if (isTypeLegal(VT) && VT != MVT::f16) {
573 // We can use FI for FRINT.
575
576 // We can use the extended form of FI for other rounding operations.
577 if (Subtarget.hasFPExtension()) {
584 }
585
586 // No special instructions for these.
592
593 // Special treatment.
595
596 // Handle constrained floating-point operations.
605 if (Subtarget.hasFPExtension()) {
612 }
613
614 // Extension from f16 needs libcall.
617 }
618 }
619
620 // Handle floating-point vector types.
621 if (Subtarget.hasVector()) {
622 // Scalar-to-vector conversion is just a subreg.
625
626 // Some insertions and extractions can be done directly but others
627 // need to go via integers.
632
633 // These operations have direct equivalents.
634 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
635 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
636 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
637 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
638 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
639 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
640 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
641 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
642 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
645 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
649
650 // Handle constrained floating-point operations.
664
669 if (Subtarget.hasVectorEnhancements1()) {
672 }
673 }
674
675 // The vector enhancements facility 1 has instructions for these.
676 if (Subtarget.hasVectorEnhancements1()) {
677 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
678 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
679 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
680 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
681 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
682 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
683 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
684 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
685 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
688 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
692
697
702
707
712
717
718 // Handle constrained floating-point operations.
732 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
733 MVT::v4f32, MVT::v2f64 }) {
738 }
739 }
740
741 // We only have fused f128 multiply-addition on vector registers.
742 if (!Subtarget.hasVectorEnhancements1()) {
745 }
746
747 // We don't have a copysign instruction on vector registers.
748 if (Subtarget.hasVectorEnhancements1())
750
751 // Needed so that we don't try to implement f128 constant loads using
752 // a load-and-extend of a f80 constant (in cases where the constant
753 // would fit in an f80).
754 for (MVT VT : MVT::fp_valuetypes())
755 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
756
757 // We don't have extending load instruction on vector registers.
758 if (Subtarget.hasVectorEnhancements1()) {
759 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
760 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
761 }
762
763 // Floating-point truncation and stores need to be done separately.
764 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
765 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
766 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
767
768 // We have 64-bit FPR<->GPR moves, but need special handling for
769 // 32-bit forms.
770 if (!Subtarget.hasVector()) {
773 }
774
775 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
776 // structure, but VAEND is a no-op.
780
781 if (Subtarget.isTargetzOS()) {
782 // Handle address space casts between mixed sized pointers.
785 }
786
788
789 // Codes for which we want to perform some z-specific combinations.
793 ISD::LOAD,
806 ISD::SRL,
807 ISD::SRA,
808 ISD::MUL,
809 ISD::SDIV,
810 ISD::UDIV,
811 ISD::SREM,
812 ISD::UREM,
815
816 // Handle intrinsics.
819
820 // We're not using SJLJ for exception handling, but they're implemented
821 // solely to support use of __builtin_setjmp / __builtin_longjmp.
824
825 // We want to use MVC in preference to even a single load/store pair.
826 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
828
829 // The main memset sequence is a byte store followed by an MVC.
830 // Two STC or MV..I stores win over that, but the kind of fused stores
831 // generated by target-independent code don't when the byte value is
832 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
833 // than "STC;MVC". Handle the choice in target-specific code instead.
834 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
836
837 // Default to having -disable-strictnode-mutation on
838 IsStrictFPEnabled = true;
839}
840
841bool SystemZTargetLowering::useSoftFloat() const {
842 return Subtarget.hasSoftFloat();
843}
844
846 LLVMContext &, EVT VT) const {
847 if (!VT.isVector())
848 return MVT::i32;
850}
851
853 const MachineFunction &MF, EVT VT) const {
854 if (useSoftFloat())
855 return false;
856
857 VT = VT.getScalarType();
858
859 if (!VT.isSimple())
860 return false;
861
862 switch (VT.getSimpleVT().SimpleTy) {
863 case MVT::f32:
864 case MVT::f64:
865 return true;
866 case MVT::f128:
867 return Subtarget.hasVectorEnhancements1();
868 default:
869 break;
870 }
871
872 return false;
873}
874
875// Return true if the constant can be generated with a vector instruction,
876// such as VGM, VGMB or VREPI.
878 const SystemZSubtarget &Subtarget) {
879 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
880 if (!Subtarget.hasVector() ||
881 (isFP128 && !Subtarget.hasVectorEnhancements1()))
882 return false;
883
884 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
885 // preferred way of creating all-zero and all-one vectors so give it
886 // priority over other methods below.
887 unsigned Mask = 0;
888 unsigned I = 0;
889 for (; I < SystemZ::VectorBytes; ++I) {
890 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
891 if (Byte == 0xff)
892 Mask |= 1ULL << I;
893 else if (Byte != 0)
894 break;
895 }
896 if (I == SystemZ::VectorBytes) {
897 Opcode = SystemZISD::BYTE_MASK;
898 OpVals.push_back(Mask);
900 return true;
901 }
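// (VECTOR GENERATE BYTE MASK expands each of the 16 mask bits into a byte of
// all ones or all zeros, so for example an all-ones 128-bit constant yields
// Mask = 0xffff and an all-zeros constant yields Mask = 0.)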
902
903 if (SplatBitSize > 64)
904 return false;
905
906 auto TryValue = [&](uint64_t Value) -> bool {
907 // Try VECTOR REPLICATE IMMEDIATE
908 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
909 if (isInt<16>(SignedValue)) {
910 OpVals.push_back(((unsigned) SignedValue));
911 Opcode = SystemZISD::REPLICATE;
913 SystemZ::VectorBits / SplatBitSize);
914 return true;
915 }
916 // Try VECTOR GENERATE MASK
917 unsigned Start, End;
918 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
919 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
920 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
921 // a SplatBitSize-bit value, so that 0 denotes 1 << (SplatBitSize-1).
922 OpVals.push_back(Start - (64 - SplatBitSize));
923 OpVals.push_back(End - (64 - SplatBitSize));
924 Opcode = SystemZISD::ROTATE_MASK;
926 SystemZ::VectorBits / SplatBitSize);
927 return true;
928 }
929 return false;
930 };
931
932 // First try assuming that any undefined bits above the highest set bit
933 // and below the lowest set bit are 1s. This increases the likelihood of
934 // being able to use a sign-extended element value in VECTOR REPLICATE
935 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
936 uint64_t SplatBitsZ = SplatBits.getZExtValue();
937 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
938 unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
939 unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
940 uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
941 uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
942 if (TryValue(SplatBitsZ | Upper | Lower))
943 return true;
944
945 // Now try assuming that any undefined bits between the first and
946 // last defined set bits are set. This increases the chances of
947 // using a non-wraparound mask.
948 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
949 return TryValue(SplatBitsZ | Middle);
950}
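// Worked example (assuming a v8i16 splat): the element value 0xfffe becomes
// SignedValue = -2 after sign-extending from SplatBitSize = 16 bits, which
// fits in 16 bits, so TryValue() above selects SystemZISD::REPLICATE
// (VECTOR REPLICATE IMMEDIATE) with operand -2 and an 8-element vector type.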
951
953 if (IntImm.isSingleWord()) {
954 IntBits = APInt(128, IntImm.getZExtValue());
955 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
956 } else
957 IntBits = IntImm;
958 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
959
960 // Find the smallest splat.
961 SplatBits = IntImm;
962 unsigned Width = SplatBits.getBitWidth();
963 while (Width > 8) {
964 unsigned HalfSize = Width / 2;
965 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
966 APInt LowValue = SplatBits.trunc(HalfSize);
967
968 // If the two halves do not match, stop here.
969 if (HighValue != LowValue || 8 > HalfSize)
970 break;
971
972 SplatBits = HighValue;
973 Width = HalfSize;
974 }
975 SplatUndef = 0;
976 SplatBitSize = Width;
977}
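// For example, the 128-bit pattern 0x0001 repeated eight times halves
// repeatedly (128 -> 64 -> 32 -> 16 bits) until the two halves differ,
// leaving SplatBits = 0x0001 and SplatBitSize = 16.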
978
980 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
981 bool HasAnyUndefs;
982
983 // Get IntBits by finding the 128 bit splat.
984 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
985 true);
986
987 // Get SplatBits by finding the 8 bit or greater splat.
988 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
989 true);
990}
991
993 bool ForCodeSize) const {
994 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
995 if (Imm.isZero() || Imm.isNegZero())
996 return true;
997
999}
1000
1003 MachineBasicBlock *MBB) const {
1004 DebugLoc DL = MI.getDebugLoc();
1005 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1006 const SystemZRegisterInfo *TRI = Subtarget.getRegisterInfo();
1007
1008 MachineFunction *MF = MBB->getParent();
1009 MachineRegisterInfo &MRI = MF->getRegInfo();
1010
1011 const BasicBlock *BB = MBB->getBasicBlock();
1012 MachineFunction::iterator I = ++MBB->getIterator();
1013
1014 Register DstReg = MI.getOperand(0).getReg();
1015 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
1016 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
1017 (void)TRI;
1018 Register MainDstReg = MRI.createVirtualRegister(RC);
1019 Register RestoreDstReg = MRI.createVirtualRegister(RC);
1020
1021 MVT PVT = getPointerTy(MF->getDataLayout());
1022 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
1023 // For v = setjmp(buf), we generate.
1024 // Algorithm:
1025 //
1026 // ---------
1027 // | thisMBB |
1028 // ---------
1029 // |
1030 // ------------------------
1031 // | |
1032 // ---------- ---------------
1033 // | mainMBB | | restoreMBB |
1034 // | v = 0 | | v = 1 |
1035 // ---------- ---------------
1036 // | |
1037 // -------------------------
1038 // |
1039 // -----------------------------
1040 // | sinkMBB |
1041 // | phi(v_mainMBB,v_restoreMBB) |
1042 // -----------------------------
1043 // thisMBB:
1044 // buf[FPOffset] = Frame Pointer if hasFP.
1045 // buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB.
1046 // buf[BCOffset] = Backchain value if building with -mbackchain.
1047 // buf[SPOffset] = Stack Pointer.
1048 // buf[LPOffset] = We never write this slot; gcc always stores R13 here.
1049 // SjLjSetup restoreMBB
1050 // mainMBB:
1051 // v_main = 0
1052 // sinkMBB:
1053 // v = phi(v_main, v_restore)
1054 // restoreMBB:
1055 // v_restore = 1
1056
1057 MachineBasicBlock *ThisMBB = MBB;
1058 MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
1059 MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
1060 MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
1061
1062 MF->insert(I, MainMBB);
1063 MF->insert(I, SinkMBB);
1064 MF->push_back(RestoreMBB);
1065 RestoreMBB->setMachineBlockAddressTaken();
1066
1068
1069 // Transfer the remainder of BB and its successor edges to sinkMBB.
1070 SinkMBB->splice(SinkMBB->begin(), MBB,
1071 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
1072 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
1073
1074 // thisMBB:
1075 const int64_t FPOffset = 0; // Slot 1.
1076 const int64_t LabelOffset = 1 * PVT.getStoreSize(); // Slot 2.
1077 const int64_t BCOffset = 2 * PVT.getStoreSize(); // Slot 3.
1078 const int64_t SPOffset = 3 * PVT.getStoreSize(); // Slot 4.
1079
1080 // Buf address.
1081 Register BufReg = MI.getOperand(1).getReg();
1082
1083 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
1084 Register LabelReg = MRI.createVirtualRegister(PtrRC);
1085
1086 // Prepare IP for longjmp.
1087 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::LARL), LabelReg)
1088 .addMBB(RestoreMBB);
1089 // Store IP for return from jmp, slot 2, offset = 1.
1090 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1091 .addReg(LabelReg)
1092 .addReg(BufReg)
1093 .addImm(LabelOffset)
1094 .addReg(0);
1095
1096 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1097 bool HasFP = Subtarget.getFrameLowering()->hasFP(*MF);
1098 if (HasFP) {
1099 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1100 .addReg(SpecialRegs->getFramePointerRegister())
1101 .addReg(BufReg)
1102 .addImm(FPOffset)
1103 .addReg(0);
1104 }
1105
1106 // Store SP.
1107 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1108 .addReg(SpecialRegs->getStackPointerRegister())
1109 .addReg(BufReg)
1110 .addImm(SPOffset)
1111 .addReg(0);
1112
1113 // Slot 3 (Offset = 2): Backchain value (if building with -mbackchain).
1114 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1115 if (BackChain) {
1116 Register BCReg = MRI.createVirtualRegister(PtrRC);
1117 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1118 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::LG), BCReg)
1119 .addReg(SpecialRegs->getStackPointerRegister())
1120 .addImm(TFL->getBackchainOffset(*MF))
1121 .addReg(0);
1122
1123 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1124 .addReg(BCReg)
1125 .addReg(BufReg)
1126 .addImm(BCOffset)
1127 .addReg(0);
1128 }
1129
1130 // Setup.
1131 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::EH_SjLj_Setup))
1132 .addMBB(RestoreMBB);
1133
1134 const SystemZRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1135 MIB.addRegMask(RegInfo->getNoPreservedMask());
1136
1137 ThisMBB->addSuccessor(MainMBB);
1138 ThisMBB->addSuccessor(RestoreMBB);
1139
1140 // mainMBB:
1141 BuildMI(MainMBB, DL, TII->get(SystemZ::LHI), MainDstReg).addImm(0);
1142 MainMBB->addSuccessor(SinkMBB);
1143
1144 // sinkMBB:
1145 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(SystemZ::PHI), DstReg)
1146 .addReg(MainDstReg)
1147 .addMBB(MainMBB)
1148 .addReg(RestoreDstReg)
1149 .addMBB(RestoreMBB);
1150
1151 // restoreMBB.
1152 BuildMI(RestoreMBB, DL, TII->get(SystemZ::LHI), RestoreDstReg).addImm(1);
1153 BuildMI(RestoreMBB, DL, TII->get(SystemZ::J)).addMBB(SinkMBB);
1154 RestoreMBB->addSuccessor(SinkMBB);
1155
1156 MI.eraseFromParent();
1157
1158 return SinkMBB;
1159}
1160
1163 MachineBasicBlock *MBB) const {
1164
1165 DebugLoc DL = MI.getDebugLoc();
1166 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1167
1168 MachineFunction *MF = MBB->getParent();
1169 MachineRegisterInfo &MRI = MF->getRegInfo();
1170
1171 MVT PVT = getPointerTy(MF->getDataLayout());
1172 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
1173 Register BufReg = MI.getOperand(0).getReg();
1174 const TargetRegisterClass *RC = MRI.getRegClass(BufReg);
1175 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1176
1177 Register Tmp = MRI.createVirtualRegister(RC);
1178 Register BCReg = MRI.createVirtualRegister(RC);
1179
1181
1182 const int64_t FPOffset = 0;
1183 const int64_t LabelOffset = 1 * PVT.getStoreSize();
1184 const int64_t BCOffset = 2 * PVT.getStoreSize();
1185 const int64_t SPOffset = 3 * PVT.getStoreSize();
1186 const int64_t LPOffset = 4 * PVT.getStoreSize();
1187
1188 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), Tmp)
1189 .addReg(BufReg)
1190 .addImm(LabelOffset)
1191 .addReg(0);
1192
1193 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
1194 SpecialRegs->getFramePointerRegister())
1195 .addReg(BufReg)
1196 .addImm(FPOffset)
1197 .addReg(0);
1198
1199 // We are restoring R13 even though we never stored it in setjmp from llvm,
1200 // as gcc always stores R13 in builtin_setjmp. We could be mixing code that
1201 // uses gcc's setjmp with code that uses llvm's longjmp.
1202 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), SystemZ::R13D)
1203 .addReg(BufReg)
1204 .addImm(LPOffset)
1205 .addReg(0);
1206
1207 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1208 if (BackChain) {
1209 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), BCReg)
1210 .addReg(BufReg)
1211 .addImm(BCOffset)
1212 .addReg(0);
1213 }
1214
1215 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
1216 SpecialRegs->getStackPointerRegister())
1217 .addReg(BufReg)
1218 .addImm(SPOffset)
1219 .addReg(0);
1220
1221 if (BackChain) {
1222 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1223 BuildMI(*MBB, MI, DL, TII->get(SystemZ::STG))
1224 .addReg(BCReg)
1225 .addReg(SpecialRegs->getStackPointerRegister())
1226 .addImm(TFL->getBackchainOffset(*MF))
1227 .addReg(0);
1228 }
1229
1230 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BR)).addReg(Tmp);
1231
1232 MI.eraseFromParent();
1233 return MBB;
1234}
1235
1236/// Returns true if stack probing through inline assembly is requested.
1238 // If the function specifically requests inline stack probes, emit them.
1239 if (MF.getFunction().hasFnAttribute("probe-stack"))
1240 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
1241 "inline-asm";
1242 return false;
1243}
1244
1249
1254
1257 // Don't expand subword operations as they require special treatment.
1258 if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16))
1260
1261 // Don't expand if there is a target instruction available.
1262 if (Subtarget.hasInterlockedAccess1() &&
1263 (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) &&
1270
1272}
1273
1275 // We can use CGFI or CLGFI.
1276 return isInt<32>(Imm) || isUInt<32>(Imm);
1277}
1278
1280 // We can use ALGFI or SLGFI.
1281 return isUInt<32>(Imm) || isUInt<32>(-Imm);
1282}
1283
1285 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
1286 // Unaligned accesses should never be slower than the expanded version.
1287 // We check specifically for aligned accesses in the few cases where
1288 // they are required.
1289 if (Fast)
1290 *Fast = 1;
1291 return true;
1292}
1293
1295 EVT VT = Y.getValueType();
1296
1297 // We can use NC(G)RK for types in GPRs ...
1298 if (VT == MVT::i32 || VT == MVT::i64)
1299 return Subtarget.hasMiscellaneousExtensions3();
1300
1301 // ... or VNC for types in VRs.
1302 if (VT.isVector() || VT == MVT::i128)
1303 return Subtarget.hasVector();
1304
1305 return false;
1306}
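// For example, with miscellaneous-extensions-3 (z15) an i64 (x & ~y) becomes
// a single NCGRK, and with the vector facility, vector and i128 operands use
// VNC (VECTOR AND WITH COMPLEMENT).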
1307
1308// Information about the addressing mode for a memory access.
1310 // True if a long displacement is supported.
1312
1313 // True if use of index register is supported.
1315
1316 AddressingMode(bool LongDispl, bool IdxReg) :
1317 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
1318};
1319
1320// Return the desired addressing mode for a Load which has only one use (in
1321// the same block) which is a Store.
1323 Type *Ty) {
1324 // With vector support a Load->Store combination may be combined to either
1325 // an MVC or vector operations and it seems to work best to allow the
1326 // vector addressing mode.
1327 if (HasVector)
1328 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1329
1330 // Otherwise only the MVC case is special.
1331 bool MVC = Ty->isIntegerTy(8);
1332 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
1333}
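// (MVC is an SS-format storage-to-storage instruction: it takes only a base
// register plus a 12-bit unsigned displacement and no index register, which
// is why both the long displacement and the index register are disallowed in
// that case.)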
1334
1335// Return the addressing mode which seems most desirable given an LLVM
1336// Instruction pointer.
1337static AddressingMode
1340 switch (II->getIntrinsicID()) {
1341 default: break;
1342 case Intrinsic::memset:
1343 case Intrinsic::memmove:
1344 case Intrinsic::memcpy:
1345 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1346 }
1347 }
1348
1349 if (isa<LoadInst>(I) && I->hasOneUse()) {
1350 auto *SingleUser = cast<Instruction>(*I->user_begin());
1351 if (SingleUser->getParent() == I->getParent()) {
1352 if (isa<ICmpInst>(SingleUser)) {
1353 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
1354 if (C->getBitWidth() <= 64 &&
1355 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
1356 // Comparison of memory with 16 bit signed / unsigned immediate
1357 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1358 } else if (isa<StoreInst>(SingleUser))
1359 // Load->Store
1360 return getLoadStoreAddrMode(HasVector, I->getType());
1361 }
1362 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
1363 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
1364 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
1365 // Load->Store
1366 return getLoadStoreAddrMode(HasVector, LoadI->getType());
1367 }
1368
1369 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
1370
1371 // * Use LDE instead of LE/LEY for z13 to avoid partial register
1372 // dependencies (LDE only supports small offsets).
1373 // * Utilize the vector registers to hold floating point
1374 // values (vector load / store instructions only support small
1375 // offsets).
1376
1377 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
1378 I->getOperand(0)->getType());
1379 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
1380 bool IsVectorAccess = MemAccessTy->isVectorTy();
1381
1382 // A store of an extracted vector element will be combined into a VSTE type
1383 // instruction.
1384 if (!IsVectorAccess && isa<StoreInst>(I)) {
1385 Value *DataOp = I->getOperand(0);
1386 if (isa<ExtractElementInst>(DataOp))
1387 IsVectorAccess = true;
1388 }
1389
1390 // A load which gets inserted into a vector element will be combined into a
1391 // VLE type instruction.
1392 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
1393 User *LoadUser = *I->user_begin();
1394 if (isa<InsertElementInst>(LoadUser))
1395 IsVectorAccess = true;
1396 }
1397
1398 if (IsFPAccess || IsVectorAccess)
1399 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1400 }
1401
1402 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
1403}
1404
1406 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
1407 // Punt on globals for now, although they can be used in limited
1408 // RELATIVE LONG cases.
1409 if (AM.BaseGV)
1410 return false;
1411
1412 // Require a 20-bit signed offset.
1413 if (!isInt<20>(AM.BaseOffs))
1414 return false;
1415
1416 bool RequireD12 =
1417 Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128));
1418 AddressingMode SupportedAM(!RequireD12, true);
1419 if (I != nullptr)
1420 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
1421
1422 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
1423 return false;
1424
1425 if (!SupportedAM.IndexReg)
1426 // No indexing allowed.
1427 return AM.Scale == 0;
1428 else
1429 // Indexing is OK but no scale factor can be applied.
1430 return AM.Scale == 0 || AM.Scale == 1;
1431}
1432
1434 LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
1435 const MemOp &Op, unsigned DstAS, unsigned SrcAS,
1436 const AttributeList &FuncAttributes) const {
1437 const int MVCFastLen = 16;
1438
1439 if (Limit != ~unsigned(0)) {
1440 // Don't expand Op into scalar loads/stores in these cases:
1441 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
1442 return false; // Small memcpy: Use MVC
1443 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
1444 return false; // Small memset (first byte with STC/MVI): Use MVC
1445 if (Op.isZeroMemset())
1446 return false; // Memset zero: Use XC
1447 }
1448
1449 return TargetLowering::findOptimalMemOpLowering(Context, MemOps, Limit, Op,
1450 DstAS, SrcAS, FuncAttributes);
1451}
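// (A single MVC can copy up to 256 bytes, so declining the scalar expansion
// here simply leaves these small operations to the MVC/XC-based lowering in
// SystemZSelectionDAGInfo.)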
1452
1454 LLVMContext &Context, const MemOp &Op,
1455 const AttributeList &FuncAttributes) const {
1456 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1457}
1458
1459bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
1460 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1461 return false;
1462 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
1463 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
1464 return FromBits > ToBits;
1465}
1466
1468 if (!FromVT.isInteger() || !ToVT.isInteger())
1469 return false;
1470 unsigned FromBits = FromVT.getFixedSizeInBits();
1471 unsigned ToBits = ToVT.getFixedSizeInBits();
1472 return FromBits > ToBits;
1473}
1474
1475//===----------------------------------------------------------------------===//
1476// Inline asm support
1477//===----------------------------------------------------------------------===//
1478
1481 if (Constraint.size() == 1) {
1482 switch (Constraint[0]) {
1483 case 'a': // Address register
1484 case 'd': // Data register (equivalent to 'r')
1485 case 'f': // Floating-point register
1486 case 'h': // High-part register
1487 case 'r': // General-purpose register
1488 case 'v': // Vector register
1489 return C_RegisterClass;
1490
1491 case 'Q': // Memory with base and unsigned 12-bit displacement
1492 case 'R': // Likewise, plus an index
1493 case 'S': // Memory with base and signed 20-bit displacement
1494 case 'T': // Likewise, plus an index
1495 case 'm': // Equivalent to 'T'.
1496 return C_Memory;
1497
1498 case 'I': // Unsigned 8-bit constant
1499 case 'J': // Unsigned 12-bit constant
1500 case 'K': // Signed 16-bit constant
1501 case 'L': // Signed 20-bit displacement (on all targets we support)
1502 case 'M': // 0x7fffffff
1503 return C_Immediate;
1504
1505 default:
1506 break;
1507 }
1508 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1509 switch (Constraint[1]) {
1510 case 'Q': // Address with base and unsigned 12-bit displacement
1511 case 'R': // Likewise, plus an index
1512 case 'S': // Address with base and signed 20-bit displacement
1513 case 'T': // Likewise, plus an index
1514 return C_Address;
1515
1516 default:
1517 break;
1518 }
1519 } else if (Constraint.size() == 5 && Constraint.starts_with("{")) {
1520 if (StringRef("{@cc}").compare(Constraint) == 0)
1521 return C_Other;
1522 }
1523 return TargetLowering::getConstraintType(Constraint);
1524}
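// Illustrative use of these constraints from C code (this asm statement is
// only an example, not something generated by this file):
//   int Res = In;
//   asm("ahi %0,%1" : "+d"(Res) : "K"(42));
// Here 'd' selects a general-purpose register and 'K' accepts the signed
// 16-bit constant 42.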
1525
1528 AsmOperandInfo &Info, const char *Constraint) const {
1530 Value *CallOperandVal = Info.CallOperandVal;
1531 // If we don't have a value, we can't do a match,
1532 // but allow it at the lowest weight.
1533 if (!CallOperandVal)
1534 return CW_Default;
1535 Type *type = CallOperandVal->getType();
1536 // Look at the constraint type.
1537 switch (*Constraint) {
1538 default:
1539 Weight = TargetLowering::getSingleConstraintMatchWeight(Info, Constraint);
1540 break;
1541
1542 case 'a': // Address register
1543 case 'd': // Data register (equivalent to 'r')
1544 case 'h': // High-part register
1545 case 'r': // General-purpose register
1546 Weight =
1547 CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
1548 break;
1549
1550 case 'f': // Floating-point register
1551 if (!useSoftFloat())
1552 Weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
1553 break;
1554
1555 case 'v': // Vector register
1556 if (Subtarget.hasVector())
1557 Weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
1558 : CW_Default;
1559 break;
1560
1561 case 'I': // Unsigned 8-bit constant
1562 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1563 if (isUInt<8>(C->getZExtValue()))
1564 Weight = CW_Constant;
1565 break;
1566
1567 case 'J': // Unsigned 12-bit constant
1568 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1569 if (isUInt<12>(C->getZExtValue()))
1570 Weight = CW_Constant;
1571 break;
1572
1573 case 'K': // Signed 16-bit constant
1574 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1575 if (isInt<16>(C->getSExtValue()))
1576 Weight = CW_Constant;
1577 break;
1578
1579 case 'L': // Signed 20-bit displacement (on all targets we support)
1580 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1581 if (isInt<20>(C->getSExtValue()))
1582 Weight = CW_Constant;
1583 break;
1584
1585 case 'M': // 0x7fffffff
1586 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1587 if (C->getZExtValue() == 0x7fffffff)
1588 Weight = CW_Constant;
1589 break;
1590 }
1591 return Weight;
1592}
1593
1594// Parse a "{tNNN}" register constraint for which the register type "t"
1595// has already been verified. RC is the class associated with "t" and
1596// Map maps 0-based register numbers to LLVM register numbers.
1597static std::pair<unsigned, const TargetRegisterClass *>
1599 const unsigned *Map, unsigned Size) {
1600 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1601 if (isdigit(Constraint[2])) {
1602 unsigned Index;
1603 bool Failed =
1604 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1605 if (!Failed && Index < Size && Map[Index])
1606 return std::make_pair(Map[Index], RC);
1607 }
1608 return std::make_pair(0U, nullptr);
1609}
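// For example, the constraint "{r5}" together with the 64-bit GPR map
// resolves to Map[5], i.e. SystemZ::R5D, paired with the GR64 register class.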
1610
1611std::pair<unsigned, const TargetRegisterClass *>
1613 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1614 if (Constraint.size() == 1) {
1615 // GCC Constraint Letters
1616 switch (Constraint[0]) {
1617 default: break;
1618 case 'd': // Data register (equivalent to 'r')
1619 case 'r': // General-purpose register
1620 if (VT.getSizeInBits() == 64)
1621 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1622 else if (VT.getSizeInBits() == 128)
1623 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1624 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1625
1626 case 'a': // Address register
1627 if (VT == MVT::i64)
1628 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1629 else if (VT == MVT::i128)
1630 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1631 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1632
1633 case 'h': // High-part register (an LLVM extension)
1634 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1635
1636 case 'f': // Floating-point register
1637 if (!useSoftFloat()) {
1638 if (VT.getSizeInBits() == 16)
1639 return std::make_pair(0U, &SystemZ::FP16BitRegClass);
1640 else if (VT.getSizeInBits() == 64)
1641 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1642 else if (VT.getSizeInBits() == 128)
1643 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1644 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1645 }
1646 break;
1647
1648 case 'v': // Vector register
1649 if (Subtarget.hasVector()) {
1650 if (VT.getSizeInBits() == 16)
1651 return std::make_pair(0U, &SystemZ::VR16BitRegClass);
1652 if (VT.getSizeInBits() == 32)
1653 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1654 if (VT.getSizeInBits() == 64)
1655 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1656 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1657 }
1658 break;
1659 }
1660 }
1661 if (Constraint.starts_with("{")) {
1662
1663 // A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal
1664 // to check the size on.
1665 auto getVTSizeInBits = [&VT]() {
1666 return VT == MVT::Other ? 0 : VT.getSizeInBits();
1667 };
1668
1669 // We need to override the default register parsing for GPRs and FPRs
1670 // because the interpretation depends on VT. The internal names of
1671 // the registers are also different from the external names
1672 // (F0D and F0S instead of F0, etc.).
1673 if (Constraint[1] == 'r') {
1674 if (getVTSizeInBits() == 32)
1675 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1677 if (getVTSizeInBits() == 128)
1678 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1680 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1682 }
1683 if (Constraint[1] == 'f') {
1684 if (useSoftFloat())
1685 return std::make_pair(
1686 0u, static_cast<const TargetRegisterClass *>(nullptr));
1687 if (getVTSizeInBits() == 16)
1688 return parseRegisterNumber(Constraint, &SystemZ::FP16BitRegClass,
1690 if (getVTSizeInBits() == 32)
1691 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1693 if (getVTSizeInBits() == 128)
1694 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1696 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1698 }
1699 if (Constraint[1] == 'v') {
1700 if (!Subtarget.hasVector())
1701 return std::make_pair(
1702 0u, static_cast<const TargetRegisterClass *>(nullptr));
1703 if (getVTSizeInBits() == 16)
1704 return parseRegisterNumber(Constraint, &SystemZ::VR16BitRegClass,
1706 if (getVTSizeInBits() == 32)
1707 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1709 if (getVTSizeInBits() == 64)
1710 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1712 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1714 }
1715 if (Constraint[1] == '@') {
1716 if (StringRef("{@cc}").compare(Constraint) == 0)
1717 return std::make_pair(SystemZ::CC, &SystemZ::CCRRegClass);
1718 }
1719 }
1720 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1721}
1722
1723// FIXME? Maybe this could be a TableGen attribute on some registers and
1724// this table could be generated automatically from RegInfo.
1727 const MachineFunction &MF) const {
1728 Register Reg =
1730 .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D
1731 : SystemZ::NoRegister)
1732 .Case("r15",
1733 Subtarget.isTargetELF() ? SystemZ::R15D : SystemZ::NoRegister)
1734 .Default(Register());
1735
1736 return Reg;
1737}
1738
1740 const Constant *PersonalityFn) const {
1741 return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D;
1742}
1743
1745 const Constant *PersonalityFn) const {
1746 return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
1747}
1748
1749// Convert condition code in CCReg to an i32 value.
1751 SDLoc DL(CCReg);
1752 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
1753 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
1754 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
1755}
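// (IPM inserts the condition code into bits 29-28 of the low 32 bits of the
// result, so shifting right by SystemZ::IPM_CC leaves the raw CC value 0-3
// in the two least significant bits.)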
1756
1757// Lower @cc targets via setcc.
1759 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
1760 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
1761 if (StringRef("{@cc}").compare(OpInfo.ConstraintCode) != 0)
1762 return SDValue();
1763
1764 // Check that return type is valid.
1765 if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
1766 OpInfo.ConstraintVT.getSizeInBits() < 8)
1767 report_fatal_error("Glue output operand is of invalid type");
1768
1769 if (Glue.getNode()) {
1770 Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32, Glue);
1771 Chain = Glue.getValue(1);
1772 } else
1773 Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32);
1774 return getCCResult(DAG, Glue);
1775}
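// The value produced here is the raw machine condition code (0-3) read via
// IPM (see getCCResult above); consumers of the "{@cc}" output constraint
// then test this value against the CC they are interested in.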
1776
1778 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
1779 SelectionDAG &DAG) const {
1780 // Only support length 1 constraints for now.
1781 if (Constraint.size() == 1) {
1782 switch (Constraint[0]) {
1783 case 'I': // Unsigned 8-bit constant
1784 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1785 if (isUInt<8>(C->getZExtValue()))
1786 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1787 Op.getValueType()));
1788 return;
1789
1790 case 'J': // Unsigned 12-bit constant
1791 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1792 if (isUInt<12>(C->getZExtValue()))
1793 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1794 Op.getValueType()));
1795 return;
1796
1797 case 'K': // Signed 16-bit constant
1798 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1799 if (isInt<16>(C->getSExtValue()))
1800 Ops.push_back(DAG.getSignedTargetConstant(
1801 C->getSExtValue(), SDLoc(Op), Op.getValueType()));
1802 return;
1803
1804 case 'L': // Signed 20-bit displacement (on all targets we support)
1805 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1806 if (isInt<20>(C->getSExtValue()))
1807 Ops.push_back(DAG.getSignedTargetConstant(
1808 C->getSExtValue(), SDLoc(Op), Op.getValueType()));
1809 return;
1810
1811 case 'M': // 0x7fffffff
1812 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1813 if (C->getZExtValue() == 0x7fffffff)
1814 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1815 Op.getValueType()));
1816 return;
1817 }
1818 }
1820}
1821
1822//===----------------------------------------------------------------------===//
1823// Calling conventions
1824//===----------------------------------------------------------------------===//
1825
1826#include "SystemZGenCallingConv.inc"
1827
1829 CallingConv::ID) const {
1830 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1831 SystemZ::R14D, 0 };
1832 return ScratchRegs;
1833}
1834
1836 Type *ToType) const {
1837 return isTruncateFree(FromType, ToType);
1838}
1839
1841 return CI->isTailCall();
1842}
1843
1844// Value is a value that has been passed to us in the location described by VA
1845// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1846// any loads onto Chain.
1848 CCValAssign &VA, SDValue Chain,
1849 SDValue Value) {
1850 // If the argument has been promoted from a smaller type, insert an
1851 // assertion to capture this.
1852 if (VA.getLocInfo() == CCValAssign::SExt)
1854 DAG.getValueType(VA.getValVT()));
1855 else if (VA.getLocInfo() == CCValAssign::ZExt)
1857 DAG.getValueType(VA.getValVT()));
1858
1859 if (VA.isExtInLoc())
1860 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1861 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1862 // If this is a short vector argument loaded from the stack,
1863 // extend from i64 to full vector size and then bitcast.
1864 assert(VA.getLocVT() == MVT::i64);
1865 assert(VA.getValVT().isVector());
1866 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1867 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1868 } else
1869 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1870 return Value;
1871}
1872
1873// Value is a value of type VA.getValVT() that we need to copy into
1874// the location described by VA. Return a copy of Value converted to
1875// VA.getValVT(). The caller is responsible for handling indirect values.
1877 CCValAssign &VA, SDValue Value) {
1878 switch (VA.getLocInfo()) {
1879 case CCValAssign::SExt:
1880 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1881 case CCValAssign::ZExt:
1882 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1883 case CCValAssign::AExt:
1884 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1885 case CCValAssign::BCvt: {
1886 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1887 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1888 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1889 // For an f32 vararg we need to first promote it to an f64 and then
1890 // bitcast it to an i64.
1891 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1892 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1893 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1894 ? MVT::v2i64
1895 : VA.getLocVT();
1896 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1897 // For ELF, this is a short vector argument to be stored to the stack,
1898 // bitcast to v2i64 and then extract first element.
1899 if (BitCastToType == MVT::v2i64)
1900 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1901 DAG.getConstant(0, DL, MVT::i32));
1902 return Value;
1903 }
1904 case CCValAssign::Full:
1905 return Value;
1906 default:
1907 llvm_unreachable("Unhandled getLocInfo()");
1908 }
1909}
1910
1912 SDLoc DL(In);
1913 SDValue Lo, Hi;
1914 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1915 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
1916 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
1917 DAG.getNode(ISD::SRL, DL, MVT::i128, In,
1918 DAG.getConstant(64, DL, MVT::i32)));
1919 } else {
1920 std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
1921 }
1922
1923 // FIXME: If v2i64 were a legal type, we could use it instead of
1924 // Untyped here. This might enable improved folding.
1925 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1926 MVT::Untyped, Hi, Lo);
1927 return SDValue(Pair, 0);
1928}
1929
1931 SDLoc DL(In);
1932 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1933 DL, MVT::i64, In);
1934 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1935 DL, MVT::i64, In);
1936
1937 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1938 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo);
1939 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi);
1940 Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi,
1941 DAG.getConstant(64, DL, MVT::i32));
1942 return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi);
1943 } else {
1944 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1945 }
1946}
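// (A 128-bit value in GPRs occupies an even/odd register pair; subreg_h64 is
// the even register holding the most significant 64 bits and subreg_l64 the
// odd register holding the least significant 64 bits.)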
1947
1949 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1950 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
1951 EVT ValueVT = Val.getValueType();
1952 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1953 // Inline assembly operand.
1954 Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val));
1955 return true;
1956 }
1957
1958 return false;
1959}
1960
1962 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1963 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
1964 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1965 // Inline assembly operand.
1966 SDValue Res = lowerGR128ToI128(DAG, Parts[0]);
1967 return DAG.getBitcast(ValueVT, Res);
1968 }
1969
1970 return SDValue();
1971}
1972
1973// The first part of a split stack argument is at index I in Args (and
1974// ArgLocs). Return the type of a part and the number of them by reference.
1975template <class ArgTy>
1977 SmallVector<CCValAssign, 16> &ArgLocs, unsigned I,
1978 MVT &PartVT, unsigned &NumParts) {
1979 if (!Args[I].Flags.isSplit())
1980 return false;
1981 assert(I < ArgLocs.size() && ArgLocs.size() == Args.size() &&
1982 "ArgLocs havoc.");
1983 PartVT = ArgLocs[I].getValVT();
1984 NumParts = 1;
1985 for (unsigned PartIdx = I + 1;; ++PartIdx) {
1986 assert(PartIdx != ArgLocs.size() && "SplitEnd not found.");
1987 assert(ArgLocs[PartIdx].getValVT() == PartVT && "Unsupported split.");
1988 ++NumParts;
1989 if (Args[PartIdx].Flags.isSplitEnd())
1990 break;
1991 }
1992 return true;
1993}
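// For example (assuming i128 is not a legal type), an i128 argument is split
// by type legalization into two i64 pieces, so analyzeArgSplit() reports
// PartVT = i64 and NumParts = 2, and the caller loads both pieces from the
// same indirect argument slot.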
1994
1996 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1997 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1998 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1999 MachineFunction &MF = DAG.getMachineFunction();
2000 MachineFrameInfo &MFI = MF.getFrameInfo();
2001 MachineRegisterInfo &MRI = MF.getRegInfo();
2002 SystemZMachineFunctionInfo *FuncInfo =
2003 MF.getInfo<SystemZMachineFunctionInfo>();
2004 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
2005 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2006
2007 // Assign locations to all of the incoming arguments.
2008 SmallVector<CCValAssign, 16> ArgLocs;
2009 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
2010 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
2011 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());
2012
2013 unsigned NumFixedGPRs = 0;
2014 unsigned NumFixedFPRs = 0;
2015 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2016 SDValue ArgValue;
2017 CCValAssign &VA = ArgLocs[I];
2018 EVT LocVT = VA.getLocVT();
2019 if (VA.isRegLoc()) {
2020 // Arguments passed in registers
2021 const TargetRegisterClass *RC;
2022 switch (LocVT.getSimpleVT().SimpleTy) {
2023 default:
2024 // Integers smaller than i64 should be promoted to i64.
2025 llvm_unreachable("Unexpected argument type");
2026 case MVT::i32:
2027 NumFixedGPRs += 1;
2028 RC = &SystemZ::GR32BitRegClass;
2029 break;
2030 case MVT::i64:
2031 NumFixedGPRs += 1;
2032 RC = &SystemZ::GR64BitRegClass;
2033 break;
2034 case MVT::f16:
2035 NumFixedFPRs += 1;
2036 RC = &SystemZ::FP16BitRegClass;
2037 break;
2038 case MVT::f32:
2039 NumFixedFPRs += 1;
2040 RC = &SystemZ::FP32BitRegClass;
2041 break;
2042 case MVT::f64:
2043 NumFixedFPRs += 1;
2044 RC = &SystemZ::FP64BitRegClass;
2045 break;
2046 case MVT::f128:
2047 NumFixedFPRs += 2;
2048 RC = &SystemZ::FP128BitRegClass;
2049 break;
2050 case MVT::v16i8:
2051 case MVT::v8i16:
2052 case MVT::v4i32:
2053 case MVT::v2i64:
2054 case MVT::v4f32:
2055 case MVT::v2f64:
2056 RC = &SystemZ::VR128BitRegClass;
2057 break;
2058 }
2059
2060 Register VReg = MRI.createVirtualRegister(RC);
2061 MRI.addLiveIn(VA.getLocReg(), VReg);
2062 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
2063 } else {
2064 assert(VA.isMemLoc() && "Argument not register or memory");
2065
2066 // Create the frame index object for this incoming parameter.
2067 // FIXME: Pre-include call frame size in the offset, should not
2068 // need to manually add it here.
2069 int64_t ArgSPOffset = VA.getLocMemOffset();
2070 if (Subtarget.isTargetXPLINK64()) {
2071 auto &XPRegs =
2072 Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
2073 ArgSPOffset += XPRegs.getCallFrameSize();
2074 }
2075 int FI =
2076 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
2077
2078 // Create the SelectionDAG nodes corresponding to a load
2079 // from this parameter. Unpromoted ints and floats are
2080 // passed as right-justified 8-byte values.
2081 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2082 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32 ||
2083 VA.getLocVT() == MVT::f16) {
2084 unsigned SlotOffs = VA.getLocVT() == MVT::f16 ? 6 : 4;
2085 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
2086 DAG.getIntPtrConstant(SlotOffs, DL));
2087 }
2088 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
2089 MachinePointerInfo::getFixedStack(MF, FI));
2090 }
2091
2092 // Convert the value of the argument register into the value that's
2093 // being passed.
2094 if (VA.getLocInfo() == CCValAssign::Indirect) {
2095 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
2096 MachinePointerInfo()));
2097 // If the original argument was split (e.g. i128), we need
2098 // to load all parts of it here (using the same address).
2099 MVT PartVT;
2100 unsigned NumParts;
2101 if (analyzeArgSplit(Ins, ArgLocs, I, PartVT, NumParts)) {
2102 for (unsigned PartIdx = 1; PartIdx < NumParts; ++PartIdx) {
2103 ++I;
2104 CCValAssign &PartVA = ArgLocs[I];
2105 unsigned PartOffset = Ins[I].PartOffset;
2106 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
2107 DAG.getIntPtrConstant(PartOffset, DL));
2108 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
2109 MachinePointerInfo()));
2110 assert(PartOffset && "Offset should be non-zero.");
2111 }
2112 }
2113 } else
2114 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
2115 }
2116
2117 if (IsVarArg && Subtarget.isTargetXPLINK64()) {
2118 // Save the number of non-varargs registers for later use by va_start, etc.
2119 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
2120 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
2121
2122 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2123 Subtarget.getSpecialRegisters());
2124
2125 // Likewise the address (in the form of a frame index) of where the
2126 // first stack vararg would be. The 1-byte size here is arbitrary.
2127 // FIXME: Pre-include call frame size in the offset, should not
2128 // need to manually add it here.
2129 int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
2130 int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
2131 FuncInfo->setVarArgsFrameIndex(FI);
2132 }
2133
2134 if (IsVarArg && Subtarget.isTargetELF()) {
2135 // Save the number of non-varargs registers for later use by va_start, etc.
2136 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
2137 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
2138
2139 // Likewise the address (in the form of a frame index) of where the
2140 // first stack vararg would be. The 1-byte size here is arbitrary.
2141 int64_t VarArgsOffset = CCInfo.getStackSize();
2142 FuncInfo->setVarArgsFrameIndex(
2143 MFI.CreateFixedObject(1, VarArgsOffset, true));
2144
2145 // ...and a similar frame index for the caller-allocated save area
2146 // that will be used to store the incoming registers.
2147 int64_t RegSaveOffset =
2148 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
2149 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
2150 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
2151
2152 // Store the FPR varargs in the reserved frame slots. (We store the
2153 // GPRs as part of the prologue.)
2154 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
2155 SDValue MemOps[SystemZ::ELFNumArgFPRs];
2156 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
2157 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
2158 int FI =
2159 MFI.CreateFixedObject(8, -SystemZMC::ELFCallFrameSize + Offset, true);
2160 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2161 Register VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I],
2162 &SystemZ::FP64BitRegClass);
2163 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
2164 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
2165 MachinePointerInfo::getFixedStack(MF, FI));
2166 }
2167 // Join the stores, which are independent of one another.
2168 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2169 ArrayRef(&MemOps[NumFixedFPRs],
2170 SystemZ::ELFNumArgFPRs - NumFixedFPRs));
2171 }
2172 }
2173
2174 if (Subtarget.isTargetXPLINK64()) {
2175 // Create virtual register for handling incoming "ADA" special register (R5)
2176 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
2177 Register ADAvReg = MRI.createVirtualRegister(RC);
2178 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2179 Subtarget.getSpecialRegisters());
2180 MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
2181 FuncInfo->setADAVirtualRegister(ADAvReg);
2182 }
2183 return Chain;
2184}
2185
2186static bool canUseSiblingCall(const CCState &ArgCCInfo,
2187 SmallVectorImpl<CCValAssign> &ArgLocs,
2188 SmallVectorImpl<ISD::OutputArg> &Outs) {
2189 // Punt if there are any indirect or stack arguments, or if the call
2190 // needs the callee-saved argument register R6, or if the call uses
2191 // the callee-saved register arguments SwiftSelf and SwiftError.
2192 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2193 CCValAssign &VA = ArgLocs[I];
2194 if (VA.getLocInfo() == CCValAssign::Indirect)
2195 return false;
2196 if (!VA.isRegLoc())
2197 return false;
2198 Register Reg = VA.getLocReg();
2199 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
2200 return false;
2201 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
2202 return false;
2203 }
2204 return true;
2205}
2206
2207static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL,
2208 unsigned Offset, bool LoadAdr = false) {
2209 MachineFunction &MF = DAG.getMachineFunction();
2210 SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
2211 Register ADAvReg = MFI->getADAVirtualRegister();
2212 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2213
2214 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT);
2215 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT);
2216
2217 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs);
2218 if (!LoadAdr)
2219 Result = DAG.getLoad(
2220 PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8),
2221 MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant);
2222
2223 return Result;
2224}
2225
2226// ADA access using Global value
2227// Note: for functions, address of descriptor is returned
2228static SDValue getADAEntry(SelectionDAG &DAG, const GlobalValue *GV, SDLoc DL,
2229 EVT PtrVT) {
2230 unsigned ADAtype;
2231 bool LoadAddr = false;
2232 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV);
2233 bool IsFunction =
2234 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject()));
2235 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage());
2236
2237 if (IsFunction) {
2238 if (IsInternal) {
2239 ADAtype = SystemZII::MO_ADA_DIRECT_FUNC_DESC;
2240 LoadAddr = true;
2241 } else
2242 ADAtype = SystemZII::MO_ADA_INDIRECT_FUNC_DESC;
2243 } else {
2244 ADAtype = SystemZII::MO_ADA_DATA_SYMBOL_ADDR;
2245 }
2246 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype);
2247
2248 return getADAEntry(DAG, Val, DL, 0, LoadAddr);
2249}
2250
2251static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
2252 SDLoc &DL, SDValue &Chain) {
2253 unsigned ADADelta = 0; // ADA offset in desc.
2254 unsigned EPADelta = 8; // EPA offset in desc.
2255 MachineFunction &MF = DAG.getMachineFunction();
2256 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2257
2258 // XPLink calling convention.
2259 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2260 bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
2261 G->getGlobal()->hasPrivateLinkage());
2262 if (IsInternal) {
2263 SystemZMachineFunctionInfo *MFI =
2264 MF.getInfo<SystemZMachineFunctionInfo>();
2265 Register ADAvReg = MFI->getADAVirtualRegister();
2266 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT);
2267 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2268 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2269 return true;
2270 } else {
2271 SDValue GA = DAG.getTargetGlobalAddress(
2272 G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2273 ADA = getADAEntry(DAG, GA, DL, ADADelta);
2274 Callee = getADAEntry(DAG, GA, DL, EPADelta);
2275 }
2276 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2277 SDValue ES = DAG.getTargetExternalSymbol(
2278 E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2279 ADA = getADAEntry(DAG, ES, DL, ADADelta);
2280 Callee = getADAEntry(DAG, ES, DL, EPADelta);
2281 } else {
2282 // Function pointer case
2283 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2284 DAG.getConstant(ADADelta, DL, PtrVT));
2285 ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA,
2287 Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2288 DAG.getConstant(EPADelta, DL, PtrVT));
2289 Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee,
2291 }
2292 return false;
2293}
2294
2295SDValue
2296SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
2297 SmallVectorImpl<SDValue> &InVals) const {
2298 SelectionDAG &DAG = CLI.DAG;
2299 SDLoc &DL = CLI.DL;
2300 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2301 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2302 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2303 SDValue Chain = CLI.Chain;
2304 SDValue Callee = CLI.Callee;
2305 bool &IsTailCall = CLI.IsTailCall;
2306 CallingConv::ID CallConv = CLI.CallConv;
2307 bool IsVarArg = CLI.IsVarArg;
2308 MachineFunction &MF = DAG.getMachineFunction();
2309 EVT PtrVT = getPointerTy(MF.getDataLayout());
2310 LLVMContext &Ctx = *DAG.getContext();
2311 SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();
2312
2313 // FIXME: z/OS support to be added in a later patch.
2314 if (Subtarget.isTargetXPLINK64())
2315 IsTailCall = false;
2316
2317 // Integer args <=32 bits should have an extension attribute.
2318 verifyNarrowIntegerArgs_Call(Outs, &MF.getFunction(), Callee);
2319
2320 // Analyze the operands of the call, assigning locations to each operand.
2321 SmallVector<CCValAssign, 16> ArgLocs;
2322 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
2323 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
2324
2325 // We don't support GuaranteedTailCallOpt, only automatically-detected
2326 // sibling calls.
2327 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
2328 IsTailCall = false;
2329
2330 // Get a count of how many bytes are to be pushed on the stack.
2331 unsigned NumBytes = ArgCCInfo.getStackSize();
2332
2333 // Mark the start of the call.
2334 if (!IsTailCall)
2335 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
2336
2337 // Copy argument values to their designated locations.
2338 SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
2339 SmallVector<SDValue, 8> MemOpChains;
2340 SDValue StackPtr;
2341 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2342 CCValAssign &VA = ArgLocs[I];
2343 SDValue ArgValue = OutVals[I];
2344
2345 if (VA.getLocInfo() == CCValAssign::Indirect) {
2346 // Store the argument in a stack slot and pass its address.
2347 EVT SlotVT;
2348 MVT PartVT;
2349 unsigned NumParts = 1;
2350 if (analyzeArgSplit(Outs, ArgLocs, I, PartVT, NumParts))
2351 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * NumParts);
2352 else
2353 SlotVT = Outs[I].VT;
2354 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
2355 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2356 MemOpChains.push_back(
2357 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
2358 MachinePointerInfo::getFixedStack(MF, FI)));
2359 // If the original argument was split (e.g. i128), we need
2360 // to store all parts of it here (and pass just one address).
2361 assert(Outs[I].PartOffset == 0);
2362 for (unsigned PartIdx = 1; PartIdx < NumParts; ++PartIdx) {
2363 ++I;
2364 SDValue PartValue = OutVals[I];
2365 unsigned PartOffset = Outs[I].PartOffset;
2366 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
2367 DAG.getIntPtrConstant(PartOffset, DL));
2368 MemOpChains.push_back(
2369 DAG.getStore(Chain, DL, PartValue, Address,
2370 MachinePointerInfo::getFixedStack(MF, FI, PartOffset)));
2371 assert(PartOffset && "Offset should be non-zero.");
2372 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
2373 SlotVT.getStoreSize()) && "Not enough space for argument part!");
2374 }
2375 ArgValue = SpillSlot;
2376 } else
2377 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
2378
2379 if (VA.isRegLoc()) {
2380 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
2381 // MVT::i128 type. We decompose the 128-bit type to a pair of its high
2382 // and low values.
2383 if (VA.getLocVT() == MVT::i128)
2384 ArgValue = lowerI128ToGR128(DAG, ArgValue);
2385 // Queue up the argument copies and emit them at the end.
2386 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
2387 } else {
2388 assert(VA.isMemLoc() && "Argument not register or memory");
2389
2390 // Work out the address of the stack slot. Unpromoted ints and
2391 // floats are passed as right-justified 8-byte values.
2392 if (!StackPtr.getNode())
2393 StackPtr = DAG.getCopyFromReg(Chain, DL,
2394 Regs->getStackPointerRegister(), PtrVT);
2395 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
2396 VA.getLocMemOffset();
2397 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
2398 Offset += 4;
2399 else if (VA.getLocVT() == MVT::f16)
2400 Offset += 6;
2401 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
2402 DAG.getIntPtrConstant(Offset, DL));
2403
2404 // Emit the store.
2405 MemOpChains.push_back(
2406 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
2407
2408 // Although long doubles or vectors are passed through the stack when
2409 // they are vararg (non-fixed arguments), if a long double or vector
2410 // occupies the third and fourth slot of the argument list GPR3 should
2411 // still shadow the third slot of the argument list.
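      // For example, a vararg f128 or vector argument occupying the third and
      // fourth 8-byte slots is stored to the stack above, and the
      // EXTRACT_ELEMENT below additionally copies the half that lands in the
      // third slot into R3, keeping the register image of the argument area
      // consistent.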
2412 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
2413 SDValue ShadowArgValue =
2414 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
2415 DAG.getIntPtrConstant(1, DL));
2416 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
2417 }
2418 }
2419 }
2420
2421 // Join the stores, which are independent of one another.
2422 if (!MemOpChains.empty())
2423 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2424
2425 // Accept direct calls by converting symbolic call addresses to the
2426 // associated Target* opcodes. Force %r1 to be used for indirect
2427 // tail calls.
2428 SDValue Glue;
2429
2430 if (Subtarget.isTargetXPLINK64()) {
2431 SDValue ADA;
2432 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
2433 if (!IsBRASL) {
2434 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
2435 ->getAddressOfCalleeRegister();
2436 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue);
2437 Glue = Chain.getValue(1);
2438 Callee = DAG.getRegister(CalleeReg, Callee.getValueType());
2439 }
2440 RegsToPass.push_back(std::make_pair(
2441 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA));
2442 } else {
2443 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2444 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2445 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2446 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2447 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
2448 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2449 } else if (IsTailCall) {
2450 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
2451 Glue = Chain.getValue(1);
2452 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
2453 }
2454 }
2455
2456 // Build a sequence of copy-to-reg nodes, chained and glued together.
2457 for (const auto &[Reg, N] : RegsToPass) {
2458 Chain = DAG.getCopyToReg(Chain, DL, Reg, N, Glue);
2459 Glue = Chain.getValue(1);
2460 }
2461
2462 // The first call operand is the chain and the second is the target address.
2463 SmallVector<SDValue, 8> Ops;
2464 Ops.push_back(Chain);
2465 Ops.push_back(Callee);
2466
2467 // Add argument registers to the end of the list so that they are
2468 // known live into the call.
2469 for (const auto &[Reg, N] : RegsToPass)
2470 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
2471
2472 // Add a register mask operand representing the call-preserved registers.
2473 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2474 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2475 assert(Mask && "Missing call preserved mask for calling convention");
2476 Ops.push_back(DAG.getRegisterMask(Mask));
2477
2478 // Glue the call to the argument copies, if any.
2479 if (Glue.getNode())
2480 Ops.push_back(Glue);
2481
2482 // Emit the call.
2483 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2484 if (IsTailCall) {
2485 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
2486 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2487 return Ret;
2488 }
2489 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
2490 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2491 Glue = Chain.getValue(1);
2492
2493 // Mark the end of the call, which is glued to the call itself.
2494 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
2495 Glue = Chain.getValue(1);
2496
2497 // Assign locations to each value returned by this call.
2498 SmallVector<CCValAssign, 8> RetLocs;
2499 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
2500 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
2501
2502 // Copy all of the result registers out of their specified physreg.
2503 for (CCValAssign &VA : RetLocs) {
2504 // Copy the value out, gluing the copy to the end of the call sequence.
2505 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
2506 VA.getLocVT(), Glue);
2507 Chain = RetValue.getValue(1);
2508 Glue = RetValue.getValue(2);
2509
2510 // Convert the value of the return register into the value that's
2511 // being returned.
2512 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
2513 }
2514
2515 return Chain;
2516}
2517
2518// Generate a call taking the given operands as arguments and returning a
2519// result of type RetVT.
2520SDValue SystemZTargetLowering::makeExternalCall(
2521 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
2522 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
2523 bool DoesNotReturn, bool IsReturnValueUsed) const {
2524 ArgListTy Args;
2525 Args.reserve(Ops.size());
2526
2527 for (SDValue Op : Ops) {
2528 ArgListEntry Entry(
2529 Op, Op.getValueType().getTypeForEVT(*DAG.getContext()));
2530 Entry.IsSExt = shouldSignExtendTypeInLibCall(Entry.Ty, IsSigned);
2531 Entry.IsZExt = !Entry.IsSExt;
2532 Args.push_back(Entry);
2533 }
2534
2535 SDValue Callee =
2536 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
2537
2538 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
2539 CallLoweringInfo CLI(DAG);
2540 bool SignExtend = shouldSignExtendTypeInLibCall(RetTy, IsSigned);
2541 CLI.setDebugLoc(DL)
2542 .setChain(Chain)
2543 .setCallee(CallConv, RetTy, Callee, std::move(Args))
2544 .setNoReturn(DoesNotReturn)
2545 .setDiscardResult(!IsReturnValueUsed)
2546 .setSExtResult(SignExtend)
2547 .setZExtResult(!SignExtend);
2548 return LowerCallTo(CLI);
2549}
2550
2551bool SystemZTargetLowering::CanLowerReturn(
2552 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
2553 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
2554 const Type *RetTy) const {
2555 // Special case that we cannot easily detect in RetCC_SystemZ since
2556 // i128 may not be a legal type.
2557 for (auto &Out : Outs)
2558 if (Out.ArgVT.isScalarInteger() && Out.ArgVT.getSizeInBits() > 64)
2559 return false;
2560
2562 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Context);
2563 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
2564}
2565
2566SDValue
2567SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2568 bool IsVarArg,
2569 const SmallVectorImpl<ISD::OutputArg> &Outs,
2570 const SmallVectorImpl<SDValue> &OutVals,
2571 const SDLoc &DL, SelectionDAG &DAG) const {
2572 MachineFunction &MF = DAG.getMachineFunction();
2573
2574 // Integer args <=32 bits should have an extension attribute.
2575 verifyNarrowIntegerArgs_Ret(Outs, &MF.getFunction());
2576
2577 // Assign locations to each returned value.
2578 SmallVector<CCValAssign, 16> RetLocs;
2579 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
2580 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
2581
2582 // Quick exit for void returns
2583 if (RetLocs.empty())
2584 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain);
2585
2586 if (CallConv == CallingConv::GHC)
2587 report_fatal_error("GHC functions return void only");
2588
2589 // Copy the result values into the output registers.
2590 SDValue Glue;
2591 SmallVector<SDValue, 4> RetOps;
2592 RetOps.push_back(Chain);
2593 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2594 CCValAssign &VA = RetLocs[I];
2595 SDValue RetValue = OutVals[I];
2596
2597 // Make the return register live on exit.
2598 assert(VA.isRegLoc() && "Can only return in registers!");
2599
2600 // Promote the value as required.
2601 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
2602
2603 // Chain and glue the copies together.
2604 Register Reg = VA.getLocReg();
2605 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
2606 Glue = Chain.getValue(1);
2607 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
2608 }
2609
2610 // Update chain and glue.
2611 RetOps[0] = Chain;
2612 if (Glue.getNode())
2613 RetOps.push_back(Glue);
2614
2615 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps);
2616}
2617
2618// Return true if Op is an intrinsic node with chain that returns the CC value
2619// as its only (other) argument. Provide the associated SystemZISD opcode and
2620// the mask of valid CC values if so.
2621static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
2622 unsigned &CCValid) {
2623 unsigned Id = Op.getConstantOperandVal(1);
2624 switch (Id) {
2625 case Intrinsic::s390_tbegin:
2626 Opcode = SystemZISD::TBEGIN;
2627 CCValid = SystemZ::CCMASK_TBEGIN;
2628 return true;
2629
2630 case Intrinsic::s390_tbegin_nofloat:
2631 Opcode = SystemZISD::TBEGIN_NOFLOAT;
2632 CCValid = SystemZ::CCMASK_TBEGIN;
2633 return true;
2634
2635 case Intrinsic::s390_tend:
2636 Opcode = SystemZISD::TEND;
2637 CCValid = SystemZ::CCMASK_TEND;
2638 return true;
2639
2640 default:
2641 return false;
2642 }
2643}
2644
2645// Return true if Op is an intrinsic node without chain that returns the
2646// CC value as its final argument. Provide the associated SystemZISD
2647// opcode and the mask of valid CC values if so.
2648static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2649 unsigned Id = Op.getConstantOperandVal(0);
2650 switch (Id) {
2651 case Intrinsic::s390_vpkshs:
2652 case Intrinsic::s390_vpksfs:
2653 case Intrinsic::s390_vpksgs:
2654 Opcode = SystemZISD::PACKS_CC;
2655 CCValid = SystemZ::CCMASK_VCMP;
2656 return true;
2657
2658 case Intrinsic::s390_vpklshs:
2659 case Intrinsic::s390_vpklsfs:
2660 case Intrinsic::s390_vpklsgs:
2661 Opcode = SystemZISD::PACKLS_CC;
2662 CCValid = SystemZ::CCMASK_VCMP;
2663 return true;
2664
2665 case Intrinsic::s390_vceqbs:
2666 case Intrinsic::s390_vceqhs:
2667 case Intrinsic::s390_vceqfs:
2668 case Intrinsic::s390_vceqgs:
2669 case Intrinsic::s390_vceqqs:
2670 Opcode = SystemZISD::VICMPES;
2671 CCValid = SystemZ::CCMASK_VCMP;
2672 return true;
2673
2674 case Intrinsic::s390_vchbs:
2675 case Intrinsic::s390_vchhs:
2676 case Intrinsic::s390_vchfs:
2677 case Intrinsic::s390_vchgs:
2678 case Intrinsic::s390_vchqs:
2679 Opcode = SystemZISD::VICMPHS;
2680 CCValid = SystemZ::CCMASK_VCMP;
2681 return true;
2682
2683 case Intrinsic::s390_vchlbs:
2684 case Intrinsic::s390_vchlhs:
2685 case Intrinsic::s390_vchlfs:
2686 case Intrinsic::s390_vchlgs:
2687 case Intrinsic::s390_vchlqs:
2688 Opcode = SystemZISD::VICMPHLS;
2689 CCValid = SystemZ::CCMASK_VCMP;
2690 return true;
2691
2692 case Intrinsic::s390_vtm:
2693 Opcode = SystemZISD::VTM;
2694 CCValid = SystemZ::CCMASK_VCMP;
2695 return true;
2696
2697 case Intrinsic::s390_vfaebs:
2698 case Intrinsic::s390_vfaehs:
2699 case Intrinsic::s390_vfaefs:
2700 Opcode = SystemZISD::VFAE_CC;
2701 CCValid = SystemZ::CCMASK_ANY;
2702 return true;
2703
2704 case Intrinsic::s390_vfaezbs:
2705 case Intrinsic::s390_vfaezhs:
2706 case Intrinsic::s390_vfaezfs:
2707 Opcode = SystemZISD::VFAEZ_CC;
2708 CCValid = SystemZ::CCMASK_ANY;
2709 return true;
2710
2711 case Intrinsic::s390_vfeebs:
2712 case Intrinsic::s390_vfeehs:
2713 case Intrinsic::s390_vfeefs:
2714 Opcode = SystemZISD::VFEE_CC;
2715 CCValid = SystemZ::CCMASK_ANY;
2716 return true;
2717
2718 case Intrinsic::s390_vfeezbs:
2719 case Intrinsic::s390_vfeezhs:
2720 case Intrinsic::s390_vfeezfs:
2721 Opcode = SystemZISD::VFEEZ_CC;
2722 CCValid = SystemZ::CCMASK_ANY;
2723 return true;
2724
2725 case Intrinsic::s390_vfenebs:
2726 case Intrinsic::s390_vfenehs:
2727 case Intrinsic::s390_vfenefs:
2728 Opcode = SystemZISD::VFENE_CC;
2729 CCValid = SystemZ::CCMASK_ANY;
2730 return true;
2731
2732 case Intrinsic::s390_vfenezbs:
2733 case Intrinsic::s390_vfenezhs:
2734 case Intrinsic::s390_vfenezfs:
2735 Opcode = SystemZISD::VFENEZ_CC;
2736 CCValid = SystemZ::CCMASK_ANY;
2737 return true;
2738
2739 case Intrinsic::s390_vistrbs:
2740 case Intrinsic::s390_vistrhs:
2741 case Intrinsic::s390_vistrfs:
2742 Opcode = SystemZISD::VISTR_CC;
2743 CCValid = SystemZ::CCMASK_ANY;
2744 return true;
2745
2746 case Intrinsic::s390_vstrcbs:
2747 case Intrinsic::s390_vstrchs:
2748 case Intrinsic::s390_vstrcfs:
2749 Opcode = SystemZISD::VSTRC_CC;
2750 CCValid = SystemZ::CCMASK_ANY;
2751 return true;
2752
2753 case Intrinsic::s390_vstrczbs:
2754 case Intrinsic::s390_vstrczhs:
2755 case Intrinsic::s390_vstrczfs:
2756 Opcode = SystemZISD::VSTRCZ_CC;
2757 CCValid = SystemZ::CCMASK_ANY;
2758 return true;
2759
2760 case Intrinsic::s390_vstrsb:
2761 case Intrinsic::s390_vstrsh:
2762 case Intrinsic::s390_vstrsf:
2763 Opcode = SystemZISD::VSTRS_CC;
2764 CCValid = SystemZ::CCMASK_ANY;
2765 return true;
2766
2767 case Intrinsic::s390_vstrszb:
2768 case Intrinsic::s390_vstrszh:
2769 case Intrinsic::s390_vstrszf:
2770 Opcode = SystemZISD::VSTRSZ_CC;
2771 CCValid = SystemZ::CCMASK_ANY;
2772 return true;
2773
2774 case Intrinsic::s390_vfcedbs:
2775 case Intrinsic::s390_vfcesbs:
2776 Opcode = SystemZISD::VFCMPES;
2777 CCValid = SystemZ::CCMASK_VCMP;
2778 return true;
2779
2780 case Intrinsic::s390_vfchdbs:
2781 case Intrinsic::s390_vfchsbs:
2782 Opcode = SystemZISD::VFCMPHS;
2783 CCValid = SystemZ::CCMASK_VCMP;
2784 return true;
2785
2786 case Intrinsic::s390_vfchedbs:
2787 case Intrinsic::s390_vfchesbs:
2788 Opcode = SystemZISD::VFCMPHES;
2789 CCValid = SystemZ::CCMASK_VCMP;
2790 return true;
2791
2792 case Intrinsic::s390_vftcidb:
2793 case Intrinsic::s390_vftcisb:
2794 Opcode = SystemZISD::VFTCI;
2795 CCValid = SystemZ::CCMASK_VCMP;
2796 return true;
2797
2798 case Intrinsic::s390_tdc:
2799 Opcode = SystemZISD::TDC;
2800 CCValid = SystemZ::CCMASK_TDC;
2801 return true;
2802
2803 default:
2804 return false;
2805 }
2806}
2807
2808// Emit an intrinsic with chain and an explicit CC register result.
2809static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2810 unsigned Opcode) {
2811 // Copy all operands except the intrinsic ID.
2812 unsigned NumOps = Op.getNumOperands();
2813 SmallVector<SDValue, 6> Ops;
2814 Ops.reserve(NumOps - 1);
2815 Ops.push_back(Op.getOperand(0));
2816 for (unsigned I = 2; I < NumOps; ++I)
2817 Ops.push_back(Op.getOperand(I));
2818
2819 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2820 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2821 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2822 SDValue OldChain = SDValue(Op.getNode(), 1);
2823 SDValue NewChain = SDValue(Intr.getNode(), 1);
2824 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2825 return Intr.getNode();
2826}
2827
2828// Emit an intrinsic with an explicit CC register result.
2829static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2830 unsigned Opcode) {
2831 // Copy all operands except the intrinsic ID.
2832 SDLoc DL(Op);
2833 unsigned NumOps = Op.getNumOperands();
2834 SmallVector<SDValue, 6> Ops;
2835 Ops.reserve(NumOps - 1);
2836 for (unsigned I = 1; I < NumOps; ++I) {
2837 SDValue CurrOper = Op.getOperand(I);
2838 if (CurrOper.getValueType() == MVT::f16) {
2839 assert((Op.getConstantOperandVal(0) == Intrinsic::s390_tdc && I == 1) &&
2840 "Unhandled intrinsic with f16 operand.");
2841 CurrOper = DAG.getFPExtendOrRound(CurrOper, DL, MVT::f32);
2842 }
2843 Ops.push_back(CurrOper);
2844 }
2845
2846 SDValue Intr = DAG.getNode(Opcode, DL, Op->getVTList(), Ops);
2847 return Intr.getNode();
2848}
2849
2850// CC is a comparison that will be implemented using an integer or
2851// floating-point comparison. Return the condition code mask for
2852// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2853// unsigned comparisons and clear for signed ones. In the floating-point
2854// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
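// Worked example: SETLT and SETOLT map to CCMASK_CMP_LT, while SETULT maps to
// CCMASK_CMP_UO | CCMASK_CMP_LT. For integers the extra UO bit merely marks
// the comparison as unsigned; for floating point it also accepts the
// unordered (NaN) result.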
2855static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2856#define CONV(X) \
2857 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2858 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2859 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2860
2861 switch (CC) {
2862 default:
2863 llvm_unreachable("Invalid integer condition!");
2864
2865 CONV(EQ);
2866 CONV(NE);
2867 CONV(GT);
2868 CONV(GE);
2869 CONV(LT);
2870 CONV(LE);
2871
2872 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2873 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2874 }
2875#undef CONV
2876}
2877
2878// If C can be converted to a comparison against zero, adjust the operands
2879// as necessary.
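// For example, a signed "x > -1" becomes "x >= 0" and "x < 1" becomes
// "x <= 0", so the later code can compare against zero instead of against an
// immediate.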
2880static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2881 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2882 return;
2883
2884 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2885 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2886 return;
2887
2888 int64_t Value = ConstOp1->getSExtValue();
2889 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2890 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2891 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2892 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2893 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2894 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2895 }
2896}
2897
2898// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2899// adjust the operands as necessary.
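// For example, an equality test of an 8-bit zero-extending load against the
// constant 200 is normalized to an i32 zero-extending load compared with 200,
// which matches the memory-immediate form CLI.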
2900static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2901 Comparison &C) {
2902 // For us to make any changes, it must be a comparison between a single-use
2903 // load and a constant.
2904 if (!C.Op0.hasOneUse() ||
2905 C.Op0.getOpcode() != ISD::LOAD ||
2906 C.Op1.getOpcode() != ISD::Constant)
2907 return;
2908
2909 // We must have an 8- or 16-bit load.
2910 auto *Load = cast<LoadSDNode>(C.Op0);
2911 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2912 if ((NumBits != 8 && NumBits != 16) ||
2913 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2914 return;
2915
2916 // The load must be an extending one and the constant must be within the
2917 // range of the unextended value.
2918 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2919 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2920 return;
2921 uint64_t Value = ConstOp1->getZExtValue();
2922 uint64_t Mask = (1 << NumBits) - 1;
2923 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2924 // Make sure that ConstOp1 is in range of C.Op0.
2925 int64_t SignedValue = ConstOp1->getSExtValue();
2926 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2927 return;
2928 if (C.ICmpType != SystemZICMP::SignedOnly) {
2929 // Unsigned comparison between two sign-extended values is equivalent
2930 // to unsigned comparison between two zero-extended values.
2931 Value &= Mask;
2932 } else if (NumBits == 8) {
2933 // Try to treat the comparison as unsigned, so that we can use CLI.
2934 // Adjust CCMask and Value as necessary.
2935 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2936 // Test whether the high bit of the byte is set.
2937 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2938 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2939 // Test whether the high bit of the byte is clear.
2940 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2941 else
2942 // No instruction exists for this combination.
2943 return;
2944 C.ICmpType = SystemZICMP::UnsignedOnly;
2945 }
2946 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2947 if (Value > Mask)
2948 return;
2949 // If the constant is in range, we can use any comparison.
2950 C.ICmpType = SystemZICMP::Any;
2951 } else
2952 return;
2953
2954 // Make sure that the first operand is an i32 of the right extension type.
2955 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2956 ISD::SEXTLOAD :
2957 ISD::ZEXTLOAD);
2958 if (C.Op0.getValueType() != MVT::i32 ||
2959 Load->getExtensionType() != ExtType) {
2960 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2961 Load->getBasePtr(), Load->getPointerInfo(),
2962 Load->getMemoryVT(), Load->getAlign(),
2963 Load->getMemOperand()->getFlags());
2964 // Update the chain uses.
2965 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2966 }
2967
2968 // Make sure that the second operand is an i32 with the right value.
2969 if (C.Op1.getValueType() != MVT::i32 ||
2970 Value != ConstOp1->getZExtValue())
2971 C.Op1 = DAG.getConstant((uint32_t)Value, DL, MVT::i32);
2972}
2973
2974// Return true if Op is either an unextended load, or a load suitable
2975// for integer register-memory comparisons of type ICmpType.
2976static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2977 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2978 if (Load) {
2979 // There are no instructions to compare a register with a memory byte.
2980 if (Load->getMemoryVT() == MVT::i8)
2981 return false;
2982 // Otherwise decide on extension type.
2983 switch (Load->getExtensionType()) {
2984 case ISD::NON_EXTLOAD:
2985 return true;
2986 case ISD::SEXTLOAD:
2987 return ICmpType != SystemZICMP::UnsignedOnly;
2988 case ISD::ZEXTLOAD:
2989 return ICmpType != SystemZICMP::SignedOnly;
2990 default:
2991 break;
2992 }
2993 }
2994 return false;
2995}
2996
2997// Return true if it is better to swap the operands of C.
2998static bool shouldSwapCmpOperands(const Comparison &C) {
2999 // Leave i128 and f128 comparisons alone, since they have no memory forms.
3000 if (C.Op0.getValueType() == MVT::i128)
3001 return false;
3002 if (C.Op0.getValueType() == MVT::f128)
3003 return false;
3004
3005 // Always keep a floating-point constant second, since comparisons with
3006 // zero can use LOAD TEST and comparisons with other constants make a
3007 // natural memory operand.
3008 if (isa<ConstantFPSDNode>(C.Op1))
3009 return false;
3010
3011 // Never swap comparisons with zero since there are many ways to optimize
3012 // those later.
3013 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
3014 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
3015 return false;
3016
3017 // Also keep natural memory operands second if the loaded value is
3018 // only used here. Several comparisons have memory forms.
3019 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
3020 return false;
3021
3022 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
3023 // In that case we generally prefer the memory to be second.
3024 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
3025 // The only exceptions are when the second operand is a constant and
3026 // we can use things like CHHSI.
3027 if (!ConstOp1)
3028 return true;
3029 // The unsigned memory-immediate instructions can handle 16-bit
3030 // unsigned integers.
3031 if (C.ICmpType != SystemZICMP::SignedOnly &&
3032 isUInt<16>(ConstOp1->getZExtValue()))
3033 return false;
3034 // The signed memory-immediate instructions can handle 16-bit
3035 // signed integers.
3036 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
3037 isInt<16>(ConstOp1->getSExtValue()))
3038 return false;
3039 return true;
3040 }
3041
3042 // Try to promote the use of CGFR and CLGFR.
3043 unsigned Opcode0 = C.Op0.getOpcode();
3044 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
3045 return true;
3046 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
3047 return true;
3048 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND &&
3049 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
3050 C.Op0.getConstantOperandVal(1) == 0xffffffff)
3051 return true;
3052
3053 return false;
3054}
3055
3056// Check whether C tests for equality between X and Y and whether X - Y
3057// or Y - X is also computed. In that case it's better to compare the
3058// result of the subtraction against zero.
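// For example, in "if (a == b) ... use (a - b) ..." the subtraction is
// already computed, so the comparison is rewritten as (a - b) == 0 and the
// SUB itself can set the condition code.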
3059static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
3060 Comparison &C) {
3061 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3062 C.CCMask == SystemZ::CCMASK_CMP_NE) {
3063 for (SDNode *N : C.Op0->users()) {
3064 if (N->getOpcode() == ISD::SUB &&
3065 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
3066 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
3067 // Disable the nsw and nuw flags: the backend needs to handle
3068 // overflow as well during comparison elimination.
3069 N->dropFlags(SDNodeFlags::NoWrap);
3070 C.Op0 = SDValue(N, 0);
3071 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
3072 return;
3073 }
3074 }
3075 }
3076}
3077
3078// Check whether C compares a floating-point value with zero and if that
3079// floating-point value is also negated. In this case we can use the
3080// negation to set CC, so avoiding separate LOAD AND TEST and
3081// LOAD (NEGATIVE/COMPLEMENT) instructions.
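// For example, with "y = -x; if (x < 0.0)" the comparison can test y instead:
// the negation already sets CC, and only the condition-code mask needs to be
// reversed.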
3082static void adjustForFNeg(Comparison &C) {
3083 // This optimization is invalid for strict comparisons, since FNEG
3084 // does not raise any exceptions.
3085 if (C.Chain)
3086 return;
3087 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
3088 if (C1 && C1->isZero()) {
3089 for (SDNode *N : C.Op0->users()) {
3090 if (N->getOpcode() == ISD::FNEG) {
3091 C.Op0 = SDValue(N, 0);
3092 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3093 return;
3094 }
3095 }
3096 }
3097}
3098
3099// Check whether C compares (shl X, 32) with 0 and whether X is
3100// also sign-extended. In that case it is better to test the result
3101// of the sign extension using LTGFR.
3102//
3103// This case is important because InstCombine transforms a comparison
3104// with (sext (trunc X)) into a comparison with (shl X, 32).
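// For example, "(shl i64 %x, 32) slt 0" where %x also feeds a
// sign_extend_inreg to i32 is rewritten to test that sign-extension result,
// so a single LTGFR both extends the value and sets CC.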
3105static void adjustForLTGFR(Comparison &C) {
3106 // Check for a comparison between (shl X, 32) and 0.
3107 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 &&
3108 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) {
3109 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3110 if (C1 && C1->getZExtValue() == 32) {
3111 SDValue ShlOp0 = C.Op0.getOperand(0);
3112 // See whether X has any SIGN_EXTEND_INREG uses.
3113 for (SDNode *N : ShlOp0->users()) {
3114 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
3115 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
3116 C.Op0 = SDValue(N, 0);
3117 return;
3118 }
3119 }
3120 }
3121 }
3122}
3123
3124// If C compares the truncation of an extending load, try to compare
3125// the untruncated value instead. This exposes more opportunities to
3126// reuse CC.
3127static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
3128 Comparison &C) {
3129 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
3130 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
3131 C.Op1.getOpcode() == ISD::Constant &&
3132 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3133 C.Op1->getAsZExtVal() == 0) {
3134 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
3135 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
3136 C.Op0.getValueSizeInBits().getFixedValue()) {
3137 unsigned Type = L->getExtensionType();
3138 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
3139 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
3140 C.Op0 = C.Op0.getOperand(0);
3141 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
3142 }
3143 }
3144 }
3145}
3146
3147// Return true if shift operation N has an in-range constant shift value.
3148// Store it in ShiftVal if so.
3149static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
3150 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
3151 if (!Shift)
3152 return false;
3153
3154 uint64_t Amount = Shift->getZExtValue();
3155 if (Amount >= N.getValueSizeInBits())
3156 return false;
3157
3158 ShiftVal = Amount;
3159 return true;
3160}
3161
3162// Check whether an AND with Mask is suitable for a TEST UNDER MASK
3163// instruction and whether the CC value is descriptive enough to handle
3164// a comparison of type Opcode between the AND result and CmpVal.
3165// CCMask says which comparison result is being tested and BitSize is
3166// the number of bits in the operands. If TEST UNDER MASK can be used,
3167// return the corresponding CC mask, otherwise return 0.
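// For example, with Mask 0x8000 an EQ comparison of the AND result against 0
// corresponds to CCMASK_TM_ALL_0 ("all selected bits zero"), while NE against
// 0 corresponds to CCMASK_TM_SOME_1.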
3168static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
3169 uint64_t Mask, uint64_t CmpVal,
3170 unsigned ICmpType) {
3171 assert(Mask != 0 && "ANDs with zero should have been removed by now");
3172
3173 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
3174 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
3175 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
3176 return 0;
3177
3178 // Work out the masks for the lowest and highest bits.
3180 uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask);
3181
3182 // Signed ordered comparisons are effectively unsigned if the sign
3183 // bit is dropped.
3184 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
3185
3186 // Check for equality comparisons with 0, or the equivalent.
3187 if (CmpVal == 0) {
3188 if (CCMask == SystemZ::CCMASK_CMP_EQ)
3189 return SystemZ::CCMASK_TM_ALL_0;
3190 if (CCMask == SystemZ::CCMASK_CMP_NE)
3191 return SystemZ::CCMASK_TM_SOME_1;
3192 }
3193 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
3194 if (CCMask == SystemZ::CCMASK_CMP_LT)
3195 return SystemZ::CCMASK_TM_ALL_0;
3196 if (CCMask == SystemZ::CCMASK_CMP_GE)
3197 return SystemZ::CCMASK_TM_SOME_1;
3198 }
3199 if (EffectivelyUnsigned && CmpVal < Low) {
3200 if (CCMask == SystemZ::CCMASK_CMP_LE)
3201 return SystemZ::CCMASK_TM_ALL_0;
3202 if (CCMask == SystemZ::CCMASK_CMP_GT)
3203 return SystemZ::CCMASK_TM_SOME_1;
3204 }
3205
3206 // Check for equality comparisons with the mask, or the equivalent.
3207 if (CmpVal == Mask) {
3208 if (CCMask == SystemZ::CCMASK_CMP_EQ)
3209 return SystemZ::CCMASK_TM_ALL_1;
3210 if (CCMask == SystemZ::CCMASK_CMP_NE)
3211 return SystemZ::CCMASK_TM_SOME_0;
3212 }
3213 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
3214 if (CCMask == SystemZ::CCMASK_CMP_GT)
3215 return SystemZ::CCMASK_TM_ALL_1;
3216 if (CCMask == SystemZ::CCMASK_CMP_LE)
3217 return SystemZ::CCMASK_TM_SOME_0;
3218 }
3219 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
3220 if (CCMask == SystemZ::CCMASK_CMP_GE)
3221 return SystemZ::CCMASK_TM_ALL_1;
3222 if (CCMask == SystemZ::CCMASK_CMP_LT)
3223 return SystemZ::CCMASK_TM_SOME_0;
3224 }
3225
3226 // Check for ordered comparisons with the top bit.
3227 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
3228 if (CCMask == SystemZ::CCMASK_CMP_LE)
3229 return SystemZ::CCMASK_TM_MSB_0;
3230 if (CCMask == SystemZ::CCMASK_CMP_GT)
3231 return SystemZ::CCMASK_TM_MSB_1;
3232 }
3233 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
3234 if (CCMask == SystemZ::CCMASK_CMP_LT)
3235 return SystemZ::CCMASK_TM_MSB_0;
3236 if (CCMask == SystemZ::CCMASK_CMP_GE)
3237 return SystemZ::CCMASK_TM_MSB_1;
3238 }
3239
3240 // If there are just two bits, we can do equality checks for Low and High
3241 // as well.
3242 if (Mask == Low + High) {
3243 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
3244 return SystemZ::CCMASK_TM_MIXED_MSB_0;
3245 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
3246 return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
3247 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
3248 return SystemZ::CCMASK_TM_MIXED_MSB_1;
3249 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
3250 return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
3251 }
3252
3253 // Looks like we've exhausted our options.
3254 return 0;
3255}
3256
3257// See whether C can be implemented as a TEST UNDER MASK instruction.
3258// Update the arguments with the TM version if so.
3259static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
3260 Comparison &C) {
3261 // Use VECTOR TEST UNDER MASK for i128 operations.
3262 if (C.Op0.getValueType() == MVT::i128) {
3263 // We can use VTM for EQ/NE comparisons of x & y against 0.
3264 if (C.Op0.getOpcode() == ISD::AND &&
3265 (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3266 C.CCMask == SystemZ::CCMASK_CMP_NE)) {
3267 auto *Mask = dyn_cast<ConstantSDNode>(C.Op1);
3268 if (Mask && Mask->getAPIntValue() == 0) {
3269 C.Opcode = SystemZISD::VTM;
3270 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1));
3271 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0));
3272 C.CCValid = SystemZ::CCMASK_VCMP;
3273 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3274 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3275 else
3276 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3277 }
3278 }
3279 return;
3280 }
3281
3282 // Check that we have a comparison with a constant.
3283 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
3284 if (!ConstOp1)
3285 return;
3286 uint64_t CmpVal = ConstOp1->getZExtValue();
3287
3288 // Check whether the nonconstant input is an AND with a constant mask.
3289 Comparison NewC(C);
3290 uint64_t MaskVal;
3291 ConstantSDNode *Mask = nullptr;
3292 if (C.Op0.getOpcode() == ISD::AND) {
3293 NewC.Op0 = C.Op0.getOperand(0);
3294 NewC.Op1 = C.Op0.getOperand(1);
3295 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
3296 if (!Mask)
3297 return;
3298 MaskVal = Mask->getZExtValue();
3299 } else {
3300 // There is no instruction to compare with a 64-bit immediate
3301 // so use TMHH instead if possible. We need an unsigned ordered
3302 // comparison with an i64 immediate.
3303 if (NewC.Op0.getValueType() != MVT::i64 ||
3304 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
3305 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
3306 NewC.ICmpType == SystemZICMP::SignedOnly)
3307 return;
3308 // Convert LE and GT comparisons into LT and GE.
3309 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
3310 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
3311 if (CmpVal == uint64_t(-1))
3312 return;
3313 CmpVal += 1;
3314 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
3315 }
3316 // If the low N bits of Op1 are zero then the low N bits of Op0 can
3317 // be masked off without changing the result.
3318 MaskVal = -(CmpVal & -CmpVal);
3319 NewC.ICmpType = SystemZICMP::UnsignedOnly;
3320 }
3321 if (!MaskVal)
3322 return;
3323
3324 // Check whether the combination of mask, comparison value and comparison
3325 // type are suitable.
3326 unsigned BitSize = NewC.Op0.getValueSizeInBits();
3327 unsigned NewCCMask, ShiftVal;
3328 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3329 NewC.Op0.getOpcode() == ISD::SHL &&
3330 isSimpleShift(NewC.Op0, ShiftVal) &&
3331 (MaskVal >> ShiftVal != 0) &&
3332 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
3333 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3334 MaskVal >> ShiftVal,
3335 CmpVal >> ShiftVal,
3336 SystemZICMP::Any))) {
3337 NewC.Op0 = NewC.Op0.getOperand(0);
3338 MaskVal >>= ShiftVal;
3339 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3340 NewC.Op0.getOpcode() == ISD::SRL &&
3341 isSimpleShift(NewC.Op0, ShiftVal) &&
3342 (MaskVal << ShiftVal != 0) &&
3343 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
3344 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3345 MaskVal << ShiftVal,
3346 CmpVal << ShiftVal,
3347 SystemZICMP::UnsignedOnly))) {
3348 NewC.Op0 = NewC.Op0.getOperand(0);
3349 MaskVal <<= ShiftVal;
3350 } else {
3351 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
3352 NewC.ICmpType);
3353 if (!NewCCMask)
3354 return;
3355 }
3356
3357 // Go ahead and make the change.
3358 C.Opcode = SystemZISD::TM;
3359 C.Op0 = NewC.Op0;
3360 if (Mask && Mask->getZExtValue() == MaskVal)
3361 C.Op1 = SDValue(Mask, 0);
3362 else
3363 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
3364 C.CCValid = SystemZ::CCMASK_TM;
3365 C.CCMask = NewCCMask;
3366}
3367
3368// Implement i128 comparison in vector registers.
3369static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
3370 Comparison &C) {
3371 if (C.Opcode != SystemZISD::ICMP)
3372 return;
3373 if (C.Op0.getValueType() != MVT::i128)
3374 return;
3375
3376 // Recognize vector comparison reductions.
3377 if ((C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3378 C.CCMask == SystemZ::CCMASK_CMP_NE) &&
3379 (isNullConstant(C.Op1) || isAllOnesConstant(C.Op1))) {
3380 bool CmpEq = C.CCMask == SystemZ::CCMASK_CMP_EQ;
3381 bool CmpNull = isNullConstant(C.Op1);
3382 SDValue Src = peekThroughBitcasts(C.Op0);
3383 if (Src.hasOneUse() && isBitwiseNot(Src)) {
3384 Src = Src.getOperand(0);
3385 CmpNull = !CmpNull;
3386 }
3387 unsigned Opcode = 0;
3388 if (Src.hasOneUse()) {
3389 switch (Src.getOpcode()) {
3390 case SystemZISD::VICMPE: Opcode = SystemZISD::VICMPES; break;
3391 case SystemZISD::VICMPH: Opcode = SystemZISD::VICMPHS; break;
3392 case SystemZISD::VICMPHL: Opcode = SystemZISD::VICMPHLS; break;
3393 case SystemZISD::VFCMPE: Opcode = SystemZISD::VFCMPES; break;
3394 case SystemZISD::VFCMPH: Opcode = SystemZISD::VFCMPHS; break;
3395 case SystemZISD::VFCMPHE: Opcode = SystemZISD::VFCMPHES; break;
3396 default: break;
3397 }
3398 }
3399 if (Opcode) {
3400 C.Opcode = Opcode;
3401 C.Op0 = Src->getOperand(0);
3402 C.Op1 = Src->getOperand(1);
3403 C.CCValid = SystemZ::CCMASK_VCMP;
3404 C.CCMask = CmpNull ? SystemZ::CCMASK_VCMP_NONE : SystemZ::CCMASK_VCMP_ALL;
3405 if (!CmpEq)
3406 C.CCMask ^= C.CCValid;
3407 return;
3408 }
3409 }
3410
3411 // Everything below here is not useful if we have native i128 compares.
3412 if (DAG.getSubtarget<SystemZSubtarget>().hasVectorEnhancements3())
3413 return;
3414
3415 // (In-)Equality comparisons can be implemented via VCEQGS.
3416 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3417 C.CCMask == SystemZ::CCMASK_CMP_NE) {
3418 C.Opcode = SystemZISD::VICMPES;
3419 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0);
3420 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1);
3421 C.CCValid = SystemZ::CCMASK_VCMP;
3422 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3423 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3424 else
3425 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3426 return;
3427 }
3428
3429 // Normalize other comparisons to GT.
3430 bool Swap = false, Invert = false;
3431 switch (C.CCMask) {
3432 case SystemZ::CCMASK_CMP_GT: break;
3433 case SystemZ::CCMASK_CMP_LT: Swap = true; break;
3434 case SystemZ::CCMASK_CMP_LE: Invert = true; break;
3435 case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
3436 default: llvm_unreachable("Invalid integer condition!");
3437 }
3438 if (Swap)
3439 std::swap(C.Op0, C.Op1);
3440
3441 if (C.ICmpType == SystemZICMP::UnsignedOnly)
3442 C.Opcode = SystemZISD::UCMP128HI;
3443 else
3444 C.Opcode = SystemZISD::SCMP128HI;
3445 C.CCValid = SystemZ::CCMASK_ANY;
3446 C.CCMask = SystemZ::CCMASK_1;
3447
3448 if (Invert)
3449 C.CCMask ^= C.CCValid;
3450}
3451
3452// See whether the comparison argument contains a redundant AND
3453// and remove it if so. This sometimes happens due to the generic
3454// BRCOND expansion.
3455static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
3456 Comparison &C) {
3457 if (C.Op0.getOpcode() != ISD::AND)
3458 return;
3459 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3460 if (!Mask || Mask->getValueSizeInBits(0) > 64)
3461 return;
3462 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
3463 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
3464 return;
3465
3466 C.Op0 = C.Op0.getOperand(0);
3467}
3468
3469// Return a Comparison that tests the condition-code result of intrinsic
3470// node Call against constant integer CC using comparison code Cond.
3471// Opcode is the opcode of the SystemZISD operation for the intrinsic
3472// and CCValid is the set of possible condition-code results.
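// Worked example: the four CC values map to mask bits (bit 3 - CC), so
// testing "CC == 1" with SETEQ produces CCMask 0b0100, and SETNE produces its
// complement restricted to CCValid.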
3473static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
3474 SDValue Call, unsigned CCValid, uint64_t CC,
3475 ISD::CondCode Cond) {
3476 Comparison C(Call, SDValue(), SDValue());
3477 C.Opcode = Opcode;
3478 C.CCValid = CCValid;
3479 if (Cond == ISD::SETEQ)
3480 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
3481 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
3482 else if (Cond == ISD::SETNE)
3483 // ...and the inverse of that.
3484 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
3485 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
3486 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
3487 // always true for CC>3.
3488 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
3489 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
3490 // ...and the inverse of that.
3491 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
3492 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
3493 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
3494 // always true for CC>3.
3495 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
3496 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
3497 // ...and the inverse of that.
3498 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
3499 else
3500 llvm_unreachable("Unexpected integer comparison type");
3501 C.CCMask &= CCValid;
3502 return C;
3503}
3504
3505// Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
3506static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
3507 ISD::CondCode Cond, const SDLoc &DL,
3508 SDValue Chain = SDValue(),
3509 bool IsSignaling = false) {
3510 if (CmpOp1.getOpcode() == ISD::Constant) {
3511 assert(!Chain);
3512 unsigned Opcode, CCValid;
3513 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
3514 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
3515 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
3516 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3517 CmpOp1->getAsZExtVal(), Cond);
3518 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
3519 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
3520 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
3521 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3522 CmpOp1->getAsZExtVal(), Cond);
3523 }
3524 Comparison C(CmpOp0, CmpOp1, Chain);
3525 C.CCMask = CCMaskForCondCode(Cond);
3526 if (C.Op0.getValueType().isFloatingPoint()) {
3527 C.CCValid = SystemZ::CCMASK_FCMP;
3528 if (!C.Chain)
3529 C.Opcode = SystemZISD::FCMP;
3530 else if (!IsSignaling)
3531 C.Opcode = SystemZISD::STRICT_FCMP;
3532 else
3533 C.Opcode = SystemZISD::STRICT_FCMPS;
3534 adjustForFNeg(C);
3535 } else {
3536 assert(!C.Chain);
3537 C.CCValid = SystemZ::CCMASK_ICMP;
3538 C.Opcode = SystemZISD::ICMP;
3539 // Choose the type of comparison. Equality and inequality tests can
3540 // use either signed or unsigned comparisons. The choice also doesn't
3541 // matter if both sign bits are known to be clear. In those cases we
3542 // want to give the main isel code the freedom to choose whichever
3543 // form fits best.
3544 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3545 C.CCMask == SystemZ::CCMASK_CMP_NE ||
3546 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
3547 C.ICmpType = SystemZICMP::Any;
3548 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
3549 C.ICmpType = SystemZICMP::UnsignedOnly;
3550 else
3551 C.ICmpType = SystemZICMP::SignedOnly;
3552 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
3553 adjustForRedundantAnd(DAG, DL, C);
3554 adjustZeroCmp(DAG, DL, C);
3555 adjustSubwordCmp(DAG, DL, C);
3556 adjustForSubtraction(DAG, DL, C);
3557 adjustForLTGFR(C);
3558 adjustICmpTruncate(DAG, DL, C);
3559 }
3560
3561 if (shouldSwapCmpOperands(C)) {
3562 std::swap(C.Op0, C.Op1);
3563 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3564 }
3565
3566 adjustForTestUnderMask(DAG, DL, C);
3567 adjustICmp128(DAG, DL, C);
3568 return C;
3569}
3570
3571// Emit the comparison instruction described by C.
3572static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
3573 if (!C.Op1.getNode()) {
3574 SDNode *Node;
3575 switch (C.Op0.getOpcode()) {
3576 case ISD::INTRINSIC_W_CHAIN:
3577 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
3578 return SDValue(Node, 0);
3579 case ISD::INTRINSIC_WO_CHAIN:
3580 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
3581 return SDValue(Node, Node->getNumValues() - 1);
3582 default:
3583 llvm_unreachable("Invalid comparison operands");
3584 }
3585 }
3586 if (C.Opcode == SystemZISD::ICMP)
3587 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
3588 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
3589 if (C.Opcode == SystemZISD::TM) {
3590 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
3591 bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
3592 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
3593 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
3594 }
3595 if (C.Opcode == SystemZISD::VICMPES ||
3596 C.Opcode == SystemZISD::VICMPHS ||
3597 C.Opcode == SystemZISD::VICMPHLS ||
3598 C.Opcode == SystemZISD::VFCMPES ||
3599 C.Opcode == SystemZISD::VFCMPHS ||
3600 C.Opcode == SystemZISD::VFCMPHES) {
3601 EVT IntVT = C.Op0.getValueType().changeVectorElementTypeToInteger();
3602 SDVTList VTs = DAG.getVTList(IntVT, MVT::i32);
3603 SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1);
3604 return SDValue(Val.getNode(), 1);
3605 }
3606 if (C.Chain) {
3607 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
3608 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
3609 }
3610 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
3611}
3612
3613// Implement a 32-bit *MUL_LOHI operation by extending both operands to
3614// 64 bits. Extend is the extension type to use. Store the high part
3615// in Hi and the low part in Lo.
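// Arithmetic example: for a 32-bit unsigned mul_lohi of 0xFFFFFFFF by itself,
// both operands are zero-extended to i64, the product is 0xFFFFFFFE00000001,
// and the routine returns Hi = 0xFFFFFFFE and Lo = 0x00000001.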
3616static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
3617 SDValue Op0, SDValue Op1, SDValue &Hi,
3618 SDValue &Lo) {
3619 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
3620 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
3621 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
3622 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
3623 DAG.getConstant(32, DL, MVT::i64));
3624 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
3625 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3626}
3627
3628// Lower a binary operation that produces two VT results, one in each
3629// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
3630// and Opcode performs the GR128 operation. Store the even register result
3631// in Even and the odd register result in Odd.
3632static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
3633 unsigned Opcode, SDValue Op0, SDValue Op1,
3634 SDValue &Even, SDValue &Odd) {
3635 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
3636 bool Is32Bit = is32Bit(VT);
3637 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
3638 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
3639}
3640
3641// Return an i32 value that is 1 if the CC value produced by CCReg is
3642// in the mask CCMask and 0 otherwise. CC is known to have a value
3643// in CCValid, so other values can be ignored.
3644static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
3645 unsigned CCValid, unsigned CCMask) {
3646 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
3647 DAG.getConstant(0, DL, MVT::i32),
3648 DAG.getTargetConstant(CCValid, DL, MVT::i32),
3649 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
3650 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
3651}
3652
3653// Return the SystemISD vector comparison operation for CC, or 0 if it cannot
3654// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
3655// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
3656// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
3657// floating-point comparisons.
3658enum class CmpMode { Int, FP, StrictFP, SignalingFP };
3659static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
3660 switch (CC) {
3661 case ISD::SETOEQ:
3662 case ISD::SETEQ:
3663 switch (Mode) {
3664 case CmpMode::Int: return SystemZISD::VICMPE;
3665 case CmpMode::FP: return SystemZISD::VFCMPE;
3666 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
3667 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
3668 }
3669 llvm_unreachable("Bad mode");
3670
3671 case ISD::SETOGE:
3672 case ISD::SETGE:
3673 switch (Mode) {
3674 case CmpMode::Int: return 0;
3675 case CmpMode::FP: return SystemZISD::VFCMPHE;
3676 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
3677 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
3678 }
3679 llvm_unreachable("Bad mode");
3680
3681 case ISD::SETOGT:
3682 case ISD::SETGT:
3683 switch (Mode) {
3684 case CmpMode::Int: return SystemZISD::VICMPH;
3685 case CmpMode::FP: return SystemZISD::VFCMPH;
3686 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
3687 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
3688 }
3689 llvm_unreachable("Bad mode");
3690
3691 case ISD::SETUGT:
3692 switch (Mode) {
3693 case CmpMode::Int: return SystemZISD::VICMPHL;
3694 case CmpMode::FP: return 0;
3695 case CmpMode::StrictFP: return 0;
3696 case CmpMode::SignalingFP: return 0;
3697 }
3698 llvm_unreachable("Bad mode");
3699
3700 default:
3701 return 0;
3702 }
3703}
3704
3705// Return the SystemZISD vector comparison operation for CC or its inverse,
3706// or 0 if neither can be done directly. Indicate in Invert whether the
3707// result is for the inverse of CC. Mode is as above.
3708static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
3709 bool &Invert) {
3710 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3711 Invert = false;
3712 return Opcode;
3713 }
3714
3715 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
3716 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3717 Invert = true;
3718 return Opcode;
3719 }
3720
3721 return 0;
3722}
3723
3724// Return a v2f64 that contains the extended form of elements Start and Start+1
3725// of v4f32 value Op. If Chain is nonnull, return the strict form.
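// (With Start == 0 the shuffle mask below is {0, -1, 1, -1}, placing elements
// 0 and 1 of Op in the even lanes that VEXTEND widens; Start == 2 does the
// same for elements 2 and 3.)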
3726static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
3727 SDValue Op, SDValue Chain) {
3728 int Mask[] = { Start, -1, Start + 1, -1 };
3729 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
3730 if (Chain) {
3731 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
3732 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
3733 }
3734 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
3735}
3736
3737// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
3738// producing a result of type VT. If Chain is nonnull, return the strict form.
3739SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
3740 const SDLoc &DL, EVT VT,
3741 SDValue CmpOp0,
3742 SDValue CmpOp1,
3743 SDValue Chain) const {
3744 // There is no hardware support for v4f32 (unless we have the vector
3745 // enhancements facility 1), so extend the vector into two v2f64s
3746 // and compare those.
3747 if (CmpOp0.getValueType() == MVT::v4f32 &&
3748 !Subtarget.hasVectorEnhancements1()) {
3749 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
3750 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
3751 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
3752 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
3753 if (Chain) {
3754 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
3755 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
3756 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
3757 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3758 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
3759 H1.getValue(1), L1.getValue(1),
3760 HRes.getValue(1), LRes.getValue(1) };
3761 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3762 SDValue Ops[2] = { Res, NewChain };
3763 return DAG.getMergeValues(Ops, DL);
3764 }
3765 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3766 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3767 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3768 }
3769 if (Chain) {
3770 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3771 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3772 }
3773 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3774}
3775
3776// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3777// an integer mask of type VT. If Chain is nonnull, we have a strict
3778// floating-point comparison. If in addition IsSignaling is true, we have
3779// a strict signaling floating-point comparison.
3780SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3781 const SDLoc &DL, EVT VT,
3782 ISD::CondCode CC,
3783 SDValue CmpOp0,
3784 SDValue CmpOp1,
3785 SDValue Chain,
3786 bool IsSignaling) const {
3787 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3788 assert (!Chain || IsFP);
3789 assert (!IsSignaling || Chain);
3790 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3791 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3792 bool Invert = false;
3793 SDValue Cmp;
3794 switch (CC) {
3795 // Handle tests for order using (or (ogt y x) (oge x y)).
3796 case ISD::SETUO:
3797 Invert = true;
3798 [[fallthrough]];
3799 case ISD::SETO: {
3800 assert(IsFP && "Unexpected integer comparison");
3801 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3802 DL, VT, CmpOp1, CmpOp0, Chain);
3803 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3804 DL, VT, CmpOp0, CmpOp1, Chain);
3805 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3806 if (Chain)
3807 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3808 LT.getValue(1), GE.getValue(1));
3809 break;
3810 }
3811
3812 // Handle <> tests using (or (ogt y x) (ogt x y)).
3813 case ISD::SETUEQ:
3814 Invert = true;
3815 [[fallthrough]];
3816 case ISD::SETONE: {
3817 assert(IsFP && "Unexpected integer comparison");
3818 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3819 DL, VT, CmpOp1, CmpOp0, Chain);
3820 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3821 DL, VT, CmpOp0, CmpOp1, Chain);
3822 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3823 if (Chain)
3824 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3825 LT.getValue(1), GT.getValue(1));
3826 break;
3827 }
3828
3829 // Otherwise a single comparison is enough. It doesn't really
3830 // matter whether we try the inversion or the swap first, since
3831 // there are no cases where both work.
3832 default:
3833 // Optimize sign-bit comparisons to signed compares.
3834 if (Mode == CmpMode::Int && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
3835 ISD::isConstantSplatVectorAllZeros(CmpOp1.getNode())) {
3836 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3837 APInt Mask;
3838 if (CmpOp0.getOpcode() == ISD::AND
3839 && ISD::isConstantSplatVector(CmpOp0.getOperand(1).getNode(), Mask)
3840 && Mask == APInt::getSignMask(EltSize)) {
3841 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
3842 CmpOp0 = CmpOp0.getOperand(0);
3843 }
3844 }
3845 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3846 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3847 else {
3848 CC = ISD::getSetCCSwappedOperands(CC);
3849 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3850 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3851 else
3852 llvm_unreachable("Unhandled comparison");
3853 }
3854 if (Chain)
3855 Chain = Cmp.getValue(1);
3856 break;
3857 }
3858 if (Invert) {
3859 SDValue Mask =
3860 DAG.getSplatBuildVector(VT, DL, DAG.getAllOnesConstant(DL, MVT::i64));
3861 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3862 }
3863 if (Chain && Chain.getNode() != Cmp.getNode()) {
3864 SDValue Ops[2] = { Cmp, Chain };
3865 Cmp = DAG.getMergeValues(Ops, DL);
3866 }
3867 return Cmp;
3868}
3869
3870SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3871 SelectionDAG &DAG) const {
3872 SDValue CmpOp0 = Op.getOperand(0);
3873 SDValue CmpOp1 = Op.getOperand(1);
3874 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3875 SDLoc DL(Op);
3876 EVT VT = Op.getValueType();
3877 if (VT.isVector())
3878 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3879
3880 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3881 SDValue CCReg = emitCmp(DAG, DL, C);
3882 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3883}
3884
3885SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3886 SelectionDAG &DAG,
3887 bool IsSignaling) const {
3888 SDValue Chain = Op.getOperand(0);
3889 SDValue CmpOp0 = Op.getOperand(1);
3890 SDValue CmpOp1 = Op.getOperand(2);
3891 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3892 SDLoc DL(Op);
3893 EVT VT = Op.getNode()->getValueType(0);
3894 if (VT.isVector()) {
3895 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3896 Chain, IsSignaling);
3897 return Res.getValue(Op.getResNo());
3898 }
3899
3900 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3901 SDValue CCReg = emitCmp(DAG, DL, C);
3902 CCReg->setFlags(Op->getFlags());
3903 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3904 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3905 return DAG.getMergeValues(Ops, DL);
3906}
3907
3908SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3909 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3910 SDValue CmpOp0 = Op.getOperand(2);
3911 SDValue CmpOp1 = Op.getOperand(3);
3912 SDValue Dest = Op.getOperand(4);
3913 SDLoc DL(Op);
3914
3915 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3916 SDValue CCReg = emitCmp(DAG, DL, C);
3917 return DAG.getNode(
3918 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3919 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3920 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3921}
3922
3923// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3924// allowing Pos and Neg to be wider than CmpOp.
3925static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3926 return (Neg.getOpcode() == ISD::SUB &&
3927 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3928 Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos &&
3929 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3930 Pos.getOperand(0) == CmpOp)));
3931}
3932
3933// Return the absolute or negative absolute of Op; IsNegative decides which.
3934static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3935 bool IsNegative) {
3936 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3937 if (IsNegative)
3938 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3939 DAG.getConstant(0, DL, Op.getValueType()), Op);
3940 return Op;
3941}
3942
3943static SDValue getI128Select(SelectionDAG &DAG, const SDLoc &DL,
3944 Comparison C, SDValue TrueOp, SDValue FalseOp) {
3945 EVT VT = MVT::i128;
3946 unsigned Op;
3947
3948 if (C.CCMask == SystemZ::CCMASK_CMP_NE ||
3949 C.CCMask == SystemZ::CCMASK_CMP_GE ||
3950 C.CCMask == SystemZ::CCMASK_CMP_LE) {
3951 std::swap(TrueOp, FalseOp);
3952 C.CCMask ^= C.CCValid;
3953 }
3954 if (C.CCMask == SystemZ::CCMASK_CMP_LT) {
3955 std::swap(C.Op0, C.Op1);
3956 C.CCMask = SystemZ::CCMASK_CMP_GT;
3957 }
3958 switch (C.CCMask) {
3959 case SystemZ::CCMASK_CMP_EQ:
3960 Op = SystemZISD::VICMPE;
3961 break;
3962 case SystemZ::CCMASK_CMP_GT:
3963 if (C.ICmpType == SystemZICMP::UnsignedOnly)
3964 Op = SystemZISD::VICMPHL;
3965 else
3966 Op = SystemZISD::VICMPH;
3967 break;
3968 default:
3969 llvm_unreachable("Unhandled comparison");
3970 break;
3971 }
3972
3973 SDValue Mask = DAG.getNode(Op, DL, VT, C.Op0, C.Op1);
3974 TrueOp = DAG.getNode(ISD::AND, DL, VT, TrueOp, Mask);
3975 FalseOp = DAG.getNode(ISD::AND, DL, VT, FalseOp, DAG.getNOT(DL, Mask, VT));
3976 return DAG.getNode(ISD::OR, DL, VT, TrueOp, FalseOp);
3977}
3978
3979SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3980 SelectionDAG &DAG) const {
3981 SDValue CmpOp0 = Op.getOperand(0);
3982 SDValue CmpOp1 = Op.getOperand(1);
3983 SDValue TrueOp = Op.getOperand(2);
3984 SDValue FalseOp = Op.getOperand(3);
3985 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3986 SDLoc DL(Op);
3987
3988 // SELECT_CC involving f16 will not have the cmp-ops promoted by the
3989 // legalizer, as it will be handled according to the type of the resulting
3990 // value. Extend them here if needed.
3991 if (CmpOp0.getSimpleValueType() == MVT::f16) {
3992 CmpOp0 = DAG.getFPExtendOrRound(CmpOp0, SDLoc(CmpOp0), MVT::f32);
3993 CmpOp1 = DAG.getFPExtendOrRound(CmpOp1, SDLoc(CmpOp1), MVT::f32);
3994 }
3995
3996 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3997
3998 // Check for absolute and negative-absolute selections, including those
3999 // where the comparison value is sign-extended (for LPGFR and LNGFR).
4000 // This check supplements the one in DAGCombiner.
4001 if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ &&
4002 C.CCMask != SystemZ::CCMASK_CMP_NE &&
4003 C.Op1.getOpcode() == ISD::Constant &&
4004 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
4005 C.Op1->getAsZExtVal() == 0) {
4006 if (isAbsolute(C.Op0, TrueOp, FalseOp))
4007 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
4008 if (isAbsolute(C.Op0, FalseOp, TrueOp))
4009 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
4010 }
4011
4012 if (Subtarget.hasVectorEnhancements3() &&
4013 C.Opcode == SystemZISD::ICMP &&
4014 C.Op0.getValueType() == MVT::i128 &&
4015 TrueOp.getValueType() == MVT::i128) {
4016 return getI128Select(DAG, DL, C, TrueOp, FalseOp);
4017 }
4018
4019 SDValue CCReg = emitCmp(DAG, DL, C);
4020 SDValue Ops[] = {TrueOp, FalseOp,
4021 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
4022 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
4023
4024 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
4025}
4026
4027SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
4028 SelectionDAG &DAG) const {
4029 SDLoc DL(Node);
4030 const GlobalValue *GV = Node->getGlobal();
4031 int64_t Offset = Node->getOffset();
4032 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4033 CodeModel::Model CM = DAG.getTarget().getCodeModel();
4034
4035 SDValue Result;
4036 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
4037 if (isInt<32>(Offset)) {
4038 // Assign anchors at 1<<12 byte boundaries.
4039 uint64_t Anchor = Offset & ~uint64_t(0xfff);
4040 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
4041 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4042
4043 // The offset can be folded into the address if it is aligned to a
4044 // halfword.
4045 Offset -= Anchor;
4046 if (Offset != 0 && (Offset & 1) == 0) {
4047 SDValue Full =
4048 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
4049 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
4050 Offset = 0;
4051 }
4052 } else {
4053 // Conservatively load a constant offset greater than 32 bits into a
4054 // register below.
4055 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
4056 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4057 }
4058 } else if (Subtarget.isTargetELF()) {
4059 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
4060 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4061 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
4062 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4063 } else if (Subtarget.isTargetzOS()) {
4064 Result = getADAEntry(DAG, GV, DL, PtrVT);
4065 } else
4066 llvm_unreachable("Unexpected Subtarget");
4067
4068 // If there was a non-zero offset that we didn't fold, create an explicit
4069 // addition for it.
4070 if (Offset != 0)
4071 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4072 DAG.getSignedConstant(Offset, DL, PtrVT));
4073
4074 return Result;
4075}
4076
4077SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
4078 SelectionDAG &DAG,
4079 unsigned Opcode,
4080 SDValue GOTOffset) const {
4081 SDLoc DL(Node);
4082 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4083 SDValue Chain = DAG.getEntryNode();
4084 SDValue Glue;
4085
4086 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
4087 CallingConv::GHC)
4088 report_fatal_error("In GHC calling convention TLS is not supported");
4089
4090 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
4091 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
4092 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
4093 Glue = Chain.getValue(1);
4094 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
4095 Glue = Chain.getValue(1);
4096
4097 // The first call operand is the chain and the second is the TLS symbol.
4098 SmallVector<SDValue, 8> Ops;
4099 Ops.push_back(Chain);
4100 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
4101 Node->getValueType(0),
4102 0, 0));
4103
4104 // Add argument registers to the end of the list so that they are
4105 // known live into the call.
4106 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
4107 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
4108
4109 // Add a register mask operand representing the call-preserved registers.
4110 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4111 const uint32_t *Mask =
4112 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
4113 assert(Mask && "Missing call preserved mask for calling convention");
4114 Ops.push_back(DAG.getRegisterMask(Mask));
4115
4116 // Glue the call to the argument copies.
4117 Ops.push_back(Glue);
4118
4119 // Emit the call.
4120 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4121 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
4122 Glue = Chain.getValue(1);
4123
4124 // Copy the return value from %r2.
4125 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
4126}
4127
4128SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
4129 SelectionDAG &DAG) const {
4130 SDValue Chain = DAG.getEntryNode();
4131 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4132
4133 // The high part of the thread pointer is in access register 0.
4134 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
4135 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
4136
4137 // The low part of the thread pointer is in access register 1.
4138 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
4139 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
4140
4141 // Merge them into a single 64-bit address.
4142 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
4143 DAG.getConstant(32, DL, PtrVT));
4144 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
4145}
4146
4147SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
4148 SelectionDAG &DAG) const {
4149 if (DAG.getTarget().useEmulatedTLS())
4150 return LowerToTLSEmulatedModel(Node, DAG);
4151 SDLoc DL(Node);
4152 const GlobalValue *GV = Node->getGlobal();
4153 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4154 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
4155
4156 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
4157 CallingConv::GHC)
4158 report_fatal_error("In GHC calling convention TLS is not supported");
4159
4160 SDValue TP = lowerThreadPointer(DL, DAG);
4161
4162 // Get the offset of GA from the thread pointer, based on the TLS model.
4163 SDValue Offset;
4164 switch (model) {
4165 case TLSModel::GeneralDynamic: {
4166 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
4167 SystemZConstantPoolValue *CPV =
4168 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
4169
4170 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4171 Offset = DAG.getLoad(
4172 PtrVT, DL, DAG.getEntryNode(), Offset,
4173 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4174
4175 // Call __tls_get_offset to retrieve the offset.
4176 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
4177 break;
4178 }
4179
4180 case TLSModel::LocalDynamic: {
4181 // Load the GOT offset of the module ID.
4182 SystemZConstantPoolValue *CPV =
4183 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
4184
4185 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4186 Offset = DAG.getLoad(
4187 PtrVT, DL, DAG.getEntryNode(), Offset,
4188 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4189
4190 // Call __tls_get_offset to retrieve the module base offset.
4191 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
4192
4193 // Note: The SystemZLDCleanupPass will remove redundant computations
4194 // of the module base offset. Count total number of local-dynamic
4195 // accesses to trigger execution of that pass.
4196 SystemZMachineFunctionInfo* MFI =
4197 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
4198 MFI->incNumLocalDynamicTLSAccesses();
4199
4200 // Add the per-symbol offset.
4201 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
4202
4203 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4204 DTPOffset = DAG.getLoad(
4205 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
4206 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4207
4208 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
4209 break;
4210 }
4211
4212 case TLSModel::InitialExec: {
4213 // Load the offset from the GOT.
4214 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
4215 SystemZII::MO_INDNTPOFF);
4216 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
4217 Offset =
4218 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
4219 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4220 break;
4221 }
4222
4223 case TLSModel::LocalExec: {
4224 // Force the offset into the constant pool and load it from there.
4225 SystemZConstantPoolValue *CPV =
4226 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
4227
4228 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4229 Offset = DAG.getLoad(
4230 PtrVT, DL, DAG.getEntryNode(), Offset,
4231 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4232 break;
4233 }
4234 }
4235
4236 // Add the base and offset together.
4237 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
4238}
4239
4240SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
4241 SelectionDAG &DAG) const {
4242 SDLoc DL(Node);
4243 const BlockAddress *BA = Node->getBlockAddress();
4244 int64_t Offset = Node->getOffset();
4245 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4246
4247 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
4248 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4249 return Result;
4250}
4251
4252SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
4253 SelectionDAG &DAG) const {
4254 SDLoc DL(JT);
4255 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4256 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
4257
4258 // Use LARL to load the address of the table.
4259 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4260}
4261
4262SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
4263 SelectionDAG &DAG) const {
4264 SDLoc DL(CP);
4265 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4266
4267 SDValue Result;
4268 if (CP->isMachineConstantPoolEntry())
4269 Result =
4270 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
4271 else
4272 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
4273 CP->getOffset());
4274
4275 // Use LARL to load the address of the constant pool entry.
4276 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4277}
4278
4279SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
4280 SelectionDAG &DAG) const {
4281 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4282 MachineFunction &MF = DAG.getMachineFunction();
4283 MachineFrameInfo &MFI = MF.getFrameInfo();
4284 MFI.setFrameAddressIsTaken(true);
4285
4286 SDLoc DL(Op);
4287 unsigned Depth = Op.getConstantOperandVal(0);
4288 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4289
4290 // By definition, the frame address is the address of the back chain. (In
4291 // the case of packed stack without backchain, return the address where the
4292 // backchain would have been stored. This will either be an unused space or
4293 // contain a saved register).
4294 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
4295 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
4296
4297 if (Depth > 0) {
4298 // FIXME The frontend should detect this case.
4299 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4300 report_fatal_error("Unsupported stack frame traversal count");
4301
4302 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT);
4303 while (Depth--) {
4304 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain,
4305 MachinePointerInfo());
4306 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset);
4307 }
4308 }
4309
4310 return BackChain;
4311}
4312
4313SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
4314 SelectionDAG &DAG) const {
4315 MachineFunction &MF = DAG.getMachineFunction();
4316 MachineFrameInfo &MFI = MF.getFrameInfo();
4317 MFI.setReturnAddressIsTaken(true);
4318
4319 SDLoc DL(Op);
4320 unsigned Depth = Op.getConstantOperandVal(0);
4321 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4322
4323 if (Depth > 0) {
4324 // FIXME The frontend should detect this case.
4325 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4326 report_fatal_error("Unsupported stack frame traversal count");
4327
4328 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
4329 const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4330 int Offset = TFL->getReturnAddressOffset(MF);
4331 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr,
4332 DAG.getSignedConstant(Offset, DL, PtrVT));
4333 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr,
4334 MachinePointerInfo());
4335 }
4336
4337 // Return R14D (Elf) / R7D (XPLINK), which has the return address. Mark it an
4338 // implicit live-in.
4339 SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters();
4340 Register LinkReg = MF.addLiveIn(CCR->getReturnFunctionAddressRegister(),
4341 &SystemZ::GR64BitRegClass);
4342 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
4343}
4344
4345SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
4346 SelectionDAG &DAG) const {
4347 SDLoc DL(Op);
4348 SDValue In = Op.getOperand(0);
4349 EVT InVT = In.getValueType();
4350 EVT ResVT = Op.getValueType();
4351
4352 // Convert loads directly. This is normally done by DAGCombiner,
4353 // but we need this case for bitcasts that are created during lowering
4354 // and which are then lowered themselves.
4355 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
4356 if (ISD::isNormalLoad(LoadN)) {
4357 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
4358 LoadN->getBasePtr(), LoadN->getMemOperand());
4359 // Update the chain uses.
4360 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
4361 return NewLoad;
4362 }
4363
4364 if (InVT == MVT::i32 && ResVT == MVT::f32) {
4365 SDValue In64;
4366 if (Subtarget.hasHighWord()) {
4367 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
4368 MVT::i64);
4369 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4370 MVT::i64, SDValue(U64, 0), In);
4371 } else {
4372 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
4373 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
4374 DAG.getConstant(32, DL, MVT::i64));
4375 }
4376 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
4377 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
4378 DL, MVT::f32, Out64);
4379 }
4380 if (InVT == MVT::f32 && ResVT == MVT::i32) {
4381 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
4382 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4383 MVT::f64, SDValue(U64, 0), In);
4384 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
4385 if (Subtarget.hasHighWord())
4386 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
4387 MVT::i32, Out64);
4388 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
4389 DAG.getConstant(32, DL, MVT::i64));
4390 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
4391 }
4392 llvm_unreachable("Unexpected bitcast combination");
4393}
4394
4395SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
4396 SelectionDAG &DAG) const {
4397
4398 if (Subtarget.isTargetXPLINK64())
4399 return lowerVASTART_XPLINK(Op, DAG);
4400 else
4401 return lowerVASTART_ELF(Op, DAG);
4402}
4403
4404SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
4405 SelectionDAG &DAG) const {
4406 MachineFunction &MF = DAG.getMachineFunction();
4407 SystemZMachineFunctionInfo *FuncInfo =
4408 MF.getInfo<SystemZMachineFunctionInfo>();
4409
4410 SDLoc DL(Op);
4411
4412 // vastart just stores the address of the VarArgsFrameIndex slot into the
4413 // memory location argument.
4414 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4415 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4416 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4417 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4418 MachinePointerInfo(SV));
4419}
4420
4421SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
4422 SelectionDAG &DAG) const {
4423 MachineFunction &MF = DAG.getMachineFunction();
4424 SystemZMachineFunctionInfo *FuncInfo =
4425 MF.getInfo<SystemZMachineFunctionInfo>();
4426 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4427
4428 SDValue Chain = Op.getOperand(0);
4429 SDValue Addr = Op.getOperand(1);
4430 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4431 SDLoc DL(Op);
4432
4433 // The initial values of each field.
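 // These are the four members of the ELF ABI va_list structure: __gpr and
 // __fpr (how many GPR/FPR argument registers the named arguments used),
 // __overflow_arg_area and __reg_save_area.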
4434 const unsigned NumFields = 4;
4435 SDValue Fields[NumFields] = {
4436 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
4437 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
4438 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
4439 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
4440 };
4441
4442 // Store each field into its respective slot.
4443 SDValue MemOps[NumFields];
4444 unsigned Offset = 0;
4445 for (unsigned I = 0; I < NumFields; ++I) {
4446 SDValue FieldAddr = Addr;
4447 if (Offset != 0)
4448 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
4449 DAG.getIntPtrConstant(Offset, DL));
4450 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
4451 MachinePointerInfo(SV, Offset));
4452 Offset += 8;
4453 }
4454 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
4455}
4456
4457SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
4458 SelectionDAG &DAG) const {
4459 SDValue Chain = Op.getOperand(0);
4460 SDValue DstPtr = Op.getOperand(1);
4461 SDValue SrcPtr = Op.getOperand(2);
4462 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
4463 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
4464 SDLoc DL(Op);
4465
4466 uint32_t Sz =
4467 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
4468 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
4469 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
4470 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(DstSV),
4471 MachinePointerInfo(SrcSV));
4472}
4473
4474SDValue
4475SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
4476 SelectionDAG &DAG) const {
4477 if (Subtarget.isTargetXPLINK64())
4478 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
4479 else
4480 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
4481}
4482
4483SDValue
4484SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
4485 SelectionDAG &DAG) const {
4486 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4487 MachineFunction &MF = DAG.getMachineFunction();
4488 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4489 SDValue Chain = Op.getOperand(0);
4490 SDValue Size = Op.getOperand(1);
4491 SDValue Align = Op.getOperand(2);
4492 SDLoc DL(Op);
4493
4494 // If user has set the no alignment function attribute, ignore
4495 // alloca alignments.
4496 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4497
4498 uint64_t StackAlign = TFI->getStackAlignment();
4499 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4500 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4501
4502 SDValue NeededSpace = Size;
4503
4504 // Add extra space for alignment if needed.
4505 EVT PtrVT = getPointerTy(MF.getDataLayout());
4506 if (ExtraAlignSpace)
4507 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
4508 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4509
4510 bool IsSigned = false;
4511 bool DoesNotReturn = false;
4512 bool IsReturnValueUsed = false;
4513 EVT VT = Op.getValueType();
4514 SDValue AllocaCall =
4515 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace),
4516 CallingConv::C, IsSigned, DL, DoesNotReturn,
4517 IsReturnValueUsed)
4518 .first;
4519
4520 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue
4521 // to end of call in order to ensure it isn't broken up from the call
4522 // sequence.
4523 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
4524 Register SPReg = Regs.getStackPointerRegister();
4525 Chain = AllocaCall.getValue(1);
4526 SDValue Glue = AllocaCall.getValue(2);
4527 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
4528 Chain = NewSPRegNode.getValue(1);
4529
4530 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
4531 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
4532 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
4533
4534 // Dynamically realign if needed.
4535 if (ExtraAlignSpace) {
4536 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4537 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4538 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
4539 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
4540 }
4541
4542 SDValue Ops[2] = {Result, Chain};
4543 return DAG.getMergeValues(Ops, DL);
4544}
4545
4546SDValue
4547SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
4548 SelectionDAG &DAG) const {
4549 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4550 MachineFunction &MF = DAG.getMachineFunction();
4551 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4552 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4553
4554 SDValue Chain = Op.getOperand(0);
4555 SDValue Size = Op.getOperand(1);
4556 SDValue Align = Op.getOperand(2);
4557 SDLoc DL(Op);
4558
4559 // If user has set the no alignment function attribute, ignore
4560 // alloca alignments.
4561 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4562
4563 uint64_t StackAlign = TFI->getStackAlignment();
4564 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4565 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4566
4568 SDValue NeededSpace = Size;
4569
4570 // Get a reference to the stack pointer.
4571 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
4572
4573 // If we need a backchain, save it now.
4574 SDValue Backchain;
4575 if (StoreBackchain)
4576 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4577 MachinePointerInfo());
4578
4579 // Add extra space for alignment if needed.
4580 if (ExtraAlignSpace)
4581 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
4582 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4583
4584 // Get the new stack pointer value.
4585 SDValue NewSP;
4586 if (hasInlineStackProbe(MF)) {
4587 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
4588 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
4589 Chain = NewSP.getValue(1);
4590 }
4591 else {
4592 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
4593 // Copy the new stack pointer back.
4594 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
4595 }
4596
4597 // The allocated data lives above the 160 bytes allocated for the standard
4598 // frame, plus any outgoing stack arguments. We don't know how much that
4599 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
4600 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4601 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
4602
4603 // Dynamically realign if needed.
4604 if (RequiredAlign > StackAlign) {
4605 Result =
4606 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
4607 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4608 Result =
4609 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
4610 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
4611 }
4612
4613 if (StoreBackchain)
4614 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4615 MachinePointerInfo());
4616
4617 SDValue Ops[2] = { Result, Chain };
4618 return DAG.getMergeValues(Ops, DL);
4619}
4620
4621SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
4622 SDValue Op, SelectionDAG &DAG) const {
4623 SDLoc DL(Op);
4624
4625 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4626}
4627
4628SDValue SystemZTargetLowering::lowerMULH(SDValue Op,
4629 SelectionDAG &DAG,
4630 unsigned Opcode) const {
4631 EVT VT = Op.getValueType();
4632 SDLoc DL(Op);
4633 SDValue Even, Odd;
4634
4635 // This custom expander is only used on z17 and later for 64-bit types.
4636 assert(!is32Bit(VT));
4637 assert(Subtarget.hasMiscellaneousExtensions2());
4638
4639 // SystemZISD::xMUL_LOHI returns the low result in the odd register and
4640 // the high result in the even register. Return the latter.
4641 lowerGR128Binary(DAG, DL, VT, Opcode,
4642 Op.getOperand(0), Op.getOperand(1), Even, Odd);
4643 return Even;
4644}
4645
4646SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
4647 SelectionDAG &DAG) const {
4648 EVT VT = Op.getValueType();
4649 SDLoc DL(Op);
4650 SDValue Ops[2];
4651 if (is32Bit(VT))
4652 // Just do a normal 64-bit multiplication and extract the results.
4653 // We define this so that it can be used for constant division.
4654 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
4655 Op.getOperand(1), Ops[1], Ops[0]);
4656 else if (Subtarget.hasMiscellaneousExtensions2())
4657 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
4658 // the high result in the even register. ISD::SMUL_LOHI is defined to
4659 // return the low half first, so the results are in reverse order.
4660 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
4661 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4662 else {
4663 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
4664 //
4665 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
4666 //
4667 // but using the fact that the upper halves are either all zeros
4668 // or all ones:
4669 //
4670 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
4671 //
4672 // and grouping the right terms together since they are quicker than the
4673 // multiplication:
4674 //
4675 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
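 // (This works because lh and rh are the sign bits of ll and rl replicated
 // across all 64 bits, i.e. either 0 or -1, so (lh * rl) == -(lh & rl) and
 // (ll * rh) == -(ll & rh).)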
4676 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
4677 SDValue LL = Op.getOperand(0);
4678 SDValue RL = Op.getOperand(1);
4679 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
4680 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
4681 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4682 // the high result in the even register. ISD::SMUL_LOHI is defined to
4683 // return the low half first, so the results are in reverse order.
4684 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4685 LL, RL, Ops[1], Ops[0]);
4686 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
4687 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
4688 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
4689 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
4690 }
4691 return DAG.getMergeValues(Ops, DL);
4692}
4693
4694SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
4695 SelectionDAG &DAG) const {
4696 EVT VT = Op.getValueType();
4697 SDLoc DL(Op);
4698 SDValue Ops[2];
4699 if (is32Bit(VT))
4700 // Just do a normal 64-bit multiplication and extract the results.
4701 // We define this so that it can be used for constant division.
4702 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
4703 Op.getOperand(1), Ops[1], Ops[0]);
4704 else
4705 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4706 // the high result in the even register. ISD::UMUL_LOHI is defined to
4707 // return the low half first, so the results are in reverse order.
4708 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4709 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4710 return DAG.getMergeValues(Ops, DL);
4711}
4712
4713SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
4714 SelectionDAG &DAG) const {
4715 SDValue Op0 = Op.getOperand(0);
4716 SDValue Op1 = Op.getOperand(1);
4717 EVT VT = Op.getValueType();
4718 SDLoc DL(Op);
4719
4720 // We use DSGF for 32-bit division. This means the first operand must
4721 // always be 64-bit, and the second operand should be 32-bit whenever
4722 // that is possible, to improve performance.
4723 if (is32Bit(VT))
4724 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
4725 else if (DAG.ComputeNumSignBits(Op1) > 32)
4726 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
4727
4728 // DSG(F) returns the remainder in the even register and the
4729 // quotient in the odd register.
4730 SDValue Ops[2];
4731 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
4732 return DAG.getMergeValues(Ops, DL);
4733}
4734
4735SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
4736 SelectionDAG &DAG) const {
4737 EVT VT = Op.getValueType();
4738 SDLoc DL(Op);
4739
4740 // DL(G) returns the remainder in the even register and the
4741 // quotient in the odd register.
4742 SDValue Ops[2];
4743 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
4744 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4745 return DAG.getMergeValues(Ops, DL);
4746}
4747
4748SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
4749 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
4750
4751 // Get the known-zero masks for each operand.
4752 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
4753 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
4754 DAG.computeKnownBits(Ops[1])};
4755
4756 // See if the upper 32 bits of one operand and the lower 32 bits of the
4757 // other are known zero. They are the low and high operands respectively.
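 // (For example, in (or (zext i32 %a to i64), (shl %b, 32)) the zero-extended
 // operand is LowOp and the shifted one is HighOp.)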
4758 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
4759 Known[1].Zero.getZExtValue() };
4760 unsigned High, Low;
4761 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
4762 High = 1, Low = 0;
4763 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
4764 High = 0, Low = 1;
4765 else
4766 return Op;
4767
4768 SDValue LowOp = Ops[Low];
4769 SDValue HighOp = Ops[High];
4770
4771 // If the high part is a constant, we're better off using IILH.
4772 if (HighOp.getOpcode() == ISD::Constant)
4773 return Op;
4774
4775 // If the low part is a constant that is outside the range of LHI,
4776 // then we're better off using IILF.
4777 if (LowOp.getOpcode() == ISD::Constant) {
4778 int64_t Value = int32_t(LowOp->getAsZExtVal());
4779 if (!isInt<16>(Value))
4780 return Op;
4781 }
4782
4783 // Check whether the high part is an AND that doesn't change the
4784 // high 32 bits and just masks out low bits. We can skip it if so.
4785 if (HighOp.getOpcode() == ISD::AND &&
4786 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
4787 SDValue HighOp0 = HighOp.getOperand(0);
4788 uint64_t Mask = HighOp.getConstantOperandVal(1);
4789 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
4790 HighOp = HighOp0;
4791 }
4792
4793 // Take advantage of the fact that all GR32 operations only change the
4794 // low 32 bits by truncating Low to an i32 and inserting it directly
4795 // using a subreg. The interesting cases are those where the truncation
4796 // can be folded.
4797 SDLoc DL(Op);
4798 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
4799 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
4800 MVT::i64, HighOp, Low32);
4801}
4802
4803// Lower SADDO/SSUBO/UADDO/USUBO nodes.
4804SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
4805 SelectionDAG &DAG) const {
4806 SDNode *N = Op.getNode();
4807 SDValue LHS = N->getOperand(0);
4808 SDValue RHS = N->getOperand(1);
4809 SDLoc DL(N);
4810
4811 if (N->getValueType(0) == MVT::i128) {
4812 unsigned BaseOp = 0;
4813 unsigned FlagOp = 0;
4814 bool IsBorrow = false;
4815 switch (Op.getOpcode()) {
4816 default: llvm_unreachable("Unknown instruction!");
4817 case ISD::UADDO:
4818 BaseOp = ISD::ADD;
4819 FlagOp = SystemZISD::VACC;
4820 break;
4821 case ISD::USUBO:
4822 BaseOp = ISD::SUB;
4823 FlagOp = SystemZISD::VSCBI;
4824 IsBorrow = true;
4825 break;
4826 }
4827 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
4828 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
4829 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4830 DAG.getValueType(MVT::i1));
4831 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4832 if (IsBorrow)
4833 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4834 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4835 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4836 }
4837
4838 unsigned BaseOp = 0;
4839 unsigned CCValid = 0;
4840 unsigned CCMask = 0;
4841
4842 switch (Op.getOpcode()) {
4843 default: llvm_unreachable("Unknown instruction!");
4844 case ISD::SADDO:
4845 BaseOp = SystemZISD::SADDO;
4846 CCValid = SystemZ::CCMASK_ARITH;
4847 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4848 break;
4849 case ISD::SSUBO:
4850 BaseOp = SystemZISD::SSUBO;
4851 CCValid = SystemZ::CCMASK_ARITH;
4852 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4853 break;
4854 case ISD::UADDO:
4855 BaseOp = SystemZISD::UADDO;
4856 CCValid = SystemZ::CCMASK_LOGICAL;
4857 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4858 break;
4859 case ISD::USUBO:
4860 BaseOp = SystemZISD::USUBO;
4861 CCValid = SystemZ::CCMASK_LOGICAL;
4862 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4863 break;
4864 }
4865
4866 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
4867 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
4868
4869 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4870 if (N->getValueType(1) == MVT::i1)
4871 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4872
4873 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4874}
4875
4876static bool isAddCarryChain(SDValue Carry) {
4877 while (Carry.getOpcode() == ISD::UADDO_CARRY &&
4878 Carry->getValueType(0) != MVT::i128)
4879 Carry = Carry.getOperand(2);
4880 return Carry.getOpcode() == ISD::UADDO &&
4881 Carry->getValueType(0) != MVT::i128;
4882}
4883
4884static bool isSubBorrowChain(SDValue Carry) {
4885 while (Carry.getOpcode() == ISD::USUBO_CARRY &&
4886 Carry->getValueType(0) != MVT::i128)
4887 Carry = Carry.getOperand(2);
4888 return Carry.getOpcode() == ISD::USUBO &&
4889 Carry->getValueType(0) != MVT::i128;
4890}
4891
4892// Lower UADDO_CARRY/USUBO_CARRY nodes.
4893SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
4894 SelectionDAG &DAG) const {
4895
4896 SDNode *N = Op.getNode();
4897 MVT VT = N->getSimpleValueType(0);
4898
4899 // Let legalize expand this if it isn't a legal type yet.
4900 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4901 return SDValue();
4902
4903 SDValue LHS = N->getOperand(0);
4904 SDValue RHS = N->getOperand(1);
4905 SDValue Carry = Op.getOperand(2);
4906 SDLoc DL(N);
4907
4908 if (VT == MVT::i128) {
4909 unsigned BaseOp = 0;
4910 unsigned FlagOp = 0;
4911 bool IsBorrow = false;
4912 switch (Op.getOpcode()) {
4913 default: llvm_unreachable("Unknown instruction!");
4914 case ISD::UADDO_CARRY:
4915 BaseOp = SystemZISD::VAC;
4916 FlagOp = SystemZISD::VACCC;
4917 break;
4918 case ISD::USUBO_CARRY:
4919 BaseOp = SystemZISD::VSBI;
4920 FlagOp = SystemZISD::VSBCBI;
4921 IsBorrow = true;
4922 break;
4923 }
4924 if (IsBorrow)
4925 Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(),
4926 Carry, DAG.getConstant(1, DL, Carry.getValueType()));
4927 Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
4928 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
4929 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
4930 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4931 DAG.getValueType(MVT::i1));
4932 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4933 if (IsBorrow)
4934 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4935 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4936 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4937 }
4938
4939 unsigned BaseOp = 0;
4940 unsigned CCValid = 0;
4941 unsigned CCMask = 0;
4942
4943 switch (Op.getOpcode()) {
4944 default: llvm_unreachable("Unknown instruction!");
4945 case ISD::UADDO_CARRY:
4946 if (!isAddCarryChain(Carry))
4947 return SDValue();
4948
4949 BaseOp = SystemZISD::ADDCARRY;
4950 CCValid = SystemZ::CCMASK_LOGICAL;
4951 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4952 break;
4953 case ISD::USUBO_CARRY:
4954 if (!isSubBorrowChain(Carry))
4955 return SDValue();
4956
4957 BaseOp = SystemZISD::SUBCARRY;
4958 CCValid = SystemZ::CCMASK_LOGICAL;
4959 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4960 break;
4961 }
4962
4963 // Set the condition code from the carry flag.
4964 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
4965 DAG.getConstant(CCValid, DL, MVT::i32),
4966 DAG.getConstant(CCMask, DL, MVT::i32));
4967
4968 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4969 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
4970
4971 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4972 if (N->getValueType(1) == MVT::i1)
4973 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4974
4975 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4976}
4977
4978SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
4979 SelectionDAG &DAG) const {
4980 EVT VT = Op.getValueType();
4981 SDLoc DL(Op);
4982 Op = Op.getOperand(0);
4983
4984 if (VT.getScalarSizeInBits() == 128) {
4985 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op);
4986 Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op);
4987 SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL,
4988 DAG.getConstant(0, DL, MVT::i64));
4989 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4990 return Op;
4991 }
4992
4993 // Handle vector types via VPOPCT.
4994 if (VT.isVector()) {
4995 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
4996 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
4997 switch (VT.getScalarSizeInBits()) {
4998 case 8:
4999 break;
5000 case 16: {
5001 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
5002 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
5003 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
5004 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
5005 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
5006 break;
5007 }
5008 case 32: {
5009 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
5010 DAG.getConstant(0, DL, MVT::i32));
5011 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
5012 break;
5013 }
5014 case 64: {
5015 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
5016 DAG.getConstant(0, DL, MVT::i32));
5017 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
5018 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
5019 break;
5020 }
5021 default:
5022 llvm_unreachable("Unexpected type");
5023 }
5024 return Op;
5025 }
5026
5027 // Get the known-zero mask for the operand.
5028 KnownBits Known = DAG.computeKnownBits(Op);
5029 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
5030 if (NumSignificantBits == 0)
5031 return DAG.getConstant(0, DL, VT);
5032
5033 // Skip known-zero high parts of the operand.
5034 int64_t OrigBitSize = VT.getSizeInBits();
5035 int64_t BitSize = llvm::bit_ceil(NumSignificantBits);
5036 BitSize = std::min(BitSize, OrigBitSize);
5037
5038 // The POPCNT instruction counts the number of bits in each byte.
5039 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
5040 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
5041 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
5042
5043 // Add up per-byte counts in a binary tree. All bits of Op at
5044 // position larger than BitSize remain zero throughout.
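 // (Illustrative 32-bit example: if the per-byte counts are <a,b,c,d> from
 // most to least significant byte, the I == 16 step adds <c,d,0,0> giving
 // <a+c,b+d,c,d>, and the I == 8 step adds <b+d,c,d,0>, leaving a+b+c+d in
 // the top byte, which the final shift extracts.)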
5045 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
5046 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
5047 if (BitSize != OrigBitSize)
5048 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
5049 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
5050 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
5051 }
5052
5053 // Extract overall result from high byte.
5054 if (BitSize > 8)
5055 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
5056 DAG.getConstant(BitSize - 8, DL, VT));
5057
5058 return Op;
5059}
5060
5061SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
5062 SelectionDAG &DAG) const {
5063 SDLoc DL(Op);
5064 AtomicOrdering FenceOrdering =
5065 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5066 SyncScope::ID FenceSSID =
5067 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5068
5069 // The only fence that needs an instruction is a sequentially-consistent
5070 // cross-thread fence.
5071 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5072 FenceSSID == SyncScope::System) {
5073 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
5074 Op.getOperand(0)),
5075 0);
5076 }
5077
5078 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5079 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
5080}
5081
5082SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
5083 SelectionDAG &DAG) const {
5084 EVT RegVT = Op.getValueType();
5085 if (RegVT.getSizeInBits() == 128)
5086 return lowerATOMIC_LDST_I128(Op, DAG);
5087 return lowerLoadF16(Op, DAG);
5088}
5089
5090SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
5091 SelectionDAG &DAG) const {
5092 auto *Node = cast<AtomicSDNode>(Op.getNode());
5093 if (Node->getMemoryVT().getSizeInBits() == 128)
5094 return lowerATOMIC_LDST_I128(Op, DAG);
5095 return lowerStoreF16(Op, DAG);
5096}
5097
5098SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
5099 SelectionDAG &DAG) const {
5100 auto *Node = cast<AtomicSDNode>(Op.getNode());
5101 assert(
5102 (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) &&
5103 "Only custom lowering i128 or f128.");
5104 // Use same code to handle both legal and non-legal i128 types.
5105 SmallVector<SDValue, 2> Results;
5106 LowerOperationWrapper(Node, Results, DAG);
5107 return DAG.getMergeValues(Results, SDLoc(Op));
5108}
5109
5110// Prepare for a Compare And Swap for a subword operation. This needs to be
5111// done in memory with 4 bytes at natural alignment.
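// (For example, for a halfword at address 0x1006: AlignedAddr is 0x1004 and
// BitShift is congruent to 16 modulo 32, so rotating the containing word left
// by BitShift brings the halfword to the top 16 bits of the GR32, and
// NegBitShift rotates it back.)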
5112static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL,
5113 SDValue &AlignedAddr, SDValue &BitShift,
5114 SDValue &NegBitShift) {
5115 EVT PtrVT = Addr.getValueType();
5116 EVT WideVT = MVT::i32;
5117
5118 // Get the address of the containing word.
5119 AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
5120 DAG.getSignedConstant(-4, DL, PtrVT));
5121
5122 // Get the number of bits that the word must be rotated left in order
5123 // to bring the field to the top bits of a GR32.
5124 BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
5125 DAG.getConstant(3, DL, PtrVT));
5126 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
5127
5128 // Get the complementing shift amount, for rotating a field in the top
5129 // bits back to its proper position.
5130 NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
5131 DAG.getConstant(0, DL, WideVT), BitShift);
5132
5133}
5134
5135// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
5136// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
5137SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
5138 SelectionDAG &DAG,
5139 unsigned Opcode) const {
5140 auto *Node = cast<AtomicSDNode>(Op.getNode());
5141
5142 // 32-bit operations need no special handling.
5143 EVT NarrowVT = Node->getMemoryVT();
5144 EVT WideVT = MVT::i32;
5145 if (NarrowVT == WideVT)
5146 return Op;
5147
5148 int64_t BitSize = NarrowVT.getSizeInBits();
5149 SDValue ChainIn = Node->getChain();
5150 SDValue Addr = Node->getBasePtr();
5151 SDValue Src2 = Node->getVal();
5152 MachineMemOperand *MMO = Node->getMemOperand();
5153 SDLoc DL(Node);
5154
5155 // Convert atomic subtracts of constants into additions.
5156 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
5157 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
5158 Opcode = SystemZISD::ATOMIC_LOADW_ADD;
5159 Src2 = DAG.getSignedConstant(-Const->getSExtValue(), DL,
5160 Src2.getValueType());
5161 }
5162
5163 SDValue AlignedAddr, BitShift, NegBitShift;
5164 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
5165
5166 // Extend the source operand to 32 bits and prepare it for the inner loop.
5167 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
5168 // operations require the source to be shifted in advance. (This shift
5169 // can be folded if the source is constant.) For AND and NAND, the lower
5170 // bits must be set, while for other opcodes they should be left clear.
5171 if (Opcode != SystemZISD::ATOMIC_SWAPW)
5172 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
5173 DAG.getConstant(32 - BitSize, DL, WideVT));
5174 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
5175 Opcode == SystemZISD::ATOMIC_LOADW_NAND)
5176 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
5177 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
5178
5179 // Construct the ATOMIC_LOADW_* node.
5180 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
5181 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
5182 DAG.getConstant(BitSize, DL, WideVT) };
5183 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
5184 NarrowVT, MMO);
5185
5186 // Rotate the result of the final CS so that the field is in the lower
5187 // bits of a GR32, then truncate it.
5188 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
5189 DAG.getConstant(BitSize, DL, WideVT));
5190 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
5191
5192 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
5193 return DAG.getMergeValues(RetOps, DL);
5194}
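// Illustrative sketch (hypothetical helper, not part of the original file):
// how an 8-bit operand is widened for the loop above (BitSize = 8, so the
// field occupies the top byte of the 32-bit loop value).
static unsigned exampleWidenSubwordOperand(unsigned char Src2, bool IsAndOrNand) {
  unsigned Wide = unsigned(Src2) << (32 - 8); // shift the field to the top bits
  if (IsAndOrNand)
    Wide |= 0xFFFFFFFFu >> 8;                 // set all bits outside the field
  return Wide;
}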
5195
5196// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into
5197// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions.
5198SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
5199 SelectionDAG &DAG) const {
5200 auto *Node = cast<AtomicSDNode>(Op.getNode());
5201 EVT MemVT = Node->getMemoryVT();
5202 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
5203 // A full-width operation: negate and use LAA(G).
5204 assert(Op.getValueType() == MemVT && "Mismatched VTs");
5205 assert(Subtarget.hasInterlockedAccess1() &&
5206 "Should have been expanded by AtomicExpand pass.");
5207 SDValue Src2 = Node->getVal();
5208 SDLoc DL(Src2);
5209 SDValue NegSrc2 =
5210 DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
5211 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
5212 Node->getChain(), Node->getBasePtr(), NegSrc2,
5213 Node->getMemOperand());
5214 }
5215
5216 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
5217}
5218
5219// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
5220SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
5221 SelectionDAG &DAG) const {
5222 auto *Node = cast<AtomicSDNode>(Op.getNode());
5223 SDValue ChainIn = Node->getOperand(0);
5224 SDValue Addr = Node->getOperand(1);
5225 SDValue CmpVal = Node->getOperand(2);
5226 SDValue SwapVal = Node->getOperand(3);
5227 MachineMemOperand *MMO = Node->getMemOperand();
5228 SDLoc DL(Node);
5229
5230 if (Node->getMemoryVT() == MVT::i128) {
5231 // Use same code to handle both legal and non-legal i128 types.
5232 SmallVector<SDValue, 2> Results;
5233 LowerOperationWrapper(Node, Results, DAG);
5234 return DAG.getMergeValues(Results, DL);
5235 }
5236
5237 // We have native support for 32-bit and 64-bit compare and swap, but we
5238 // still need to expand extracting the "success" result from the CC.
5239 EVT NarrowVT = Node->getMemoryVT();
5240 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
5241 if (NarrowVT == WideVT) {
5242 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
5243 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
5244 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
5245 DL, Tys, Ops, NarrowVT, MMO);
5246 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
5247 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
5248
5249 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
5250 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
5251 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
5252 return SDValue();
5253 }
5254
5255 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
5256 // via a fullword ATOMIC_CMP_SWAPW operation.
5257 int64_t BitSize = NarrowVT.getSizeInBits();
5258
5259 SDValue AlignedAddr, BitShift, NegBitShift;
5260 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
5261
5262 // Construct the ATOMIC_CMP_SWAPW node.
5263 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
5264 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
5265 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
5266 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
5267 VTList, Ops, NarrowVT, MMO);
5268 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
5269 SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
5270
5271 // emitAtomicCmpSwapW() will zero extend the result (original value).
5272 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
5273 DAG.getValueType(NarrowVT));
5274 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
5275 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
5276 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
5277 return SDValue();
5278}
5279
5280 MachineMemOperand::Flags
5281 SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
5282 // Because of how we convert atomic_load and atomic_store to normal loads and
5283 // stores in the DAG, we need to ensure that the MMOs are marked volatile
5284 // since DAGCombine hasn't been updated to account for atomic, but non
5285 // volatile loads. (See D57601)
5286 if (auto *SI = dyn_cast<StoreInst>(&I))
5287 if (SI->isAtomic())
5288 return MachineMemOperand::MOVolatile;
5289 if (auto *LI = dyn_cast<LoadInst>(&I))
5290 if (LI->isAtomic())
5291 return MachineMemOperand::MOVolatile;
5292 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
5293 if (AI->isAtomic())
5294 return MachineMemOperand::MOVolatile;
5295 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
5296 if (AI->isAtomic())
5297 return MachineMemOperand::MOVolatile;
5298 return MachineMemOperand::MONone;
5299}
5300
5301SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
5302 SelectionDAG &DAG) const {
5303 MachineFunction &MF = DAG.getMachineFunction();
5304 auto *Regs = Subtarget.getSpecialRegisters();
5305 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
5306 report_fatal_error("Variable-sized stack allocations are not supported "
5307 "in GHC calling convention");
5308 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
5309 Regs->getStackPointerRegister(), Op.getValueType());
5310}
5311
5312SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
5313 SelectionDAG &DAG) const {
5314 MachineFunction &MF = DAG.getMachineFunction();
5315 auto *Regs = Subtarget.getSpecialRegisters();
5316 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
5317
5318 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
5319 report_fatal_error("Variable-sized stack allocations are not supported "
5320 "in GHC calling convention");
5321
5322 SDValue Chain = Op.getOperand(0);
5323 SDValue NewSP = Op.getOperand(1);
5324 SDValue Backchain;
5325 SDLoc DL(Op);
5326
5327 if (StoreBackchain) {
5328 SDValue OldSP = DAG.getCopyFromReg(
5329 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
5330 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
5331 MachinePointerInfo());
5332 }
5333
5334 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
5335
5336 if (StoreBackchain)
5337 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
5338 MachinePointerInfo());
5339
5340 return Chain;
5341}
5342
5343SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
5344 SelectionDAG &DAG) const {
5345 bool IsData = Op.getConstantOperandVal(4);
5346 if (!IsData)
5347 // Just preserve the chain.
5348 return Op.getOperand(0);
5349
5350 SDLoc DL(Op);
5351 bool IsWrite = Op.getConstantOperandVal(2);
5352 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
5353 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
5354 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
5355 Op.getOperand(1)};
5356 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
5357 Node->getVTList(), Ops,
5358 Node->getMemoryVT(), Node->getMemOperand());
5359}
5360
5361SDValue
5362SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
5363 SelectionDAG &DAG) const {
5364 unsigned Opcode, CCValid;
5365 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
5366 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
5367 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
5368 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
5369 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
5370 return SDValue();
5371 }
5372
5373 return SDValue();
5374}
5375
5376SDValue
5377SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
5378 SelectionDAG &DAG) const {
5379 unsigned Opcode, CCValid;
5380 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
5381 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
5382 if (Op->getNumValues() == 1)
5383 return getCCResult(DAG, SDValue(Node, 0));
5384 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
5385 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
5386 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
5387 }
5388
5389 unsigned Id = Op.getConstantOperandVal(0);
5390 switch (Id) {
5391 case Intrinsic::thread_pointer:
5392 return lowerThreadPointer(SDLoc(Op), DAG);
5393
5394 case Intrinsic::s390_vpdi:
5395 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
5396 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5397
5398 case Intrinsic::s390_vperm:
5399 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
5400 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5401
5402 case Intrinsic::s390_vuphb:
5403 case Intrinsic::s390_vuphh:
5404 case Intrinsic::s390_vuphf:
5405 case Intrinsic::s390_vuphg:
5406 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
5407 Op.getOperand(1));
5408
5409 case Intrinsic::s390_vuplhb:
5410 case Intrinsic::s390_vuplhh:
5411 case Intrinsic::s390_vuplhf:
5412 case Intrinsic::s390_vuplhg:
5413 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
5414 Op.getOperand(1));
5415
5416 case Intrinsic::s390_vuplb:
5417 case Intrinsic::s390_vuplhw:
5418 case Intrinsic::s390_vuplf:
5419 case Intrinsic::s390_vuplg:
5420 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
5421 Op.getOperand(1));
5422
5423 case Intrinsic::s390_vupllb:
5424 case Intrinsic::s390_vupllh:
5425 case Intrinsic::s390_vupllf:
5426 case Intrinsic::s390_vupllg:
5427 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
5428 Op.getOperand(1));
5429
5430 case Intrinsic::s390_vsumb:
5431 case Intrinsic::s390_vsumh:
5432 case Intrinsic::s390_vsumgh:
5433 case Intrinsic::s390_vsumgf:
5434 case Intrinsic::s390_vsumqf:
5435 case Intrinsic::s390_vsumqg:
5436 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
5437 Op.getOperand(1), Op.getOperand(2));
5438
5439 case Intrinsic::s390_vaq:
5440 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5441 Op.getOperand(1), Op.getOperand(2));
5442 case Intrinsic::s390_vaccb:
5443 case Intrinsic::s390_vacch:
5444 case Intrinsic::s390_vaccf:
5445 case Intrinsic::s390_vaccg:
5446 case Intrinsic::s390_vaccq:
5447 return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(),
5448 Op.getOperand(1), Op.getOperand(2));
5449 case Intrinsic::s390_vacq:
5450 return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(),
5451 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5452 case Intrinsic::s390_vacccq:
5453 return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(),
5454 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5455
5456 case Intrinsic::s390_vsq:
5457 return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(),
5458 Op.getOperand(1), Op.getOperand(2));
5459 case Intrinsic::s390_vscbib:
5460 case Intrinsic::s390_vscbih:
5461 case Intrinsic::s390_vscbif:
5462 case Intrinsic::s390_vscbig:
5463 case Intrinsic::s390_vscbiq:
5464 return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(),
5465 Op.getOperand(1), Op.getOperand(2));
5466 case Intrinsic::s390_vsbiq:
5467 return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(),
5468 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5469 case Intrinsic::s390_vsbcbiq:
5470 return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(),
5471 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5472
5473 case Intrinsic::s390_vmhb:
5474 case Intrinsic::s390_vmhh:
5475 case Intrinsic::s390_vmhf:
5476 case Intrinsic::s390_vmhg:
5477 case Intrinsic::s390_vmhq:
5478 return DAG.getNode(ISD::MULHS, SDLoc(Op), Op.getValueType(),
5479 Op.getOperand(1), Op.getOperand(2));
5480 case Intrinsic::s390_vmlhb:
5481 case Intrinsic::s390_vmlhh:
5482 case Intrinsic::s390_vmlhf:
5483 case Intrinsic::s390_vmlhg:
5484 case Intrinsic::s390_vmlhq:
5485 return DAG.getNode(ISD::MULHU, SDLoc(Op), Op.getValueType(),
5486 Op.getOperand(1), Op.getOperand(2));
5487
5488 case Intrinsic::s390_vmahb:
5489 case Intrinsic::s390_vmahh:
5490 case Intrinsic::s390_vmahf:
5491 case Intrinsic::s390_vmahg:
5492 case Intrinsic::s390_vmahq:
5493 return DAG.getNode(SystemZISD::VMAH, SDLoc(Op), Op.getValueType(),
5494 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5495 case Intrinsic::s390_vmalhb:
5496 case Intrinsic::s390_vmalhh:
5497 case Intrinsic::s390_vmalhf:
5498 case Intrinsic::s390_vmalhg:
5499 case Intrinsic::s390_vmalhq:
5500 return DAG.getNode(SystemZISD::VMALH, SDLoc(Op), Op.getValueType(),
5501 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5502
5503 case Intrinsic::s390_vmeb:
5504 case Intrinsic::s390_vmeh:
5505 case Intrinsic::s390_vmef:
5506 case Intrinsic::s390_vmeg:
5507 return DAG.getNode(SystemZISD::VME, SDLoc(Op), Op.getValueType(),
5508 Op.getOperand(1), Op.getOperand(2));
5509 case Intrinsic::s390_vmleb:
5510 case Intrinsic::s390_vmleh:
5511 case Intrinsic::s390_vmlef:
5512 case Intrinsic::s390_vmleg:
5513 return DAG.getNode(SystemZISD::VMLE, SDLoc(Op), Op.getValueType(),
5514 Op.getOperand(1), Op.getOperand(2));
5515 case Intrinsic::s390_vmob:
5516 case Intrinsic::s390_vmoh:
5517 case Intrinsic::s390_vmof:
5518 case Intrinsic::s390_vmog:
5519 return DAG.getNode(SystemZISD::VMO, SDLoc(Op), Op.getValueType(),
5520 Op.getOperand(1), Op.getOperand(2));
5521 case Intrinsic::s390_vmlob:
5522 case Intrinsic::s390_vmloh:
5523 case Intrinsic::s390_vmlof:
5524 case Intrinsic::s390_vmlog:
5525 return DAG.getNode(SystemZISD::VMLO, SDLoc(Op), Op.getValueType(),
5526 Op.getOperand(1), Op.getOperand(2));
5527
5528 case Intrinsic::s390_vmaeb:
5529 case Intrinsic::s390_vmaeh:
5530 case Intrinsic::s390_vmaef:
5531 case Intrinsic::s390_vmaeg:
5532 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5533 DAG.getNode(SystemZISD::VME, SDLoc(Op), Op.getValueType(),
5534 Op.getOperand(1), Op.getOperand(2)),
5535 Op.getOperand(3));
5536 case Intrinsic::s390_vmaleb:
5537 case Intrinsic::s390_vmaleh:
5538 case Intrinsic::s390_vmalef:
5539 case Intrinsic::s390_vmaleg:
5540 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5541 DAG.getNode(SystemZISD::VMLE, SDLoc(Op), Op.getValueType(),
5542 Op.getOperand(1), Op.getOperand(2)),
5543 Op.getOperand(3));
5544 case Intrinsic::s390_vmaob:
5545 case Intrinsic::s390_vmaoh:
5546 case Intrinsic::s390_vmaof:
5547 case Intrinsic::s390_vmaog:
5548 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5549 DAG.getNode(SystemZISD::VMO, SDLoc(Op), Op.getValueType(),
5550 Op.getOperand(1), Op.getOperand(2)),
5551 Op.getOperand(3));
5552 case Intrinsic::s390_vmalob:
5553 case Intrinsic::s390_vmaloh:
5554 case Intrinsic::s390_vmalof:
5555 case Intrinsic::s390_vmalog:
5556 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5557 DAG.getNode(SystemZISD::VMLO, SDLoc(Op), Op.getValueType(),
5558 Op.getOperand(1), Op.getOperand(2)),
5559 Op.getOperand(3));
5560 }
5561
5562 return SDValue();
5563}
5564
5565namespace {
5566// Says that SystemZISD operation Opcode can be used to perform the equivalent
5567// of a VPERM with permute vector Bytes. If Opcode takes three operands,
5568// Operand is the constant third operand, otherwise it is the number of
5569// bytes in each element of the result.
5570struct Permute {
5571 unsigned Opcode;
5572 unsigned Operand;
5573 unsigned char Bytes[SystemZ::VectorBytes];
5574};
5575}
5576
5577static const Permute PermuteForms[] = {
5578 // VMRHG
5579 { SystemZISD::MERGE_HIGH, 8,
5580 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
5581 // VMRHF
5582 { SystemZISD::MERGE_HIGH, 4,
5583 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
5584 // VMRHH
5585 { SystemZISD::MERGE_HIGH, 2,
5586 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
5587 // VMRHB
5588 { SystemZISD::MERGE_HIGH, 1,
5589 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
5590 // VMRLG
5591 { SystemZISD::MERGE_LOW, 8,
5592 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
5593 // VMRLF
5594 { SystemZISD::MERGE_LOW, 4,
5595 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
5596 // VMRLH
5597 { SystemZISD::MERGE_LOW, 2,
5598 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
5599 // VMRLB
5600 { SystemZISD::MERGE_LOW, 1,
5601 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
5602 // VPKG
5603 { SystemZISD::PACK, 4,
5604 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
5605 // VPKF
5606 { SystemZISD::PACK, 2,
5607 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
5608 // VPKH
5609 { SystemZISD::PACK, 1,
5610 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
5611 // VPDI V1, V2, 4 (low half of V1, high half of V2)
5612 { SystemZISD::PERMUTE_DWORDS, 4,
5613 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
5614 // VPDI V1, V2, 1 (high half of V1, low half of V2)
5615 { SystemZISD::PERMUTE_DWORDS, 1,
5616 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
5617};
5618
5619// Called after matching a vector shuffle against a particular pattern.
5620// Both the original shuffle and the pattern have two vector operands.
5621// OpNos[0] is the operand of the original shuffle that should be used for
5622// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
5623// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
5624// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
5625// for operands 0 and 1 of the pattern.
5626static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
5627 if (OpNos[0] < 0) {
5628 if (OpNos[1] < 0)
5629 return false;
5630 OpNo0 = OpNo1 = OpNos[1];
5631 } else if (OpNos[1] < 0) {
5632 OpNo0 = OpNo1 = OpNos[0];
5633 } else {
5634 OpNo0 = OpNos[0];
5635 OpNo1 = OpNos[1];
5636 }
5637 return true;
5638}
5639
5640// Bytes is a VPERM-like permute vector, except that -1 is used for
5641// undefined bytes. Return true if the VPERM can be implemented using P.
5642// When returning true set OpNo0 to the VPERM operand that should be
5643// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
5644//
5645// For example, if swapping the VPERM operands allows P to match, OpNo0
5646// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
5647// operand, but rewriting it to use two duplicated operands allows it to
5648// match P, then OpNo0 and OpNo1 will be the same.
5649static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
5650 unsigned &OpNo0, unsigned &OpNo1) {
5651 int OpNos[] = { -1, -1 };
5652 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5653 int Elt = Bytes[I];
5654 if (Elt >= 0) {
5655 // Make sure that the two permute vectors use the same suboperand
5656 // byte number. Only the operand numbers (the high bits) are
5657 // allowed to differ.
5658 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
5659 return false;
5660 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
5661 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
5662 // Make sure that the operand mappings are consistent with previous
5663 // elements.
5664 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5665 return false;
5666 OpNos[ModelOpNo] = RealOpNo;
5667 }
5668 }
5669 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5670}
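// Illustrative sketch (hypothetical helper, not part of the original file):
// a simplified version of the check above against one PermuteForms row,
// ignoring the operand-swapping bookkeeping. Undefined (-1) bytes match
// anything; defined bytes must agree in the low four bits (the byte number
// within an operand).
static bool exampleMatchesRow(const int (&Mask)[16], const unsigned char (&Row)[16]) {
  for (int I = 0; I < 16; ++I)
    if (Mask[I] >= 0 && ((Mask[I] ^ Row[I]) & 15) != 0)
      return false;
  return true;
}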
5671
5672// As above, but search for a matching permute.
5673static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
5674 unsigned &OpNo0, unsigned &OpNo1) {
5675 for (auto &P : PermuteForms)
5676 if (matchPermute(Bytes, P, OpNo0, OpNo1))
5677 return &P;
5678 return nullptr;
5679}
5680
5681// Bytes is a VPERM-like permute vector, except that -1 is used for
5682// undefined bytes. This permute is an operand of an outer permute.
5683// See whether redistributing the -1 bytes gives a shuffle that can be
5684// implemented using P. If so, set Transform to a VPERM-like permute vector
5685// that, when applied to the result of P, gives the original permute in Bytes.
5686 static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5687 const Permute &P,
5688 SmallVectorImpl<int> &Transform) {
5689 unsigned To = 0;
5690 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
5691 int Elt = Bytes[From];
5692 if (Elt < 0)
5693 // Byte number From of the result is undefined.
5694 Transform[From] = -1;
5695 else {
5696 while (P.Bytes[To] != Elt) {
5697 To += 1;
5698 if (To == SystemZ::VectorBytes)
5699 return false;
5700 }
5701 Transform[From] = To;
5702 }
5703 }
5704 return true;
5705}
5706
5707// As above, but search for a matching permute.
5708static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5709 SmallVectorImpl<int> &Transform) {
5710 for (auto &P : PermuteForms)
5711 if (matchDoublePermute(Bytes, P, Transform))
5712 return &P;
5713 return nullptr;
5714}
5715
5716// Convert the mask of the given shuffle op into a byte-level mask,
5717// as if it had type vNi8.
5718static bool getVPermMask(SDValue ShuffleOp,
5719 SmallVectorImpl<int> &Bytes) {
5720 EVT VT = ShuffleOp.getValueType();
5721 unsigned NumElements = VT.getVectorNumElements();
5722 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5723
5724 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
5725 Bytes.resize(NumElements * BytesPerElement, -1);
5726 for (unsigned I = 0; I < NumElements; ++I) {
5727 int Index = VSN->getMaskElt(I);
5728 if (Index >= 0)
5729 for (unsigned J = 0; J < BytesPerElement; ++J)
5730 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5731 }
5732 return true;
5733 }
5734 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
5735 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
5736 unsigned Index = ShuffleOp.getConstantOperandVal(1);
5737 Bytes.resize(NumElements * BytesPerElement, -1);
5738 for (unsigned I = 0; I < NumElements; ++I)
5739 for (unsigned J = 0; J < BytesPerElement; ++J)
5740 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5741 return true;
5742 }
5743 return false;
5744}
5745
5746// Bytes is a VPERM-like permute vector, except that -1 is used for
5747// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
5748// the result come from a contiguous sequence of bytes from one input.
5749// Set Base to the selector for the first byte if so.
5750static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
5751 unsigned BytesPerElement, int &Base) {
5752 Base = -1;
5753 for (unsigned I = 0; I < BytesPerElement; ++I) {
5754 if (Bytes[Start + I] >= 0) {
5755 unsigned Elem = Bytes[Start + I];
5756 if (Base < 0) {
5757 Base = Elem - I;
5758 // Make sure the bytes would come from one input operand.
5759 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
5760 return false;
5761 } else if (unsigned(Base) != Elem - I)
5762 return false;
5763 }
5764 }
5765 return true;
5766}
5767
5768// Bytes is a VPERM-like permute vector, except that -1 is used for
5769// undefined bytes. Return true if it can be performed using VSLDB.
5770// When returning true, set StartIndex to the shift amount and OpNo0
5771// and OpNo1 to the VPERM operands that should be used as the first
5772// and second shift operand respectively.
5773 static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
5774 unsigned &StartIndex, unsigned &OpNo0,
5775 unsigned &OpNo1) {
5776 int OpNos[] = { -1, -1 };
5777 int Shift = -1;
5778 for (unsigned I = 0; I < 16; ++I) {
5779 int Index = Bytes[I];
5780 if (Index >= 0) {
5781 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
5782 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
5783 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
5784 if (Shift < 0)
5785 Shift = ExpectedShift;
5786 else if (Shift != ExpectedShift)
5787 return false;
5788 // Make sure that the operand mappings are consistent with previous
5789 // elements.
5790 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5791 return false;
5792 OpNos[ModelOpNo] = RealOpNo;
5793 }
5794 }
5795 StartIndex = Shift;
5796 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5797}
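// Illustrative sketch (hypothetical helper, not part of the original file):
// the byte selectors a double-vector shift-left produces for a given shift,
// which is the pattern the matcher above looks for. Byte I of the result is
// byte Shift + I of the 32-byte concatenation of the two operands.
static void exampleShlDoubleSelectors(unsigned Shift, int (&Bytes)[16]) {
  for (unsigned I = 0; I < 16; ++I)
    Bytes[I] = int(Shift + I); // Shift is at most 15, so no wraparound occurs
}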
5798
5799// Create a node that performs P on operands Op0 and Op1, casting the
5800// operands to the appropriate type. The type of the result is determined by P.
5801 static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5802 const Permute &P, SDValue Op0, SDValue Op1) {
5803 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
5804 // elements of a PACK are twice as wide as the outputs.
5805 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
5806 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
5807 P.Operand);
5808 // Cast both operands to the appropriate type.
5809 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
5810 SystemZ::VectorBytes / InBytes);
5811 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
5812 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
5813 SDValue Op;
5814 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
5815 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
5816 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
5817 } else if (P.Opcode == SystemZISD::PACK) {
5818 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
5819 SystemZ::VectorBytes / P.Operand);
5820 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
5821 } else {
5822 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
5823 }
5824 return Op;
5825}
5826
5827static bool isZeroVector(SDValue N) {
5828 if (N->getOpcode() == ISD::BITCAST)
5829 N = N->getOperand(0);
5830 if (N->getOpcode() == ISD::SPLAT_VECTOR)
5831 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
5832 return Op->getZExtValue() == 0;
5833 return ISD::isBuildVectorAllZeros(N.getNode());
5834}
5835
5836// Return the index of the zero/undef vector, or UINT32_MAX if not found.
5837static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
5838 for (unsigned I = 0; I < Num ; I++)
5839 if (isZeroVector(Ops[I]))
5840 return I;
5841 return UINT32_MAX;
5842}
5843
5844// Bytes is a VPERM-like permute vector, except that -1 is used for
5845// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
5846// VSLDB or VPERM.
5847 static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5848 SDValue *Ops,
5849 const SmallVectorImpl<int> &Bytes) {
5850 for (unsigned I = 0; I < 2; ++I)
5851 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
5852
5853 // First see whether VSLDB can be used.
5854 unsigned StartIndex, OpNo0, OpNo1;
5855 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
5856 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
5857 Ops[OpNo1],
5858 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
5859
5860 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
5861 // eliminate a zero vector by reusing any zero index in the permute vector.
5862 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
5863 if (ZeroVecIdx != UINT32_MAX) {
5864 bool MaskFirst = true;
5865 int ZeroIdx = -1;
5866 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5867 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5868 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5869 if (OpNo == ZeroVecIdx && I == 0) {
5870 // If the first byte is zero, use mask as first operand.
5871 ZeroIdx = 0;
5872 break;
5873 }
5874 if (OpNo != ZeroVecIdx && Byte == 0) {
5875 // If mask contains a zero, use it by placing that vector first.
5876 ZeroIdx = I + SystemZ::VectorBytes;
5877 MaskFirst = false;
5878 break;
5879 }
5880 }
5881 if (ZeroIdx != -1) {
5882 SDValue IndexNodes[SystemZ::VectorBytes];
5883 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5884 if (Bytes[I] >= 0) {
5885 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5886 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5887 if (OpNo == ZeroVecIdx)
5888 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
5889 else {
5890 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
5891 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
5892 }
5893 } else
5894 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5895 }
5896 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5897 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
5898 if (MaskFirst)
5899 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
5900 Mask);
5901 else
5902 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
5903 Mask);
5904 }
5905 }
5906
5907 SDValue IndexNodes[SystemZ::VectorBytes];
5908 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5909 if (Bytes[I] >= 0)
5910 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
5911 else
5912 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5913 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5914 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
5915 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
5916}
5917
5918namespace {
5919// Describes a general N-operand vector shuffle.
5920struct GeneralShuffle {
5921 GeneralShuffle(EVT vt)
5922 : VT(vt), UnpackFromEltSize(UINT_MAX), UnpackLow(false) {}
5923 void addUndef();
5924 bool add(SDValue, unsigned);
5925 SDValue getNode(SelectionDAG &, const SDLoc &);
5926 void tryPrepareForUnpack();
5927 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
5928 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
5929
5930 // The operands of the shuffle.
5931 SmallVector<SDValue, SystemZ::VectorBytes> Ops;
5932
5933 // Index I is -1 if byte I of the result is undefined. Otherwise the
5934 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
5935 // Bytes[I] / SystemZ::VectorBytes.
5936 SmallVector<int, SystemZ::VectorBytes> Bytes;
5937
5938 // The type of the shuffle result.
5939 EVT VT;
5940
5941 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
5942 unsigned UnpackFromEltSize;
5943 // True if the final unpack uses the low half.
5944 bool UnpackLow;
5945};
5946} // namespace
5947
5948// Add an extra undefined element to the shuffle.
5949void GeneralShuffle::addUndef() {
5950 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5951 for (unsigned I = 0; I < BytesPerElement; ++I)
5952 Bytes.push_back(-1);
5953}
5954
5955// Add an extra element to the shuffle, taking it from element Elem of Op.
5956// A null Op indicates a vector input whose value will be calculated later;
5957// there is at most one such input per shuffle and it always has the same
5958// type as the result. Aborts and returns false if the source vector elements
5959// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
5960// LLVM they become implicitly extended, but this is rare and not optimized.
5961bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
5962 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5963
5964 // The source vector can have wider elements than the result,
5965 // either through an explicit TRUNCATE or because of type legalization.
5966 // We want the least significant part.
5967 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
5968 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
5969
5970 // Return false if the source elements are smaller than their destination
5971 // elements.
5972 if (FromBytesPerElement < BytesPerElement)
5973 return false;
5974
5975 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
5976 (FromBytesPerElement - BytesPerElement));
5977
5978 // Look through things like shuffles and bitcasts.
5979 while (Op.getNode()) {
5980 if (Op.getOpcode() == ISD::BITCAST)
5981 Op = Op.getOperand(0);
5982 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
5983 // See whether the bytes we need come from a contiguous part of one
5984 // operand.
5985 SmallVector<int, SystemZ::VectorBytes> OpBytes;
5986 if (!getVPermMask(Op, OpBytes))
5987 break;
5988 int NewByte;
5989 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
5990 break;
5991 if (NewByte < 0) {
5992 addUndef();
5993 return true;
5994 }
5995 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
5996 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
5997 } else if (Op.isUndef()) {
5998 addUndef();
5999 return true;
6000 } else
6001 break;
6002 }
6003
6004 // Make sure that the source of the extraction is in Ops.
6005 unsigned OpNo = 0;
6006 for (; OpNo < Ops.size(); ++OpNo)
6007 if (Ops[OpNo] == Op)
6008 break;
6009 if (OpNo == Ops.size())
6010 Ops.push_back(Op);
6011
6012 // Add the element to Bytes.
6013 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
6014 for (unsigned I = 0; I < BytesPerElement; ++I)
6015 Bytes.push_back(Base + I);
6016
6017 return true;
6018}
6019
6020// Return SDNodes for the completed shuffle.
6021SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
6022 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
6023
6024 if (Ops.size() == 0)
6025 return DAG.getUNDEF(VT);
6026
6027 // Use a single unpack if possible as the last operation.
6028 tryPrepareForUnpack();
6029
6030 // Make sure that there are at least two shuffle operands.
6031 if (Ops.size() == 1)
6032 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
6033
6034 // Create a tree of shuffles, deferring root node until after the loop.
6035 // Try to redistribute the undefined elements of non-root nodes so that
6036 // the non-root shuffles match something like a pack or merge, then adjust
6037 // the parent node's permute vector to compensate for the new order.
6038 // Among other things, this copes with vectors like <2 x i16> that were
6039 // padded with undefined elements during type legalization.
6040 //
6041 // In the best case this redistribution will lead to the whole tree
6042 // using packs and merges. It should rarely be a loss in other cases.
6043 unsigned Stride = 1;
6044 for (; Stride * 2 < Ops.size(); Stride *= 2) {
6045 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
6046 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
6047
6048 // Create a mask for just these two operands.
6049 SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
6050 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
6051 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
6052 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
6053 if (OpNo == I)
6054 NewBytes[J] = Byte;
6055 else if (OpNo == I + Stride)
6056 NewBytes[J] = SystemZ::VectorBytes + Byte;
6057 else
6058 NewBytes[J] = -1;
6059 }
6060 // See if it would be better to reorganize NewMask to avoid using VPERM.
6061 SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
6062 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
6063 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
6064 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
6065 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
6066 if (NewBytes[J] >= 0) {
6067 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
6068 "Invalid double permute");
6069 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
6070 } else
6071 assert(NewBytesMap[J] < 0 && "Invalid double permute");
6072 }
6073 } else {
6074 // Just use NewBytes on the operands.
6075 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
6076 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
6077 if (NewBytes[J] >= 0)
6078 Bytes[J] = I * SystemZ::VectorBytes + J;
6079 }
6080 }
6081 }
6082
6083 // Now we just have 2 inputs. Put the second operand in Ops[1].
6084 if (Stride > 1) {
6085 Ops[1] = Ops[Stride];
6086 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
6087 if (Bytes[I] >= int(SystemZ::VectorBytes))
6088 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
6089 }
6090
6091 // Look for an instruction that can do the permute without resorting
6092 // to VPERM.
6093 unsigned OpNo0, OpNo1;
6094 SDValue Op;
6095 if (unpackWasPrepared() && Ops[1].isUndef())
6096 Op = Ops[0];
6097 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
6098 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
6099 else
6100 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
6101
6102 Op = insertUnpackIfPrepared(DAG, DL, Op);
6103
6104 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
6105}
6106
6107#ifndef NDEBUG
6108static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
6109 dbgs() << Msg.c_str() << " { ";
6110 for (unsigned I = 0; I < Bytes.size(); I++)
6111 dbgs() << Bytes[I] << " ";
6112 dbgs() << "}\n";
6113}
6114#endif
6115
6116// If the Bytes vector matches an unpack operation, prepare to do the unpack
6117// after all else by removing the zero vector and the effect of the unpack on
6118// Bytes.
6119void GeneralShuffle::tryPrepareForUnpack() {
6120 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
6121 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
6122 return;
6123
6124 // Only do this if removing the zero vector reduces the depth, otherwise
6125 // the critical path will increase with the final unpack.
6126 if (Ops.size() > 2 &&
6127 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
6128 return;
6129
6130 // Find an unpack that would allow removing the zero vector from Ops.
6131 UnpackFromEltSize = 1;
6132 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
6133 bool MatchUnpack = true;
6134 SmallVector<int, 16> SrcBytes;
6135 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
6136 unsigned ToEltSize = UnpackFromEltSize * 2;
6137 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
6138 if (!IsZextByte)
6139 SrcBytes.push_back(Bytes[Elt]);
6140 if (Bytes[Elt] != -1) {
6141 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
6142 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
6143 MatchUnpack = false;
6144 break;
6145 }
6146 }
6147 }
6148 if (MatchUnpack) {
6149 if (Ops.size() == 2) {
6150 // Don't use unpack if a single source operand needs rearrangement.
6151 bool CanUseUnpackLow = true, CanUseUnpackHigh = true;
6152 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++) {
6153 if (SrcBytes[i] == -1)
6154 continue;
6155 if (SrcBytes[i] % 16 != int(i))
6156 CanUseUnpackHigh = false;
6157 if (SrcBytes[i] % 16 != int(i + SystemZ::VectorBytes / 2))
6158 CanUseUnpackLow = false;
6159 if (!CanUseUnpackLow && !CanUseUnpackHigh) {
6160 UnpackFromEltSize = UINT_MAX;
6161 return;
6162 }
6163 }
6164 if (!CanUseUnpackHigh)
6165 UnpackLow = true;
6166 }
6167 break;
6168 }
6169 }
6170 if (UnpackFromEltSize > 4)
6171 return;
6172
6173 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
6174 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
6175 << ".\n";
6176 dumpBytes(Bytes, "Original Bytes vector:"););
6177
6178 // Apply the unpack in reverse to the Bytes array.
6179 unsigned B = 0;
6180 if (UnpackLow) {
6181 while (B < SystemZ::VectorBytes / 2)
6182 Bytes[B++] = -1;
6183 }
6184 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
6185 Elt += UnpackFromEltSize;
6186 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
6187 Bytes[B] = Bytes[Elt];
6188 }
6189 if (!UnpackLow) {
6190 while (B < SystemZ::VectorBytes)
6191 Bytes[B++] = -1;
6192 }
6193
6194 // Remove the zero vector from Ops
6195 Ops.erase(&Ops[ZeroVecOpNo]);
6196 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
6197 if (Bytes[I] >= 0) {
6198 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
6199 if (OpNo > ZeroVecOpNo)
6200 Bytes[I] -= SystemZ::VectorBytes;
6201 }
6202
6203 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
6204 dbgs() << "\n";);
6205}
6206
6207SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
6208 const SDLoc &DL,
6209 SDValue Op) {
6210 if (!unpackWasPrepared())
6211 return Op;
6212 unsigned InBits = UnpackFromEltSize * 8;
6213 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
6214 SystemZ::VectorBits / InBits);
6215 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
6216 unsigned OutBits = InBits * 2;
6217 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
6218 SystemZ::VectorBits / OutBits);
6219 return DAG.getNode(UnpackLow ? SystemZISD::UNPACKL_LOW
6220 : SystemZISD::UNPACKL_HIGH,
6221 DL, OutVT, PackedOp);
6222}
6223
6224// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
6225 static bool isScalarToVector(SDValue Op) {
6226 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
6227 if (!Op.getOperand(I).isUndef())
6228 return false;
6229 return true;
6230}
6231
6232// Return a vector of type VT that contains Value in the first element.
6233// The other elements don't matter.
6234 static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
6235 SDValue Value) {
6236 // If we have a constant, replicate it to all elements and let the
6237 // BUILD_VECTOR lowering take care of it.
6238 if (Value.getOpcode() == ISD::Constant ||
6239 Value.getOpcode() == ISD::ConstantFP) {
6240 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
6241 return DAG.getBuildVector(VT, DL, Ops);
6242 }
6243 if (Value.isUndef())
6244 return DAG.getUNDEF(VT);
6245 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
6246}
6247
6248// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
6249// element 1. Used for cases in which replication is cheap.
6250 static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
6251 SDValue Op0, SDValue Op1) {
6252 if (Op0.isUndef()) {
6253 if (Op1.isUndef())
6254 return DAG.getUNDEF(VT);
6255 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
6256 }
6257 if (Op1.isUndef())
6258 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
6259 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
6260 buildScalarToVector(DAG, DL, VT, Op0),
6261 buildScalarToVector(DAG, DL, VT, Op1));
6262}
6263
6264// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
6265// vector for them.
6266 static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
6267 SDValue Op1) {
6268 if (Op0.isUndef() && Op1.isUndef())
6269 return DAG.getUNDEF(MVT::v2i64);
6270 // If one of the two inputs is undefined then replicate the other one,
6271 // in order to avoid using another register unnecessarily.
6272 if (Op0.isUndef())
6273 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
6274 else if (Op1.isUndef())
6275 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6276 else {
6277 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6278 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
6279 }
6280 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
6281}
6282
6283// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
6284// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
6285// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
6286// would benefit from this representation and return it if so.
6287 static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
6288 BuildVectorSDNode *BVN) {
6289 EVT VT = BVN->getValueType(0);
6290 unsigned NumElements = VT.getVectorNumElements();
6291
6292 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
6293 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
6294 // need a BUILD_VECTOR, add an additional placeholder operand for that
6295 // BUILD_VECTOR and store its operands in ResidueOps.
6296 GeneralShuffle GS(VT);
6297 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
6298 bool FoundOne = false;
6299 for (unsigned I = 0; I < NumElements; ++I) {
6300 SDValue Op = BVN->getOperand(I);
6301 if (Op.getOpcode() == ISD::TRUNCATE)
6302 Op = Op.getOperand(0);
6303 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6304 Op.getOperand(1).getOpcode() == ISD::Constant) {
6305 unsigned Elem = Op.getConstantOperandVal(1);
6306 if (!GS.add(Op.getOperand(0), Elem))
6307 return SDValue();
6308 FoundOne = true;
6309 } else if (Op.isUndef()) {
6310 GS.addUndef();
6311 } else {
6312 if (!GS.add(SDValue(), ResidueOps.size()))
6313 return SDValue();
6314 ResidueOps.push_back(BVN->getOperand(I));
6315 }
6316 }
6317
6318 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
6319 if (!FoundOne)
6320 return SDValue();
6321
6322 // Create the BUILD_VECTOR for the remaining elements, if any.
6323 if (!ResidueOps.empty()) {
6324 while (ResidueOps.size() < NumElements)
6325 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
6326 for (auto &Op : GS.Ops) {
6327 if (!Op.getNode()) {
6328 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
6329 break;
6330 }
6331 }
6332 }
6333 return GS.getNode(DAG, SDLoc(BVN));
6334}
6335
6336bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
6337 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
6338 return true;
6339 if (auto *AL = dyn_cast<AtomicSDNode>(Op))
6340 if (AL->getOpcode() == ISD::ATOMIC_LOAD)
6341 return true;
6342 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
6343 return true;
6344 return false;
6345}
6346
6347// Combine GPR scalar values Elems into a vector of type VT.
6348SDValue
6349SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
6350 SmallVectorImpl<SDValue> &Elems) const {
6351 // See whether there is a single replicated value.
6352 SDValue Single;
6353 unsigned int NumElements = Elems.size();
6354 unsigned int Count = 0;
6355 for (auto Elem : Elems) {
6356 if (!Elem.isUndef()) {
6357 if (!Single.getNode())
6358 Single = Elem;
6359 else if (Elem != Single) {
6360 Single = SDValue();
6361 break;
6362 }
6363 Count += 1;
6364 }
6365 }
6366 // There are three cases here:
6367 //
6368 // - if the only defined element is a loaded one, the best sequence
6369 // is a replicating load.
6370 //
6371 // - otherwise, if the only defined element is an i64 value, we will
6372 // end up with the same VLVGP sequence regardless of whether we short-cut
6373 // for replication or fall through to the later code.
6374 //
6375 // - otherwise, if the only defined element is an i32 or smaller value,
6376 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
6377 // This is only a win if the single defined element is used more than once.
6378 // In other cases we're better off using a single VLVGx.
6379 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
6380 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
6381
6382 // If all elements are loads, use VLREP/VLEs (below).
6383 bool AllLoads = true;
6384 for (auto Elem : Elems)
6385 if (!isVectorElementLoad(Elem)) {
6386 AllLoads = false;
6387 break;
6388 }
6389
6390 // The best way of building a v2i64 from two i64s is to use VLVGP.
6391 if (VT == MVT::v2i64 && !AllLoads)
6392 return joinDwords(DAG, DL, Elems[0], Elems[1]);
6393
6394 // Use a 64-bit merge high to combine two doubles.
6395 if (VT == MVT::v2f64 && !AllLoads)
6396 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
6397
6398 // Build v4f32 values directly from the FPRs:
6399 //
6400 // <Axxx> <Bxxx> <Cxxx> <Dxxx>
6401 // V V VMRHF
6402 // <ABxx> <CDxx>
6403 // V VMRHG
6404 // <ABCD>
6405 if (VT == MVT::v4f32 && !AllLoads) {
6406 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
6407 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
6408 // Avoid unnecessary undefs by reusing the other operand.
6409 if (Op01.isUndef())
6410 Op01 = Op23;
6411 else if (Op23.isUndef())
6412 Op23 = Op01;
6413 // Merging identical replications is a no-op.
6414 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
6415 return Op01;
6416 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
6417 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
6418 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
6419 DL, MVT::v2i64, Op01, Op23);
6420 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
6421 }
6422
6423 // Collect the constant terms.
6424 SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
6425 SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);
6426
6427 unsigned NumConstants = 0;
6428 for (unsigned I = 0; I < NumElements; ++I) {
6429 SDValue Elem = Elems[I];
6430 if (Elem.getOpcode() == ISD::Constant ||
6431 Elem.getOpcode() == ISD::ConstantFP) {
6432 NumConstants += 1;
6433 Constants[I] = Elem;
6434 Done[I] = true;
6435 }
6436 }
6437 // If there was at least one constant, fill in the other elements of
6438 // Constants with undefs to get a full vector constant and use that
6439 // as the starting point.
6440 SDValue Result;
6441 SDValue ReplicatedVal;
6442 if (NumConstants > 0) {
6443 for (unsigned I = 0; I < NumElements; ++I)
6444 if (!Constants[I].getNode())
6445 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
6446 Result = DAG.getBuildVector(VT, DL, Constants);
6447 } else {
6448 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
6449 // avoid a false dependency on any previous contents of the vector
6450 // register.
6451
6452 // Use a VLREP if at least one element is a load. Make sure to replicate
6453 // the load with the most elements having its value.
6454 std::map<const SDNode*, unsigned> UseCounts;
6455 SDNode *LoadMaxUses = nullptr;
6456 for (unsigned I = 0; I < NumElements; ++I)
6457 if (isVectorElementLoad(Elems[I])) {
6458 SDNode *Ld = Elems[I].getNode();
6459 unsigned Count = ++UseCounts[Ld];
6460 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < Count)
6461 LoadMaxUses = Ld;
6462 }
6463 if (LoadMaxUses != nullptr) {
6464 ReplicatedVal = SDValue(LoadMaxUses, 0);
6465 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
6466 } else {
6467 // Try to use VLVGP.
6468 unsigned I1 = NumElements / 2 - 1;
6469 unsigned I2 = NumElements - 1;
6470 bool Def1 = !Elems[I1].isUndef();
6471 bool Def2 = !Elems[I2].isUndef();
6472 if (Def1 || Def2) {
6473 SDValue Elem1 = Elems[Def1 ? I1 : I2];
6474 SDValue Elem2 = Elems[Def2 ? I2 : I1];
6475 Result = DAG.getNode(ISD::BITCAST, DL, VT,
6476 joinDwords(DAG, DL, Elem1, Elem2));
6477 Done[I1] = true;
6478 Done[I2] = true;
6479 } else
6480 Result = DAG.getUNDEF(VT);
6481 }
6482 }
6483
6484 // Use VLVGx to insert the other elements.
6485 for (unsigned I = 0; I < NumElements; ++I)
6486 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
6487 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
6488 DAG.getConstant(I, DL, MVT::i32));
6489 return Result;
6490}
6491
6492SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
6493 SelectionDAG &DAG) const {
6494 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
6495 SDLoc DL(Op);
6496 EVT VT = Op.getValueType();
6497
6498 if (BVN->isConstant()) {
6499 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
6500 return Op;
6501
6502 // Fall back to loading it from memory.
6503 return SDValue();
6504 }
6505
6506 // See if we should use shuffles to construct the vector from other vectors.
6507 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
6508 return Res;
6509
6510 // Detect SCALAR_TO_VECTOR conversions.
6511 if (isScalarToVector(Op))
6512 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
6513
6514 // Otherwise use buildVector to build the vector up from GPRs.
6515 unsigned NumElements = Op.getNumOperands();
6516 SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
6517 for (unsigned I = 0; I < NumElements; ++I)
6518 Ops[I] = Op.getOperand(I);
6519 return buildVector(DAG, DL, VT, Ops);
6520}
6521
6522SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
6523 SelectionDAG &DAG) const {
6524 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
6525 SDLoc DL(Op);
6526 EVT VT = Op.getValueType();
6527 unsigned NumElements = VT.getVectorNumElements();
6528
6529 if (VSN->isSplat()) {
6530 SDValue Op0 = Op.getOperand(0);
6531 unsigned Index = VSN->getSplatIndex();
6532 assert(Index < VT.getVectorNumElements() &&
6533 "Splat index should be defined and in first operand");
6534 // See whether the value we're splatting is directly available as a scalar.
6535 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6536 Op0.getOpcode() == ISD::BUILD_VECTOR)
6537 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
6538 // Otherwise keep it as a vector-to-vector operation.
6539 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
6540 DAG.getTargetConstant(Index, DL, MVT::i32));
6541 }
6542
6543 GeneralShuffle GS(VT);
6544 for (unsigned I = 0; I < NumElements; ++I) {
6545 int Elt = VSN->getMaskElt(I);
6546 if (Elt < 0)
6547 GS.addUndef();
6548 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
6549 unsigned(Elt) % NumElements))
6550 return SDValue();
6551 }
6552 return GS.getNode(DAG, SDLoc(VSN));
6553}
6554
6555SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
6556 SelectionDAG &DAG) const {
6557 SDLoc DL(Op);
6558 // Just insert the scalar into element 0 of an undefined vector.
6559 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
6560 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
6561 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
6562}
6563
6564SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
6565 SelectionDAG &DAG) const {
6566 // Handle insertions of floating-point values.
6567 SDLoc DL(Op);
6568 SDValue Op0 = Op.getOperand(0);
6569 SDValue Op1 = Op.getOperand(1);
6570 SDValue Op2 = Op.getOperand(2);
6571 EVT VT = Op.getValueType();
6572
6573 // Insertions into constant indices of a v2f64 can be done using VPDI.
6574 // However, if the inserted value is a bitcast or a constant then it's
6575 // better to use GPRs, as below.
6576 if (VT == MVT::v2f64 &&
6577 Op1.getOpcode() != ISD::BITCAST &&
6578 Op1.getOpcode() != ISD::ConstantFP &&
6579 Op2.getOpcode() == ISD::Constant) {
6580 uint64_t Index = Op2->getAsZExtVal();
6581 unsigned Mask = VT.getVectorNumElements() - 1;
6582 if (Index <= Mask)
6583 return Op;
6584 }
6585
6586 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
6587 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
6588 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
6589 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
6590 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
6591 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
6592 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
6593}
6594
6595SDValue
6596SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
6597 SelectionDAG &DAG) const {
6598 // Handle extractions of floating-point values.
6599 SDLoc DL(Op);
6600 SDValue Op0 = Op.getOperand(0);
6601 SDValue Op1 = Op.getOperand(1);
6602 EVT VT = Op.getValueType();
6603 EVT VecVT = Op0.getValueType();
6604
6605 // Extractions of constant indices can be done directly.
6606 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
6607 uint64_t Index = CIndexN->getZExtValue();
6608 unsigned Mask = VecVT.getVectorNumElements() - 1;
6609 if (Index <= Mask)
6610 return Op;
6611 }
6612
6613 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
6614 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
6615 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
6616 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
6617 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
6618 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
6619}
6620
6621SDValue SystemZTargetLowering::
6622lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6623 SDValue PackedOp = Op.getOperand(0);
6624 EVT OutVT = Op.getValueType();
6625 EVT InVT = PackedOp.getValueType();
6626 unsigned ToBits = OutVT.getScalarSizeInBits();
6627 unsigned FromBits = InVT.getScalarSizeInBits();
6628 unsigned StartOffset = 0;
6629
6630 // If the input is a VECTOR_SHUFFLE, there are a number of important
6631 // cases where we can directly implement the sign-extension of the
6632 // original input lanes of the shuffle.
6633 if (PackedOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
6634 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(PackedOp.getNode());
6635 ArrayRef<int> ShuffleMask = SVN->getMask();
6636 int OutNumElts = OutVT.getVectorNumElements();
6637
6638 // Recognize the special case where the sign-extension can be done
6639 // by the VSEG instruction. Handled via the default expander.
6640 if (ToBits == 64 && OutNumElts == 2) {
6641 int NumElem = ToBits / FromBits;
6642 if (ShuffleMask[0] == NumElem - 1 && ShuffleMask[1] == 2 * NumElem - 1)
6643 return SDValue();
6644 }
6645
6646 // Recognize the special case where we can fold the shuffle by
6647 // replacing some of the UNPACK_HIGH with UNPACK_LOW.
6648 int StartOffsetCandidate = -1;
6649 for (int Elt = 0; Elt < OutNumElts; Elt++) {
6650 if (ShuffleMask[Elt] == -1)
6651 continue;
6652 if (ShuffleMask[Elt] % OutNumElts == Elt) {
6653 if (StartOffsetCandidate == -1)
6654 StartOffsetCandidate = ShuffleMask[Elt] - Elt;
6655 if (StartOffsetCandidate == ShuffleMask[Elt] - Elt)
6656 continue;
6657 }
6658 StartOffsetCandidate = -1;
6659 break;
6660 }
6661 if (StartOffsetCandidate != -1) {
6662 StartOffset = StartOffsetCandidate;
6663 PackedOp = PackedOp.getOperand(0);
6664 }
6665 }
6666
6667 do {
6668 FromBits *= 2;
6669 unsigned OutNumElts = SystemZ::VectorBits / FromBits;
6670 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits), OutNumElts);
6671 unsigned Opcode = SystemZISD::UNPACK_HIGH;
6672 if (StartOffset >= OutNumElts) {
6673 Opcode = SystemZISD::UNPACK_LOW;
6674 StartOffset -= OutNumElts;
6675 }
6676 PackedOp = DAG.getNode(Opcode, SDLoc(PackedOp), OutVT, PackedOp);
6677 } while (FromBits != ToBits);
6678 return PackedOp;
6679}
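// Illustrative sketch (hypothetical helper, not part of the original file):
// the number of unpack steps the widening loop above emits, assuming ToBits
// is FromBits times a power of two, as in the lowering loop. For example,
// sign-extending i8 lanes to i64 takes three unpacks (8 -> 16 -> 32 -> 64).
static unsigned exampleUnpackSteps(unsigned FromBits, unsigned ToBits) {
  unsigned Steps = 0;
  while (FromBits != ToBits) {
    FromBits *= 2;
    ++Steps;
  }
  return Steps;
}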
6680
6681// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
6682SDValue SystemZTargetLowering::
6683lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6684 SDValue PackedOp = Op.getOperand(0);
6685 SDLoc DL(Op);
6686 EVT OutVT = Op.getValueType();
6687 EVT InVT = PackedOp.getValueType();
6688 unsigned InNumElts = InVT.getVectorNumElements();
6689 unsigned OutNumElts = OutVT.getVectorNumElements();
6690 unsigned NumInPerOut = InNumElts / OutNumElts;
6691
6692 SDValue ZeroVec =
6693 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
6694
6695 SmallVector<int, 16> Mask(InNumElts);
6696 unsigned ZeroVecElt = InNumElts;
6697 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
6698 unsigned MaskElt = PackedElt * NumInPerOut;
6699 unsigned End = MaskElt + NumInPerOut - 1;
6700 for (; MaskElt < End; MaskElt++)
6701 Mask[MaskElt] = ZeroVecElt++;
6702 Mask[MaskElt] = PackedElt;
6703 }
6704 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
6705 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
6706}
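// Editor's note (illustrative sketch, not part of the original source): for a
// zero extension from v16i8 to v2i64, InNumElts = 16, OutNumElts = 2 and
// NumInPerOut = 8, so the shuffle mask built above is
//   {16,17,18,19,20,21,22, 0, 23,24,25,26,27,28,29, 1}
// i.e. seven bytes taken from the zero vector followed by one input element,
// which on this big-endian target bitcasts to {zext(in[0]), zext(in[1])}.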
6707
6708SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
6709 unsigned ByScalar) const {
6710 // Look for cases where a vector shift can use the *_BY_SCALAR form.
6711 SDValue Op0 = Op.getOperand(0);
6712 SDValue Op1 = Op.getOperand(1);
6713 SDLoc DL(Op);
6714 EVT VT = Op.getValueType();
6715 unsigned ElemBitSize = VT.getScalarSizeInBits();
6716
6717 // See whether the shift vector is a splat represented as BUILD_VECTOR.
6718 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
6719 APInt SplatBits, SplatUndef;
6720 unsigned SplatBitSize;
6721 bool HasAnyUndefs;
6722 // Check for constant splats. Use ElemBitSize as the minimum element
6723 // width and reject splats that need wider elements.
6724 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6725 ElemBitSize, true) &&
6726 SplatBitSize == ElemBitSize) {
6727 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
6728 DL, MVT::i32);
6729 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6730 }
6731 // Check for variable splats.
6732 BitVector UndefElements;
6733 SDValue Splat = BVN->getSplatValue(&UndefElements);
6734 if (Splat) {
6735 // Since i32 is the smallest legal type, we either need a no-op
6736 // or a truncation.
6737 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
6738 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6739 }
6740 }
6741
6742 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
6743 // and the shift amount is directly available in a GPR.
6744 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
6745 if (VSN->isSplat()) {
6746 SDValue VSNOp0 = VSN->getOperand(0);
6747 unsigned Index = VSN->getSplatIndex();
6748 assert(Index < VT.getVectorNumElements() &&
6749 "Splat index should be defined and in first operand");
6750 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6751 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
6752 // Since i32 is the smallest legal type, we either need a no-op
6753 // or a truncation.
6754 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
6755 VSNOp0.getOperand(Index));
6756 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6757 }
6758 }
6759 }
6760
6761 // Otherwise just treat the current form as legal.
6762 return Op;
6763}
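// Editor's note (example, not in the original source): a constant splat shift
// such as (shl <4 x i32> X, splat 5) is matched by the BUILD_VECTOR path above
// and becomes VSHL_BY_SCALAR X, (i32 5); a variable splat instead truncates
// the splatted scalar to i32 and feeds it to the same *_BY_SCALAR node.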
6764
6765SDValue SystemZTargetLowering::lowerFSHL(SDValue Op, SelectionDAG &DAG) const {
6766 SDLoc DL(Op);
6767
6768 // i128 FSHL with a constant amount that is a multiple of 8 can be
6769 // implemented via VECTOR_SHUFFLE. If we have the vector-enhancements-2
6770 // facility, FSHL with a constant amount less than 8 can be implemented
6771 // via SHL_DOUBLE_BIT, and FSHL with other constant amounts by a
6772 // combination of the two.
6773 if (auto *ShiftAmtNode = dyn_cast<ConstantSDNode>(Op.getOperand(2))) {
6774 uint64_t ShiftAmt = ShiftAmtNode->getZExtValue() & 127;
6775 if ((ShiftAmt & 7) == 0 || Subtarget.hasVectorEnhancements2()) {
6776 SDValue Op0 = DAG.getBitcast(MVT::v16i8, Op.getOperand(0));
6777 SDValue Op1 = DAG.getBitcast(MVT::v16i8, Op.getOperand(1));
6778 if (ShiftAmt > 120) {
6779 // For N in 121..128, fshl N == fshr (128 - N), and for 1 <= N < 8
6780 // SHR_DOUBLE_BIT emits fewer instructions.
6781 SDValue Val =
6782 DAG.getNode(SystemZISD::SHR_DOUBLE_BIT, DL, MVT::v16i8, Op0, Op1,
6783 DAG.getTargetConstant(128 - ShiftAmt, DL, MVT::i32));
6784 return DAG.getBitcast(MVT::i128, Val);
6785 }
6786 SmallVector<int, 16> Mask(16);
6787 for (unsigned Elt = 0; Elt < 16; Elt++)
6788 Mask[Elt] = (ShiftAmt >> 3) + Elt;
6789 SDValue Shuf1 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op1, Mask);
6790 if ((ShiftAmt & 7) == 0)
6791 return DAG.getBitcast(MVT::i128, Shuf1);
6792 SDValue Shuf2 = DAG.getVectorShuffle(MVT::v16i8, DL, Op1, Op1, Mask);
6793 SDValue Val =
6794 DAG.getNode(SystemZISD::SHL_DOUBLE_BIT, DL, MVT::v16i8, Shuf1, Shuf2,
6795 DAG.getTargetConstant(ShiftAmt & 7, DL, MVT::i32));
6796 return DAG.getBitcast(MVT::i128, Val);
6797 }
6798 }
6799
6800 return SDValue();
6801}
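// Editor's note (illustrative sketch, not part of the original source): for
// i128 (fshl A, B, 24) the code above bitcasts A and B to v16i8 and builds the
// byte-shuffle mask {3,4,...,18}, selecting bytes 3..15 of A (its low 104
// bits) followed by bytes 0..2 of B (its high 24 bits; indices 16..18 refer to
// the second operand), which equals (A << 24) | (B >> 104). A residual amount
// below 8 would then be handled by SHL_DOUBLE_BIT when vector-enhancements-2
// is available.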
6802
6803SDValue SystemZTargetLowering::lowerFSHR(SDValue Op, SelectionDAG &DAG) const {
6804 SDLoc DL(Op);
6805
6806 // i128 FSHR with a constant amount that is a multiple of 8 can be
6807 // implemented via VECTOR_SHUFFLE. If we have the vector-enhancements-2
6808 // facility, FSHR with a constant amount less than 8 can be implemented
6809 // via SHR_DOUBLE_BIT, and FSHR with other constant amounts by a
6810 // combination of the two.
6811 if (auto *ShiftAmtNode = dyn_cast<ConstantSDNode>(Op.getOperand(2))) {
6812 uint64_t ShiftAmt = ShiftAmtNode->getZExtValue() & 127;
6813 if ((ShiftAmt & 7) == 0 || Subtarget.hasVectorEnhancements2()) {
6814 SDValue Op0 = DAG.getBitcast(MVT::v16i8, Op.getOperand(0));
6815 SDValue Op1 = DAG.getBitcast(MVT::v16i8, Op.getOperand(1));
6816 if (ShiftAmt > 120) {
6817 // For N in 121..128, fshr N == fshl (128 - N), and for 1 <= N < 8
6818 // SHL_DOUBLE_BIT emits fewer instructions.
6819 SDValue Val =
6820 DAG.getNode(SystemZISD::SHL_DOUBLE_BIT, DL, MVT::v16i8, Op0, Op1,
6821 DAG.getTargetConstant(128 - ShiftAmt, DL, MVT::i32));
6822 return DAG.getBitcast(MVT::i128, Val);
6823 }
6824 SmallVector<int, 16> Mask(16);
6825 for (unsigned Elt = 0; Elt < 16; Elt++)
6826 Mask[Elt] = 16 - (ShiftAmt >> 3) + Elt;
6827 SDValue Shuf1 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op1, Mask);
6828 if ((ShiftAmt & 7) == 0)
6829 return DAG.getBitcast(MVT::i128, Shuf1);
6830 SDValue Shuf2 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op0, Mask);
6831 SDValue Val =
6832 DAG.getNode(SystemZISD::SHR_DOUBLE_BIT, DL, MVT::v16i8, Shuf2, Shuf1,
6833 DAG.getTargetConstant(ShiftAmt & 7, DL, MVT::i32));
6834 return DAG.getBitcast(MVT::i128, Val);
6835 }
6836 }
6837
6838 return SDValue();
6839}
6840
6842 SDLoc DL(Op);
6843 SDValue Src = Op.getOperand(0);
6844 MVT DstVT = Op.getSimpleValueType();
6845
6847 unsigned SrcAS = N->getSrcAddressSpace();
6848
6849 assert(SrcAS != N->getDestAddressSpace() &&
6850 "addrspacecast must be between different address spaces");
6851
6852 // addrspacecast [0 <- 1] : Assigning a ptr32 value to a 64-bit pointer.
6853 // addrspacecast [1 <- 0] : Assigning a 64-bit pointer to a ptr32 value.
6854 if (SrcAS == SYSTEMZAS::PTR32 && DstVT == MVT::i64) {
6855 Op = DAG.getNode(ISD::AND, DL, MVT::i32, Src,
6856 DAG.getConstant(0x7fffffff, DL, MVT::i32));
6857 Op = DAG.getNode(ISD::ZERO_EXTEND, DL, DstVT, Op);
6858 } else if (DstVT == MVT::i32) {
6859 Op = DAG.getNode(ISD::TRUNCATE, DL, DstVT, Src);
6860 Op = DAG.getNode(ISD::AND, DL, MVT::i32, Op,
6861 DAG.getConstant(0x7fffffff, DL, MVT::i32));
6862 Op = DAG.getNode(ISD::ZERO_EXTEND, DL, DstVT, Op);
6863 } else {
6864 report_fatal_error("Bad address space in addrspacecast");
6865 }
6866 return Op;
6867}
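// Editor's note (example, not in the original source): __ptr32 pointers carry
// a 31-bit address, so both directions clear bit 31. Casting the ptr32 value
// 0x80001000 to a 64-bit pointer therefore yields 0x0000000000001000, and
// casting a 64-bit pointer to ptr32 truncates to 32 bits and masks off the
// top bit in the same way.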
6868
6869SDValue SystemZTargetLowering::lowerFP_EXTEND(SDValue Op,
6870 SelectionDAG &DAG) const {
6871 SDValue In = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
6872 if (In.getSimpleValueType() != MVT::f16)
6873 return Op; // Legal
6874 return SDValue(); // Let legalizer emit the libcall.
6875}
6876
6877SDValue SystemZTargetLowering::useLibCall(SelectionDAG &DAG, RTLIB::Libcall LC,
6878 MVT VT, SDValue Arg, SDLoc DL,
6879 SDValue Chain, bool IsStrict) const {
6880 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected request for libcall!");
6881 MakeLibCallOptions CallOptions;
6882 SDValue Result;
6883 std::tie(Result, Chain) =
6884 makeLibCall(DAG, LC, VT, Arg, CallOptions, DL, Chain);
6885 return IsStrict ? DAG.getMergeValues({Result, Chain}, DL) : Result;
6886}
6887
6888SDValue SystemZTargetLowering::lower_FP_TO_INT(SDValue Op,
6889 SelectionDAG &DAG) const {
6890 bool IsSigned = (Op->getOpcode() == ISD::FP_TO_SINT ||
6891 Op->getOpcode() == ISD::STRICT_FP_TO_SINT);
6892 bool IsStrict = Op->isStrictFPOpcode();
6893 SDLoc DL(Op);
6894 MVT VT = Op.getSimpleValueType();
6895 SDValue InOp = Op.getOperand(IsStrict ? 1 : 0);
6896 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
6897 EVT InVT = InOp.getValueType();
6898
6899 // FP to unsigned is not directly supported on z10. Promoting an i32
6900 // result to (signed) i64 doesn't generate an inexact condition (fp
6901 // exception) for values that are outside the i32 range but in the i64
6902 // range, so use the default expansion.
6903 if (!Subtarget.hasFPExtension() && !IsSigned)
6904 // Expand i32/i64. F16 values will be recognized to fit and extended.
6905 return SDValue();
6906
6907 // Conversion from f16 is done via f32.
6908 if (InOp.getSimpleValueType() == MVT::f16) {
6910 LowerOperationWrapper(Op.getNode(), Results, DAG);
6911 return DAG.getMergeValues(Results, DL);
6912 }
6913
6914 if (VT == MVT::i128) {
6915 RTLIB::Libcall LC =
6916 IsSigned ? RTLIB::getFPTOSINT(InVT, VT) : RTLIB::getFPTOUINT(InVT, VT);
6917 return useLibCall(DAG, LC, VT, InOp, DL, Chain, IsStrict);
6918 }
6919
6920 return Op; // Legal
6921}
6922
6923SDValue SystemZTargetLowering::lower_INT_TO_FP(SDValue Op,
6924 SelectionDAG &DAG) const {
6925 bool IsSigned = (Op->getOpcode() == ISD::SINT_TO_FP ||
6926 Op->getOpcode() == ISD::STRICT_SINT_TO_FP);
6927 bool IsStrict = Op->isStrictFPOpcode();
6928 SDLoc DL(Op);
6929 MVT VT = Op.getSimpleValueType();
6930 SDValue InOp = Op.getOperand(IsStrict ? 1 : 0);
6931 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
6932 EVT InVT = InOp.getValueType();
6933
6934 // Conversion to f16 is done via f32.
6935 if (VT == MVT::f16) {
6937 LowerOperationWrapper(Op.getNode(), Results, DAG);
6938 return DAG.getMergeValues(Results, DL);
6939 }
6940
6941 // Unsigned to fp is not directly supported on z10.
6942 if (!Subtarget.hasFPExtension() && !IsSigned)
6943 return SDValue(); // Expand i64.
6944
6945 if (InVT == MVT::i128) {
6946 RTLIB::Libcall LC =
6947 IsSigned ? RTLIB::getSINTTOFP(InVT, VT) : RTLIB::getUINTTOFP(InVT, VT);
6948 return useLibCall(DAG, LC, VT, InOp, DL, Chain, IsStrict);
6949 }
6950
6951 return Op; // Legal
6952}
6953
6954// Shift the lower 2 bytes of Op to the left in order to insert into the
6955// upper 2 bytes of the FP register.
6956static SDValue convertToF16(SDValue Op, SelectionDAG &DAG) {
6957 assert(Op.getSimpleValueType() == MVT::i64 &&
6958 "Expected to convert i64 to f16.");
6959 SDLoc DL(Op);
6960 SDValue Shft = DAG.getNode(ISD::SHL, DL, MVT::i64, Op,
6961 DAG.getConstant(48, DL, MVT::i64));
6962 SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Shft);
6963 SDValue F16Val =
6964 DAG.getTargetExtractSubreg(SystemZ::subreg_h16, DL, MVT::f16, BCast);
6965 return F16Val;
6966}
6967
6968// Extract Op into GPR and shift the 2 f16 bytes to the right.
6969static SDValue convertFromF16(SDValue Op, SDLoc DL, SelectionDAG &DAG) {
6970 assert(Op.getSimpleValueType() == MVT::f16 &&
6971 "Expected to convert f16 to i64.");
6972 SDNode *U32 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
6973 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h16, DL, MVT::f64,
6974 SDValue(U32, 0), Op);
6975 SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
6976 SDValue Shft = DAG.getNode(ISD::SRL, DL, MVT::i64, BCast,
6977 DAG.getConstant(48, DL, MVT::i32));
6978 return Shft;
6979}
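// Editor's note (illustrative sketch, not part of the original source): an
// f16 value occupies the leftmost two bytes of the 8-byte FP register, so the
// i64 value 0x0000000000003C00 (half-precision 1.0) is shifted left by 48 to
// 0x3C00000000000000 before the bitcast in convertToF16, and convertFromF16
// undoes this with a logical shift right by 48.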
6980
6981// Lower an f16 LOAD in case of no vector support.
6982SDValue SystemZTargetLowering::lowerLoadF16(SDValue Op,
6983 SelectionDAG &DAG) const {
6984 EVT RegVT = Op.getValueType();
6985 assert(RegVT == MVT::f16 && "Expected to lower an f16 load.");
6986 (void)RegVT;
6987
6988 // Load as integer.
6989 SDLoc DL(Op);
6990 SDValue NewLd;
6991 if (auto *AtomicLd = dyn_cast<AtomicSDNode>(Op.getNode())) {
6992 assert(EVT(RegVT) == AtomicLd->getMemoryVT() && "Unhandled f16 load");
6993 NewLd = DAG.getAtomicLoad(ISD::EXTLOAD, DL, MVT::i16, MVT::i64,
6994 AtomicLd->getChain(), AtomicLd->getBasePtr(),
6995 AtomicLd->getMemOperand());
6996 } else {
6997 LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
6998 assert(EVT(RegVT) == Ld->getMemoryVT() && "Unhandled f16 load");
6999 NewLd = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i64, Ld->getChain(),
7000 Ld->getBasePtr(), Ld->getPointerInfo(), MVT::i16,
7001 Ld->getBaseAlign(), Ld->getMemOperand()->getFlags());
7002 }
7003 SDValue F16Val = convertToF16(NewLd, DAG);
7004 return DAG.getMergeValues({F16Val, NewLd.getValue(1)}, DL);
7005}
7006
7007// Lower an f16 STORE in case of no vector support.
7008SDValue SystemZTargetLowering::lowerStoreF16(SDValue Op,
7009 SelectionDAG &DAG) const {
7010 SDLoc DL(Op);
7011 SDValue Shft = convertFromF16(Op->getOperand(1), DL, DAG);
7012
7013 if (auto *AtomicSt = dyn_cast<AtomicSDNode>(Op.getNode()))
7014 return DAG.getAtomic(ISD::ATOMIC_STORE, DL, MVT::i16, AtomicSt->getChain(),
7015 Shft, AtomicSt->getBasePtr(),
7016 AtomicSt->getMemOperand());
7017
7018 StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
7019 return DAG.getTruncStore(St->getChain(), DL, Shft, St->getBasePtr(), MVT::i16,
7020 St->getMemOperand());
7021}
7022
7023SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
7024 SelectionDAG &DAG) const {
7025 SDLoc DL(Op);
7026 MVT ResultVT = Op.getSimpleValueType();
7027 SDValue Arg = Op.getOperand(0);
7028 unsigned Check = Op.getConstantOperandVal(1);
7029
7030 unsigned TDCMask = 0;
7031 if (Check & fcSNan)
7032 TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS;
7033 if (Check & fcQNan)
7034 TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS;
7035 if (Check & fcPosInf)
7036 TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
7037 if (Check & fcNegInf)
7038 TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
7039 if (Check & fcPosNormal)
7040 TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
7041 if (Check & fcNegNormal)
7042 TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
7043 if (Check & fcPosSubnormal)
7044 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
7045 if (Check & fcNegSubnormal)
7046 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
7047 if (Check & fcPosZero)
7048 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
7049 if (Check & fcNegZero)
7050 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
7051 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
7052
7053 if (Arg.getSimpleValueType() == MVT::f16)
7054 Arg = DAG.getFPExtendOrRound(Arg, SDLoc(Arg), MVT::f32);
7055 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
7056 return getCCResult(DAG, Intr);
7057}
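// Editor's note (example, not in the original source): (is_fpclass X, fcInf)
// sets both the +inf and -inf bits of the TDC mask, so the TDC (TEST DATA
// CLASS) node tests both infinities with a single instruction, and
// getCCResult converts the resulting condition code into the boolean result.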
7058
7059SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
7060 SelectionDAG &DAG) const {
7061 SDLoc DL(Op);
7062 SDValue Chain = Op.getOperand(0);
7063
7064 // STCKF only supports a memory operand, so we have to use a temporary.
7065 SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
7066 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
7067 MachinePointerInfo MPI =
7068 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
7069
7070 // Use STCKF to store the TOD clock into the temporary.
7071 SDValue StoreOps[] = {Chain, StackPtr};
7072 Chain = DAG.getMemIntrinsicNode(
7073 SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
7074 MPI, MaybeAlign(), MachineMemOperand::MOStore);
7075
7076 // And read it back from there.
7077 return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI);
7078}
7079
7080SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
7081 SelectionDAG &DAG) const {
7082 switch (Op.getOpcode()) {
7083 case ISD::FRAMEADDR:
7084 return lowerFRAMEADDR(Op, DAG);
7085 case ISD::RETURNADDR:
7086 return lowerRETURNADDR(Op, DAG);
7087 case ISD::BR_CC:
7088 return lowerBR_CC(Op, DAG);
7089 case ISD::SELECT_CC:
7090 return lowerSELECT_CC(Op, DAG);
7091 case ISD::SETCC:
7092 return lowerSETCC(Op, DAG);
7093 case ISD::STRICT_FSETCC:
7094 return lowerSTRICT_FSETCC(Op, DAG, false);
7095 case ISD::STRICT_FSETCCS:
7096 return lowerSTRICT_FSETCC(Op, DAG, true);
7097 case ISD::GlobalAddress:
7098 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
7099 case ISD::GlobalTLSAddress:
7100 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
7101 case ISD::BlockAddress:
7102 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
7103 case ISD::JumpTable:
7104 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
7105 case ISD::ConstantPool:
7106 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
7107 case ISD::BITCAST:
7108 return lowerBITCAST(Op, DAG);
7109 case ISD::VASTART:
7110 return lowerVASTART(Op, DAG);
7111 case ISD::VACOPY:
7112 return lowerVACOPY(Op, DAG);
7113 case ISD::DYNAMIC_STACKALLOC:
7114 return lowerDYNAMIC_STACKALLOC(Op, DAG);
7115 case ISD::GET_DYNAMIC_AREA_OFFSET:
7116 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
7117 case ISD::MULHS:
7118 return lowerMULH(Op, DAG, SystemZISD::SMUL_LOHI);
7119 case ISD::MULHU:
7120 return lowerMULH(Op, DAG, SystemZISD::UMUL_LOHI);
7121 case ISD::SMUL_LOHI:
7122 return lowerSMUL_LOHI(Op, DAG);
7123 case ISD::UMUL_LOHI:
7124 return lowerUMUL_LOHI(Op, DAG);
7125 case ISD::SDIVREM:
7126 return lowerSDIVREM(Op, DAG);
7127 case ISD::UDIVREM:
7128 return lowerUDIVREM(Op, DAG);
7129 case ISD::SADDO:
7130 case ISD::SSUBO:
7131 case ISD::UADDO:
7132 case ISD::USUBO:
7133 return lowerXALUO(Op, DAG);
7134 case ISD::UADDO_CARRY:
7135 case ISD::USUBO_CARRY:
7136 return lowerUADDSUBO_CARRY(Op, DAG);
7137 case ISD::OR:
7138 return lowerOR(Op, DAG);
7139 case ISD::CTPOP:
7140 return lowerCTPOP(Op, DAG);
7141 case ISD::VECREDUCE_ADD:
7142 return lowerVECREDUCE_ADD(Op, DAG);
7143 case ISD::ATOMIC_FENCE:
7144 return lowerATOMIC_FENCE(Op, DAG);
7145 case ISD::ATOMIC_SWAP:
7146 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
7147 case ISD::ATOMIC_STORE:
7148 return lowerATOMIC_STORE(Op, DAG);
7149 case ISD::ATOMIC_LOAD:
7150 return lowerATOMIC_LOAD(Op, DAG);
7151 case ISD::ATOMIC_LOAD_ADD:
7152 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
7153 case ISD::ATOMIC_LOAD_SUB:
7154 return lowerATOMIC_LOAD_SUB(Op, DAG);
7155 case ISD::ATOMIC_LOAD_AND:
7156 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
7157 case ISD::ATOMIC_LOAD_OR:
7158 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
7159 case ISD::ATOMIC_LOAD_XOR:
7160 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
7161 case ISD::ATOMIC_LOAD_NAND:
7162 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
7163 case ISD::ATOMIC_LOAD_MIN:
7164 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
7165 case ISD::ATOMIC_LOAD_MAX:
7166 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
7167 case ISD::ATOMIC_LOAD_UMIN:
7168 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
7169 case ISD::ATOMIC_LOAD_UMAX:
7170 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
7171 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
7172 return lowerATOMIC_CMP_SWAP(Op, DAG);
7173 case ISD::STACKSAVE:
7174 return lowerSTACKSAVE(Op, DAG);
7175 case ISD::STACKRESTORE:
7176 return lowerSTACKRESTORE(Op, DAG);
7177 case ISD::PREFETCH:
7178 return lowerPREFETCH(Op, DAG);
7179 case ISD::INTRINSIC_W_CHAIN:
7180 return lowerINTRINSIC_W_CHAIN(Op, DAG);
7181 case ISD::INTRINSIC_WO_CHAIN:
7182 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
7183 case ISD::BUILD_VECTOR:
7184 return lowerBUILD_VECTOR(Op, DAG);
7185 case ISD::VECTOR_SHUFFLE:
7186 return lowerVECTOR_SHUFFLE(Op, DAG);
7187 case ISD::SCALAR_TO_VECTOR:
7188 return lowerSCALAR_TO_VECTOR(Op, DAG);
7189 case ISD::INSERT_VECTOR_ELT:
7190 return lowerINSERT_VECTOR_ELT(Op, DAG);
7191 case ISD::EXTRACT_VECTOR_ELT:
7192 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
7193 case ISD::SIGN_EXTEND_VECTOR_INREG:
7194 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
7195 case ISD::ZERO_EXTEND_VECTOR_INREG:
7196 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
7197 case ISD::SHL:
7198 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
7199 case ISD::SRL:
7200 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
7201 case ISD::SRA:
7202 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
7203 case ISD::ADDRSPACECAST:
7204 return lowerAddrSpaceCast(Op, DAG);
7205 case ISD::ROTL:
7206 return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR);
7207 case ISD::FSHL:
7208 return lowerFSHL(Op, DAG);
7209 case ISD::FSHR:
7210 return lowerFSHR(Op, DAG);
7211 case ISD::FP_EXTEND:
7212 case ISD::STRICT_FP_EXTEND:
7213 return lowerFP_EXTEND(Op, DAG);
7214 case ISD::FP_TO_UINT:
7215 case ISD::FP_TO_SINT:
7216 case ISD::STRICT_FP_TO_UINT:
7217 case ISD::STRICT_FP_TO_SINT:
7218 return lower_FP_TO_INT(Op, DAG);
7219 case ISD::UINT_TO_FP:
7220 case ISD::SINT_TO_FP:
7221 case ISD::STRICT_UINT_TO_FP:
7222 case ISD::STRICT_SINT_TO_FP:
7223 return lower_INT_TO_FP(Op, DAG);
7224 case ISD::LOAD:
7225 return lowerLoadF16(Op, DAG);
7226 case ISD::STORE:
7227 return lowerStoreF16(Op, DAG);
7228 case ISD::IS_FPCLASS:
7229 return lowerIS_FPCLASS(Op, DAG);
7230 case ISD::GET_ROUNDING:
7231 return lowerGET_ROUNDING(Op, DAG);
7232 case ISD::READCYCLECOUNTER:
7233 return lowerREADCYCLECOUNTER(Op, DAG);
7234 case ISD::EH_SJLJ_SETJMP:
7235 case ISD::EH_SJLJ_LONGJMP:
7236 // These operations are legal on our platform, but we cannot actually
7237 // set the operation action to Legal as common code would treat this
7238 // as equivalent to Expand. Instead, we keep the operation action to
7239 // Custom and just leave them unchanged here.
7240 return Op;
7241
7242 default:
7243 llvm_unreachable("Unexpected node to lower");
7244 }
7245}
7246
7247static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src,
7248 const SDLoc &SL) {
7249 // If i128 is legal, just use a normal bitcast.
7250 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
7251 return DAG.getBitcast(MVT::f128, Src);
7252
7253 // Otherwise, f128 must live in FP128, so do a partwise move.
7254 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
7255 &SystemZ::FP128BitRegClass);
7256
7257 SDValue Hi, Lo;
7258 std::tie(Lo, Hi) = DAG.SplitScalar(Src, SL, MVT::i64, MVT::i64);
7259
7260 Hi = DAG.getBitcast(MVT::f64, Hi);
7261 Lo = DAG.getBitcast(MVT::f64, Lo);
7262
7263 SDNode *Pair = DAG.getMachineNode(
7264 SystemZ::REG_SEQUENCE, SL, MVT::f128,
7265 {DAG.getTargetConstant(SystemZ::FP128BitRegClassID, SL, MVT::i32), Lo,
7266 DAG.getTargetConstant(SystemZ::subreg_l64, SL, MVT::i32), Hi,
7267 DAG.getTargetConstant(SystemZ::subreg_h64, SL, MVT::i32)});
7268 return SDValue(Pair, 0);
7269}
7270
7271static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src,
7272 const SDLoc &SL) {
7273 // If i128 is legal, just use a normal bitcast.
7274 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
7275 return DAG.getBitcast(MVT::i128, Src);
7276
7277 // Otherwise, f128 must live in FP128, so do a partwise move.
7278 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
7279 &SystemZ::FP128BitRegClass);
7280
7281 SDValue LoFP =
7282 DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src);
7283 SDValue HiFP =
7284 DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::f64, Src);
7285 SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP);
7286 SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP);
7287
7288 return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi);
7289}
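// Editor's note (illustrative sketch, not part of the original source): when
// i128 is not legal, an f128 value lives in a floating-point register pair
// with the high 64 bits in subreg_h64 and the low 64 bits in subreg_l64; the
// two helpers above simply move between that pair and an i64 SplitScalar /
// BUILD_PAIR representation instead of emitting a real bitcast.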
7290
7291// Lower operations with invalid operand or result types.
7292void
7293SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
7294 SmallVectorImpl<SDValue> &Results,
7295 SelectionDAG &DAG) const {
7296 switch (N->getOpcode()) {
7297 case ISD::ATOMIC_LOAD: {
7298 SDLoc DL(N);
7299 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
7300 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
7301 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7302 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
7303 DL, Tys, Ops, MVT::i128, MMO);
7304
7305 SDValue Lowered = lowerGR128ToI128(DAG, Res);
7306 if (N->getValueType(0) == MVT::f128)
7307 Lowered = expandBitCastI128ToF128(DAG, Lowered, DL);
7308 Results.push_back(Lowered);
7309 Results.push_back(Res.getValue(1));
7310 break;
7311 }
7312 case ISD::ATOMIC_STORE: {
7313 SDLoc DL(N);
7314 SDVTList Tys = DAG.getVTList(MVT::Other);
7315 SDValue Val = N->getOperand(1);
7316 if (Val.getValueType() == MVT::f128)
7317 Val = expandBitCastF128ToI128(DAG, Val, DL);
7318 Val = lowerI128ToGR128(DAG, Val);
7319
7320 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)};
7321 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7322 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
7323 DL, Tys, Ops, MVT::i128, MMO);
7324 // We have to enforce sequential consistency by performing a
7325 // serialization operation after the store.
7326 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
7328 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
7329 MVT::Other, Res), 0);
7330 Results.push_back(Res);
7331 break;
7332 }
7333 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
7334 SDLoc DL(N);
7335 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
7336 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
7337 lowerI128ToGR128(DAG, N->getOperand(2)),
7338 lowerI128ToGR128(DAG, N->getOperand(3)) };
7339 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7340 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
7341 DL, Tys, Ops, MVT::i128, MMO);
7342 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
7344 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
7345 Results.push_back(lowerGR128ToI128(DAG, Res));
7346 Results.push_back(Success);
7347 Results.push_back(Res.getValue(2));
7348 break;
7349 }
7350 case ISD::BITCAST: {
7351 if (useSoftFloat())
7352 return;
7353 SDLoc DL(N);
7354 SDValue Src = N->getOperand(0);
7355 EVT SrcVT = Src.getValueType();
7356 EVT ResVT = N->getValueType(0);
7357 if (ResVT == MVT::i128 && SrcVT == MVT::f128)
7358 Results.push_back(expandBitCastF128ToI128(DAG, Src, DL));
7359 else if (SrcVT == MVT::i16 && ResVT == MVT::f16) {
7360 if (Subtarget.hasVector()) {
7361 SDValue In32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Src);
7362 Results.push_back(SDValue(
7363 DAG.getMachineNode(SystemZ::LEFR_16, DL, MVT::f16, In32), 0));
7364 } else {
7365 SDValue In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Src);
7366 Results.push_back(convertToF16(In64, DAG));
7367 }
7368 } else if (SrcVT == MVT::f16 && ResVT == MVT::i16) {
7369 SDValue ExtractedVal =
7370 Subtarget.hasVector()
7371 ? SDValue(DAG.getMachineNode(SystemZ::LFER_16, DL, MVT::i32, Src),
7372 0)
7373 : convertFromF16(Src, DL, DAG);
7374 Results.push_back(DAG.getZExtOrTrunc(ExtractedVal, DL, ResVT));
7375 }
7376 break;
7377 }
7378 case ISD::UINT_TO_FP:
7379 case ISD::SINT_TO_FP:
7380 case ISD::STRICT_UINT_TO_FP:
7381 case ISD::STRICT_SINT_TO_FP: {
7382 if (useSoftFloat())
7383 return;
7384 bool IsStrict = N->isStrictFPOpcode();
7385 SDLoc DL(N);
7386 SDValue InOp = N->getOperand(IsStrict ? 1 : 0);
7387 EVT ResVT = N->getValueType(0);
7388 SDValue Chain = IsStrict ? N->getOperand(0) : DAG.getEntryNode();
7389 if (ResVT == MVT::f16) {
7390 if (!IsStrict) {
7391 SDValue OpF32 = DAG.getNode(N->getOpcode(), DL, MVT::f32, InOp);
7392 Results.push_back(DAG.getFPExtendOrRound(OpF32, DL, MVT::f16));
7393 } else {
7394 SDValue OpF32 =
7395 DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::f32, MVT::Other),
7396 {Chain, InOp});
7397 SDValue F16Res;
7398 std::tie(F16Res, Chain) = DAG.getStrictFPExtendOrRound(
7399 OpF32, OpF32.getValue(1), DL, MVT::f16);
7400 Results.push_back(F16Res);
7401 Results.push_back(Chain);
7402 }
7403 }
7404 break;
7405 }
7406 case ISD::FP_TO_UINT:
7407 case ISD::FP_TO_SINT:
7408 case ISD::STRICT_FP_TO_UINT:
7409 case ISD::STRICT_FP_TO_SINT: {
7410 if (useSoftFloat())
7411 return;
7412 bool IsStrict = N->isStrictFPOpcode();
7413 SDLoc DL(N);
7414 EVT ResVT = N->getValueType(0);
7415 SDValue InOp = N->getOperand(IsStrict ? 1 : 0);
7416 EVT InVT = InOp->getValueType(0);
7417 SDValue Chain = IsStrict ? N->getOperand(0) : DAG.getEntryNode();
7418 if (InVT == MVT::f16) {
7419 if (!IsStrict) {
7420 SDValue InF32 = DAG.getFPExtendOrRound(InOp, DL, MVT::f32);
7421 Results.push_back(DAG.getNode(N->getOpcode(), DL, ResVT, InF32));
7422 } else {
7423 SDValue InF32;
7424 std::tie(InF32, Chain) =
7425 DAG.getStrictFPExtendOrRound(InOp, Chain, DL, MVT::f32);
7426 SDValue OpF32 =
7427 DAG.getNode(N->getOpcode(), DL, DAG.getVTList(ResVT, MVT::Other),
7428 {Chain, InF32});
7429 Results.push_back(OpF32);
7430 Results.push_back(OpF32.getValue(1));
7431 }
7432 }
7433 break;
7434 }
7435 default:
7436 llvm_unreachable("Unexpected node to lower");
7437 }
7438}
7439
7440void
7441SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
7442 SmallVectorImpl<SDValue> &Results,
7443 SelectionDAG &DAG) const {
7444 return LowerOperationWrapper(N, Results, DAG);
7445}
7446
7447// Return true if VT is a vector whose elements are a whole number of bytes
7448// in width. Also check for presence of vector support.
7449bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
7450 if (!Subtarget.hasVector())
7451 return false;
7452
7453 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
7454}
7455
7456// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
7457// producing a result of type ResVT. Op is a possibly bitcast version
7458// of the input vector and Index is the index (based on type VecVT) that
7459// should be extracted. Return the new extraction if a simplification
7460// was possible or if Force is true.
7461SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
7462 EVT VecVT, SDValue Op,
7463 unsigned Index,
7464 DAGCombinerInfo &DCI,
7465 bool Force) const {
7466 SelectionDAG &DAG = DCI.DAG;
7467
7468 // The number of bytes being extracted.
7469 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
7470
7471 for (;;) {
7472 unsigned Opcode = Op.getOpcode();
7473 if (Opcode == ISD::BITCAST)
7474 // Look through bitcasts.
7475 Op = Op.getOperand(0);
7476 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
7477 canTreatAsByteVector(Op.getValueType())) {
7478 // Get a VPERM-like permute mask and see whether the bytes covered
7479 // by the extracted element are a contiguous sequence from one
7480 // source operand.
7482 if (!getVPermMask(Op, Bytes))
7483 break;
7484 int First;
7485 if (!getShuffleInput(Bytes, Index * BytesPerElement,
7486 BytesPerElement, First))
7487 break;
7488 if (First < 0)
7489 return DAG.getUNDEF(ResVT);
7490 // Make sure the contiguous sequence starts at a multiple of the
7491 // original element size.
7492 unsigned Byte = unsigned(First) % Bytes.size();
7493 if (Byte % BytesPerElement != 0)
7494 break;
7495 // We can get the extracted value directly from an input.
7496 Index = Byte / BytesPerElement;
7497 Op = Op.getOperand(unsigned(First) / Bytes.size());
7498 Force = true;
7499 } else if (Opcode == ISD::BUILD_VECTOR &&
7500 canTreatAsByteVector(Op.getValueType())) {
7501 // We can only optimize this case if the BUILD_VECTOR elements are
7502 // at least as wide as the extracted value.
7503 EVT OpVT = Op.getValueType();
7504 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
7505 if (OpBytesPerElement < BytesPerElement)
7506 break;
7507 // Make sure that the least-significant bit of the extracted value
7508 // is the least significant bit of an input.
7509 unsigned End = (Index + 1) * BytesPerElement;
7510 if (End % OpBytesPerElement != 0)
7511 break;
7512 // We're extracting the low part of one operand of the BUILD_VECTOR.
7513 Op = Op.getOperand(End / OpBytesPerElement - 1);
7514 if (!Op.getValueType().isInteger()) {
7515 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
7516 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
7517 DCI.AddToWorklist(Op.getNode());
7518 }
7519 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
7520 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
7521 if (VT != ResVT) {
7522 DCI.AddToWorklist(Op.getNode());
7523 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
7524 }
7525 return Op;
7526 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
7527 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
7528 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
7529 canTreatAsByteVector(Op.getValueType()) &&
7530 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
7531 // Make sure that only the unextended bits are significant.
7532 EVT ExtVT = Op.getValueType();
7533 EVT OpVT = Op.getOperand(0).getValueType();
7534 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
7535 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
7536 unsigned Byte = Index * BytesPerElement;
7537 unsigned SubByte = Byte % ExtBytesPerElement;
7538 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
7539 if (SubByte < MinSubByte ||
7540 SubByte + BytesPerElement > ExtBytesPerElement)
7541 break;
7542 // Get the byte offset of the unextended element
7543 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
7544 // ...then add the byte offset relative to that element.
7545 Byte += SubByte - MinSubByte;
7546 if (Byte % BytesPerElement != 0)
7547 break;
7548 Op = Op.getOperand(0);
7549 Index = Byte / BytesPerElement;
7550 Force = true;
7551 } else
7552 break;
7553 }
7554 if (Force) {
7555 if (Op.getValueType() != VecVT) {
7556 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
7557 DCI.AddToWorklist(Op.getNode());
7558 }
7559 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
7560 DAG.getConstant(Index, DL, MVT::i32));
7561 }
7562 return SDValue();
7563}
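// Editor's note (illustrative sketch, not part of the original source): in
// the VECTOR_SHUFFLE case above, extracting i32 element 2 from a shuffle
// whose VPERM byte mask maps result bytes 8..11 to bytes 4..7 of operand 0 is
// rewritten as an extract of element 1 directly from that operand, since the
// covered bytes are contiguous and element-aligned in a single source.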
7564
7565// Optimize vector operations in scalar value Op on the basis that Op
7566// is truncated to TruncVT.
7567SDValue SystemZTargetLowering::combineTruncateExtract(
7568 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
7569 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
7570 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
7571 // of type TruncVT.
7572 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7573 TruncVT.getSizeInBits() % 8 == 0) {
7574 SDValue Vec = Op.getOperand(0);
7575 EVT VecVT = Vec.getValueType();
7576 if (canTreatAsByteVector(VecVT)) {
7577 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
7578 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
7579 unsigned TruncBytes = TruncVT.getStoreSize();
7580 if (BytesPerElement % TruncBytes == 0) {
7581 // Calculate the value of Y' in the above description. We are
7582 // splitting the original elements into Scale equal-sized pieces
7583 // and for truncation purposes want the last (least-significant)
7584 // of these pieces for IndexN. This is easiest to do by calculating
7585 // the start index of the following element and then subtracting 1.
7586 unsigned Scale = BytesPerElement / TruncBytes;
7587 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
7588
7589 // Defer the creation of the bitcast from X to combineExtract,
7590 // which might be able to optimize the extraction.
7591 VecVT = EVT::getVectorVT(*DCI.DAG.getContext(),
7592 MVT::getIntegerVT(TruncBytes * 8),
7593 VecVT.getStoreSize() / TruncBytes);
7594 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
7595 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
7596 }
7597 }
7598 }
7599 }
7600 return SDValue();
7601}
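// Editor's note (worked example, not in the original source): truncating
// (extract_vector_elt <4 x i32> X, 1) to i8 has BytesPerElement = 4 and
// TruncBytes = 1, so Scale = 4 and NewIndex = (1 + 1) * 4 - 1 = 7: the
// combine re-extracts element 7 of X viewed as v16i8, which is the
// least-significant byte of the original element 1 on this big-endian target.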
7602
7603SDValue SystemZTargetLowering::combineZERO_EXTEND(
7604 SDNode *N, DAGCombinerInfo &DCI) const {
7605 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
7606 SelectionDAG &DAG = DCI.DAG;
7607 SDValue N0 = N->getOperand(0);
7608 EVT VT = N->getValueType(0);
7609 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
7610 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
7611 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7612 if (TrueOp && FalseOp) {
7613 SDLoc DL(N0);
7614 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
7615 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
7616 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
7617 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
7618 // If N0 has multiple uses, change other uses as well.
7619 if (!N0.hasOneUse()) {
7620 SDValue TruncSelect =
7621 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
7622 DCI.CombineTo(N0.getNode(), TruncSelect);
7623 }
7624 return NewSelect;
7625 }
7626 }
7627 // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
7628 // of the result is smaller than the size of X and all the truncated bits
7629 // of X are already zero.
7630 if (N0.getOpcode() == ISD::XOR &&
7631 N0.hasOneUse() && N0.getOperand(0).hasOneUse() &&
7632 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
7633 N0.getOperand(1).getOpcode() == ISD::Constant) {
7634 SDValue X = N0.getOperand(0).getOperand(0);
7635 if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
7636 KnownBits Known = DAG.computeKnownBits(X);
7637 APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(),
7638 N0.getValueSizeInBits(),
7639 VT.getSizeInBits());
7640 if (TruncatedBits.isSubsetOf(Known.Zero)) {
7641 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
7642 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
7643 return DAG.getNode(ISD::XOR, SDLoc(N0), VT,
7644 X, DAG.getConstant(Mask, SDLoc(N0), VT));
7645 }
7646 }
7647 }
7648 // Recognize patterns for VECTOR SUBTRACT COMPUTE BORROW INDICATION
7649 // and VECTOR ADD COMPUTE CARRY for i128:
7650 // (zext (setcc_uge X Y)) --> (VSCBI X Y)
7651 // (zext (setcc_ule Y X)) --> (VSCBI X Y)
7652 // (zext (setcc_ult (add X Y) X/Y) -> (VACC X Y)
7653 // (zext (setcc_ugt X/Y (add X Y)) -> (VACC X Y)
7654 // For vector types, these patterns are recognized in the .td file.
7655 if (N0.getOpcode() == ISD::SETCC && isTypeLegal(VT) && VT == MVT::i128 &&
7656 N0.getOperand(0).getValueType() == VT) {
7657 SDValue Op0 = N0.getOperand(0);
7658 SDValue Op1 = N0.getOperand(1);
7659 const ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7660 switch (CC) {
7661 case ISD::SETULE:
7662 std::swap(Op0, Op1);
7663 [[fallthrough]];
7664 case ISD::SETUGE:
7665 return DAG.getNode(SystemZISD::VSCBI, SDLoc(N0), VT, Op0, Op1);
7666 case ISD::SETUGT:
7667 std::swap(Op0, Op1);
7668 [[fallthrough]];
7669 case ISD::SETULT:
7670 if (Op0->hasOneUse() && Op0->getOpcode() == ISD::ADD &&
7671 (Op0->getOperand(0) == Op1 || Op0->getOperand(1) == Op1))
7672 return DAG.getNode(SystemZISD::VACC, SDLoc(N0), VT, Op0->getOperand(0),
7673 Op0->getOperand(1));
7674 break;
7675 default:
7676 break;
7677 }
7678 }
7679
7680 return SDValue();
7681}
7682
7683SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
7684 SDNode *N, DAGCombinerInfo &DCI) const {
7685 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
7686 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
7687 // into (select_cc LHS, RHS, -1, 0, COND)
7688 SelectionDAG &DAG = DCI.DAG;
7689 SDValue N0 = N->getOperand(0);
7690 EVT VT = N->getValueType(0);
7691 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
7692 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
7693 N0 = N0.getOperand(0);
7694 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
7695 SDLoc DL(N0);
7696 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
7697 DAG.getAllOnesConstant(DL, VT),
7698 DAG.getConstant(0, DL, VT), N0.getOperand(2) };
7699 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
7700 }
7701 return SDValue();
7702}
7703
7704SDValue SystemZTargetLowering::combineSIGN_EXTEND(
7705 SDNode *N, DAGCombinerInfo &DCI) const {
7706 // Convert (sext (ashr (shl X, C1), C2)) to
7707 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
7708 // cheap as narrower ones.
7709 SelectionDAG &DAG = DCI.DAG;
7710 SDValue N0 = N->getOperand(0);
7711 EVT VT = N->getValueType(0);
7712 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
7713 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7714 SDValue Inner = N0.getOperand(0);
7715 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
7716 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
7717 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
7718 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
7719 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
7720 EVT ShiftVT = N0.getOperand(1).getValueType();
7721 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
7722 Inner.getOperand(0));
7723 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
7724 DAG.getConstant(NewShlAmt, SDLoc(Inner),
7725 ShiftVT));
7726 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
7727 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
7728 }
7729 }
7730 }
7731
7732 return SDValue();
7733}
7734
7735SDValue SystemZTargetLowering::combineMERGE(
7736 SDNode *N, DAGCombinerInfo &DCI) const {
7737 SelectionDAG &DAG = DCI.DAG;
7738 unsigned Opcode = N->getOpcode();
7739 SDValue Op0 = N->getOperand(0);
7740 SDValue Op1 = N->getOperand(1);
7741 if (Op0.getOpcode() == ISD::BITCAST)
7742 Op0 = Op0.getOperand(0);
7743 if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
7744 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
7745 // for v4f32.
7746 if (Op1 == N->getOperand(0))
7747 return Op1;
7748 // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
7749 EVT VT = Op1.getValueType();
7750 unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
7751 if (ElemBytes <= 4) {
7752 Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
7753 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
7754 EVT InVT = VT.changeVectorElementTypeToInteger();
7755 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
7756 SystemZ::VectorBytes / ElemBytes / 2);
7757 if (VT != InVT) {
7758 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
7759 DCI.AddToWorklist(Op1.getNode());
7760 }
7761 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
7762 DCI.AddToWorklist(Op.getNode());
7763 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
7764 }
7765 }
7766 return SDValue();
7767}
7768
7769static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7770 SDNode *&HiPart) {
7771 LoPart = HiPart = nullptr;
7772
7773 // Scan through all users.
7774 for (SDUse &Use : LD->uses()) {
7775 // Skip the uses of the chain.
7776 if (Use.getResNo() != 0)
7777 continue;
7778
7779 // Verify every user is a TRUNCATE to i64 of the low or high half.
7780 SDNode *User = Use.getUser();
7781 bool IsLoPart = true;
7782 if (User->getOpcode() == ISD::SRL &&
7783 User->getOperand(1).getOpcode() == ISD::Constant &&
7784 User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
7785 User = *User->user_begin();
7786 IsLoPart = false;
7787 }
7788 if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != MVT::i64)
7789 return false;
7790
7791 if (IsLoPart) {
7792 if (LoPart)
7793 return false;
7794 LoPart = User;
7795 } else {
7796 if (HiPart)
7797 return false;
7798 HiPart = User;
7799 }
7800 }
7801 return true;
7802}
7803
7804static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7805 SDNode *&HiPart) {
7806 LoPart = HiPart = nullptr;
7807
7808 // Scan through all users.
7809 for (SDUse &Use : LD->uses()) {
7810 // Skip the uses of the chain.
7811 if (Use.getResNo() != 0)
7812 continue;
7813
7814 // Verify every user is an EXTRACT_SUBREG of the low or high half.
7815 SDNode *User = Use.getUser();
7816 if (!User->hasOneUse() || !User->isMachineOpcode() ||
7817 User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
7818 return false;
7819
7820 switch (User->getConstantOperandVal(1)) {
7821 case SystemZ::subreg_l64:
7822 if (LoPart)
7823 return false;
7824 LoPart = User;
7825 break;
7826 case SystemZ::subreg_h64:
7827 if (HiPart)
7828 return false;
7829 HiPart = User;
7830 break;
7831 default:
7832 return false;
7833 }
7834 }
7835 return true;
7836}
7837
7838SDValue SystemZTargetLowering::combineLOAD(
7839 SDNode *N, DAGCombinerInfo &DCI) const {
7840 SelectionDAG &DAG = DCI.DAG;
7841 EVT LdVT = N->getValueType(0);
7842 if (auto *LN = dyn_cast<LoadSDNode>(N)) {
7843 if (LN->getAddressSpace() == SYSTEMZAS::PTR32) {
7844 MVT PtrVT = getPointerTy(DAG.getDataLayout());
7845 MVT LoadNodeVT = LN->getBasePtr().getSimpleValueType();
7846 if (PtrVT != LoadNodeVT) {
7847 SDLoc DL(LN);
7848 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(
7849 DL, PtrVT, LN->getBasePtr(), SYSTEMZAS::PTR32, 0);
7850 return DAG.getExtLoad(LN->getExtensionType(), DL, LN->getValueType(0),
7851 LN->getChain(), AddrSpaceCast, LN->getMemoryVT(),
7852 LN->getMemOperand());
7853 }
7854 }
7855 }
7856 SDLoc DL(N);
7857
7858 // Replace a 128-bit load that is used solely to move its value into GPRs
7859 // by separate loads of both halves.
7860 LoadSDNode *LD = cast<LoadSDNode>(N);
7861 if (LD->isSimple() && ISD::isNormalLoad(LD)) {
7862 SDNode *LoPart, *HiPart;
7863 if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) ||
7864 (LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) {
7865 // Rewrite each extraction as an independent load.
7866 SmallVector<SDValue, 2> ArgChains;
7867 if (HiPart) {
7868 SDValue EltLoad = DAG.getLoad(
7869 HiPart->getValueType(0), DL, LD->getChain(), LD->getBasePtr(),
7870 LD->getPointerInfo(), LD->getBaseAlign(),
7871 LD->getMemOperand()->getFlags(), LD->getAAInfo());
7872
7873 DCI.CombineTo(HiPart, EltLoad, true);
7874 ArgChains.push_back(EltLoad.getValue(1));
7875 }
7876 if (LoPart) {
7877 SDValue EltLoad = DAG.getLoad(
7878 LoPart->getValueType(0), DL, LD->getChain(),
7879 DAG.getObjectPtrOffset(DL, LD->getBasePtr(), TypeSize::getFixed(8)),
7880 LD->getPointerInfo().getWithOffset(8), LD->getBaseAlign(),
7881 LD->getMemOperand()->getFlags(), LD->getAAInfo());
7882
7883 DCI.CombineTo(LoPart, EltLoad, true);
7884 ArgChains.push_back(EltLoad.getValue(1));
7885 }
7886
7887 // Collect all chains via TokenFactor.
7888 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, ArgChains);
7889 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
7890 DCI.AddToWorklist(Chain.getNode());
7891 return SDValue(N, 0);
7892 }
7893 }
7894
7895 if (LdVT.isVector() || LdVT.isInteger())
7896 return SDValue();
7897 // Transform a scalar load that is REPLICATEd as well as having other
7898 // use(s) to the form where the other use(s) use the first element of the
7899 // REPLICATE instead of the load. Otherwise instruction selection will not
7900 // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
7901 // point loads.
7902
7903 SDValue Replicate;
7904 SmallVector<SDNode*, 8> OtherUses;
7905 for (SDUse &Use : N->uses()) {
7906 if (Use.getUser()->getOpcode() == SystemZISD::REPLICATE) {
7907 if (Replicate)
7908 return SDValue(); // Should never happen
7909 Replicate = SDValue(Use.getUser(), 0);
7910 } else if (Use.getResNo() == 0)
7911 OtherUses.push_back(Use.getUser());
7912 }
7913 if (!Replicate || OtherUses.empty())
7914 return SDValue();
7915
7916 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
7917 Replicate, DAG.getConstant(0, DL, MVT::i32));
7918 // Update uses of the loaded Value while preserving old chains.
7919 for (SDNode *U : OtherUses) {
7920 SmallVector<SDValue, 8> Ops;
7921 for (SDValue Op : U->ops())
7922 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
7923 DAG.UpdateNodeOperands(U, Ops);
7924 }
7925 return SDValue(N, 0);
7926}
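// Editor's note (illustrative sketch, not part of the original source): for
// the 128-bit split above, a simple i128 load whose only users are
// (trunc ... to i64) and (trunc (srl ..., 64) to i64) is replaced by two i64
// loads, the high half at offset 0 and the low half at offset 8, matching the
// big-endian layout of the original 16-byte access.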
7927
7928bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
7929 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
7930 return true;
7931 if (Subtarget.hasVectorEnhancements2())
7932 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128)
7933 return true;
7934 return false;
7935}
7936
7937static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
7938 if (!VT.isVector() || !VT.isSimple() ||
7939 VT.getSizeInBits() != 128 ||
7940 VT.getScalarSizeInBits() % 8 != 0)
7941 return false;
7942
7943 unsigned NumElts = VT.getVectorNumElements();
7944 for (unsigned i = 0; i < NumElts; ++i) {
7945 if (M[i] < 0) continue; // ignore UNDEF indices
7946 if ((unsigned) M[i] != NumElts - 1 - i)
7947 return false;
7948 }
7949
7950 return true;
7951}
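// Editor's note (example, not in the original source): the predicate above
// accepts exactly the masks that reverse all elements of a 128-bit vector,
// e.g. {3,2,1,0} for v4i32 or {7,6,5,4,3,2,1,0} for v8i16, with undef (-1)
// entries allowed anywhere; such shuffles map to the element-reversing
// VLER/VSTER forms on vector-enhancements-2 targets.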
7952
7953static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
7954 for (auto *U : StoredVal->users()) {
7955 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
7956 EVT CurrMemVT = ST->getMemoryVT().getScalarType();
7957 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
7958 continue;
7959 } else if (isa<BuildVectorSDNode>(U)) {
7960 SDValue BuildVector = SDValue(U, 0);
7961 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
7962 isOnlyUsedByStores(BuildVector, DAG))
7963 continue;
7964 }
7965 return false;
7966 }
7967 return true;
7968}
7969
7970static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart,
7971 SDValue &HiPart) {
7972 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
7973 return false;
7974
7975 SDValue Op0 = Val.getOperand(0);
7976 SDValue Op1 = Val.getOperand(1);
7977
7978 if (Op0.getOpcode() == ISD::SHL)
7979 std::swap(Op0, Op1);
7980 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() ||
7981 Op1.getOperand(1).getOpcode() != ISD::Constant ||
7982 Op1.getConstantOperandVal(1) != 64)
7983 return false;
7984 Op1 = Op1.getOperand(0);
7985
7986 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() ||
7987 Op0.getOperand(0).getValueType() != MVT::i64)
7988 return false;
7989 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() ||
7990 Op1.getOperand(0).getValueType() != MVT::i64)
7991 return false;
7992
7993 LoPart = Op0.getOperand(0);
7994 HiPart = Op1.getOperand(0);
7995 return true;
7996}
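// Editor's note (illustrative sketch, not part of the original source): the
// pattern matched above is
//   (or (zero_extend i64 Lo to i128), (shl (any_extend i64 Hi to i128), 64))
// with single-use operands, i.e. an i128 value assembled from two GPR-sized
// halves; combineSTORE uses this to store the halves directly.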
7997
7998static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart,
7999 SDValue &HiPart) {
8000 if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() ||
8001 Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE)
8002 return false;
8003
8004 if (Val->getNumOperands() != 5 ||
8005 Val->getOperand(0)->getAsZExtVal() != SystemZ::FP128BitRegClassID ||
8006 Val->getOperand(2)->getAsZExtVal() != SystemZ::subreg_l64 ||
8007 Val->getOperand(4)->getAsZExtVal() != SystemZ::subreg_h64)
8008 return false;
8009
8010 LoPart = Val->getOperand(1);
8011 HiPart = Val->getOperand(3);
8012 return true;
8013}
8014
8015SDValue SystemZTargetLowering::combineSTORE(
8016 SDNode *N, DAGCombinerInfo &DCI) const {
8017 SelectionDAG &DAG = DCI.DAG;
8018 auto *SN = cast<StoreSDNode>(N);
8019 auto &Op1 = N->getOperand(1);
8020 EVT MemVT = SN->getMemoryVT();
8021
8022 if (SN->getAddressSpace() == SYSTEMZAS::PTR32) {
8023 MVT PtrVT = getPointerTy(DAG.getDataLayout());
8024 MVT StoreNodeVT = SN->getBasePtr().getSimpleValueType();
8025 if (PtrVT != StoreNodeVT) {
8026 SDLoc DL(SN);
8027 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(DL, PtrVT, SN->getBasePtr(),
8028 SYSTEMZAS::PTR32, 0);
8029 return DAG.getStore(SN->getChain(), DL, SN->getValue(), AddrSpaceCast,
8030 SN->getPointerInfo(), SN->getBaseAlign(),
8031 SN->getMemOperand()->getFlags(), SN->getAAInfo());
8032 }
8033 }
8034
8035 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
8036 // for the extraction to be done on a vMiN value, so that we can use VSTE.
8037 // If X has wider elements then convert it to:
8038 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
8039 if (MemVT.isInteger() && SN->isTruncatingStore()) {
8040 if (SDValue Value =
8041 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
8042 DCI.AddToWorklist(Value.getNode());
8043
8044 // Rewrite the store with the new form of stored value.
8045 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
8046 SN->getBasePtr(), SN->getMemoryVT(),
8047 SN->getMemOperand());
8048 }
8049 }
8050 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
8051 if (!SN->isTruncatingStore() &&
8052 Op1.getOpcode() == ISD::BSWAP &&
8053 Op1.getNode()->hasOneUse() &&
8054 canLoadStoreByteSwapped(Op1.getValueType())) {
8055
8056 SDValue BSwapOp = Op1.getOperand(0);
8057
8058 if (BSwapOp.getValueType() == MVT::i16)
8059 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
8060
8061 SDValue Ops[] = {
8062 N->getOperand(0), BSwapOp, N->getOperand(2)
8063 };
8064
8065 return
8066 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
8067 Ops, MemVT, SN->getMemOperand());
8068 }
8069 // Combine STORE (element-swap) into VSTER
8070 if (!SN->isTruncatingStore() &&
8071 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
8072 Op1.getNode()->hasOneUse() &&
8073 Subtarget.hasVectorEnhancements2()) {
8074 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
8075 ArrayRef<int> ShuffleMask = SVN->getMask();
8076 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
8077 SDValue Ops[] = {
8078 N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
8079 };
8080
8081 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
8082 DAG.getVTList(MVT::Other),
8083 Ops, MemVT, SN->getMemOperand());
8084 }
8085 }
8086
8087 // Combine STORE (READCYCLECOUNTER) into STCKF.
8088 if (!SN->isTruncatingStore() &&
8089 Op1.getOpcode() == ISD::READCYCLECOUNTER &&
8090 Op1.hasOneUse() &&
8091 N->getOperand(0).reachesChainWithoutSideEffects(SDValue(Op1.getNode(), 1))) {
8092 SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) };
8093 return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N),
8094 DAG.getVTList(MVT::Other),
8095 Ops, MemVT, SN->getMemOperand());
8096 }
8097
8098 // Transform a store of a 128-bit value moved from parts into two stores.
8099 if (SN->isSimple() && ISD::isNormalStore(SN)) {
8100 SDValue LoPart, HiPart;
8101 if ((MemVT == MVT::i128 && isI128MovedFromParts(Op1, LoPart, HiPart)) ||
8102 (MemVT == MVT::f128 && isF128MovedFromParts(Op1, LoPart, HiPart))) {
8103 SDLoc DL(SN);
8104 SDValue Chain0 = DAG.getStore(
8105 SN->getChain(), DL, HiPart, SN->getBasePtr(), SN->getPointerInfo(),
8106 SN->getBaseAlign(), SN->getMemOperand()->getFlags(), SN->getAAInfo());
8107 SDValue Chain1 = DAG.getStore(
8108 SN->getChain(), DL, LoPart,
8109 DAG.getObjectPtrOffset(DL, SN->getBasePtr(), TypeSize::getFixed(8)),
8110 SN->getPointerInfo().getWithOffset(8), SN->getBaseAlign(),
8111 SN->getMemOperand()->getFlags(), SN->getAAInfo());
8112
8113 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
8114 }
8115 }
8116
8117 // Replicate a reg or immediate with VREP instead of scalar multiply or
8118 // immediate load. It seems best to do this during the first DAGCombine as
8119 // it is straightforward to handle the zero-extend node in the initial
8120 // DAG, and also not worry about keeping the new MemVT legal (e.g. when
8121 // extracting an i16 element from a v16i8 vector).
8122 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
8123 isOnlyUsedByStores(Op1, DAG)) {
8124 SDValue Word = SDValue();
8125 EVT WordVT;
8126
8127 // Find a replicated immediate and return it if found in Word and its
8128 // type in WordVT.
8129 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
8130 // Some constants are better handled with a scalar store.
8131 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
8132 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
8133 return;
8134
8135 APInt Val = C->getAPIntValue();
8136 // Truncate Val in case of a truncating store.
8137 if (!llvm::isUIntN(TotBytes * 8, Val.getZExtValue())) {
8138 assert(SN->isTruncatingStore() &&
8139 "Non-truncating store and immediate value does not fit?");
8140 Val = Val.trunc(TotBytes * 8);
8141 }
8142
8143 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, Val.getZExtValue()));
8144 if (VCI.isVectorConstantLegal(Subtarget) &&
8145 VCI.Opcode == SystemZISD::REPLICATE) {
8146 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
8147 WordVT = VCI.VecVT.getScalarType();
8148 }
8149 };
8150
8151 // Find a replicated register and return it if found in Word and its type
8152 // in WordVT.
8153 auto FindReplicatedReg = [&](SDValue MulOp) {
8154 EVT MulVT = MulOp.getValueType();
8155 if (MulOp->getOpcode() == ISD::MUL &&
8156 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
8157 // Find a zero extended value and its type.
8158 SDValue LHS = MulOp->getOperand(0);
8159 if (LHS->getOpcode() == ISD::ZERO_EXTEND)
8160 WordVT = LHS->getOperand(0).getValueType();
8161 else if (LHS->getOpcode() == ISD::AssertZext)
8162 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
8163 else
8164 return;
8165 // Find a replicating constant, e.g. 0x00010001.
8166 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {
8167 SystemZVectorConstantInfo VCI(
8168 APInt(MulVT.getSizeInBits(), C->getZExtValue()));
8169 if (VCI.isVectorConstantLegal(Subtarget) &&
8170 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
8171 WordVT == VCI.VecVT.getScalarType())
8172 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);
8173 }
8174 }
8175 };
8176
8177 if (isa<BuildVectorSDNode>(Op1) &&
8178 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {
8179 SDValue SplatVal = Op1->getOperand(0);
8180 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))
8181 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
8182 else
8183 FindReplicatedReg(SplatVal);
8184 } else {
8185 if (auto *C = dyn_cast<ConstantSDNode>(Op1))
8186 FindReplicatedImm(C, MemVT.getStoreSize());
8187 else
8188 FindReplicatedReg(Op1);
8189 }
8190
8191 if (Word != SDValue()) {
8192 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
8193 "Bad type handling");
8194 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
8195 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);
8196 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);
8197 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,
8198 SN->getBasePtr(), SN->getMemOperand());
8199 }
8200 }
8201
8202 return SDValue();
8203}
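// Editor's note (example, not in the original source): in the replication
// part of combineSTORE above, storing the i64 immediate 0x0001000100010001 is
// recognized as a REPLICATE of the i16 value 1, so the store becomes a v4i16
// splat-vector store and can use VREPI instead of materializing the constant
// in a GPR.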
8204
8205SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
8206 SDNode *N, DAGCombinerInfo &DCI) const {
8207 SelectionDAG &DAG = DCI.DAG;
8208 // Combine element-swap (LOAD) into VLER
8209 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
8210 N->getOperand(0).hasOneUse() &&
8211 Subtarget.hasVectorEnhancements2()) {
8212 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
8213 ArrayRef<int> ShuffleMask = SVN->getMask();
8214 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
8215 SDValue Load = N->getOperand(0);
8216 LoadSDNode *LD = cast<LoadSDNode>(Load);
8217
8218 // Create the element-swapping load.
8219 SDValue Ops[] = {
8220 LD->getChain(), // Chain
8221 LD->getBasePtr() // Ptr
8222 };
8223 SDValue ESLoad =
8224 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
8225 DAG.getVTList(LD->getValueType(0), MVT::Other),
8226 Ops, LD->getMemoryVT(), LD->getMemOperand());
8227
8228 // First, combine the VECTOR_SHUFFLE away. This makes the value produced
8229 // by the load dead.
8230 DCI.CombineTo(N, ESLoad);
8231
8232 // Next, combine the load away, we give it a bogus result value but a real
8233 // chain result. The result value is dead because the shuffle is dead.
8234 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
8235
8236 // Return N so it doesn't get rechecked!
8237 return SDValue(N, 0);
8238 }
8239 }
8240
8241 return SDValue();
8242}
8243
8244SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
8245 SDNode *N, DAGCombinerInfo &DCI) const {
8246 SelectionDAG &DAG = DCI.DAG;
8247
8248 if (!Subtarget.hasVector())
8249 return SDValue();
8250
8251 // Look through bitcasts that retain the number of vector elements.
8252 SDValue Op = N->getOperand(0);
8253 if (Op.getOpcode() == ISD::BITCAST &&
8254 Op.getValueType().isVector() &&
8255 Op.getOperand(0).getValueType().isVector() &&
8256 Op.getValueType().getVectorNumElements() ==
8257 Op.getOperand(0).getValueType().getVectorNumElements())
8258 Op = Op.getOperand(0);
8259
8260 // Pull BSWAP out of a vector extraction.
8261 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
8262 EVT VecVT = Op.getValueType();
8263 EVT EltVT = VecVT.getVectorElementType();
8264 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
8265 Op.getOperand(0), N->getOperand(1));
8266 DCI.AddToWorklist(Op.getNode());
8267 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
8268 if (EltVT != N->getValueType(0)) {
8269 DCI.AddToWorklist(Op.getNode());
8270 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
8271 }
8272 return Op;
8273 }
8274
8275 // Try to simplify a vector extraction.
8276 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
8277 SDValue Op0 = N->getOperand(0);
8278 EVT VecVT = Op0.getValueType();
8279 if (canTreatAsByteVector(VecVT))
8280 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
8281 IndexN->getZExtValue(), DCI, false);
8282 }
8283 return SDValue();
8284}
8285
8286SDValue SystemZTargetLowering::combineJOIN_DWORDS(
8287 SDNode *N, DAGCombinerInfo &DCI) const {
8288 SelectionDAG &DAG = DCI.DAG;
8289 // (join_dwords X, X) == (replicate X)
8290 if (N->getOperand(0) == N->getOperand(1))
8291 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
8292 N->getOperand(0));
8293 return SDValue();
8294}
8295
8296static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
8297 SDValue Chain1 = N1->getOperand(0);
8298 SDValue Chain2 = N2->getOperand(0);
8299
8300 // Trivial case: both nodes take the same chain.
8301 if (Chain1 == Chain2)
8302 return Chain1;
8303
8304 // FIXME - we could handle more complex cases via TokenFactor,
8305 // assuming we can verify that this would not create a cycle.
8306 return SDValue();
8307}
8308
8309SDValue SystemZTargetLowering::combineFP_ROUND(
8310 SDNode *N, DAGCombinerInfo &DCI) const {
8311
8312 if (!Subtarget.hasVector())
8313 return SDValue();
8314
8315 // (fpround (extract_vector_elt X 0))
8316 // (fpround (extract_vector_elt X 1)) ->
8317 // (extract_vector_elt (VROUND X) 0)
8318 // (extract_vector_elt (VROUND X) 2)
8319 //
8320 // This is a special case since the target doesn't really support v2f32s.
8321 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
8322 SelectionDAG &DAG = DCI.DAG;
8323 SDValue Op0 = N->getOperand(OpNo);
8324 if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() &&
8325 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8326 Op0.getOperand(0).getValueType() == MVT::v2f64 &&
8327 Op0.getOperand(1).getOpcode() == ISD::Constant &&
8328 Op0.getConstantOperandVal(1) == 0) {
8329 SDValue Vec = Op0.getOperand(0);
8330 for (auto *U : Vec->users()) {
8331 if (U != Op0.getNode() && U->hasOneUse() &&
8332 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8333 U->getOperand(0) == Vec &&
8334 U->getOperand(1).getOpcode() == ISD::Constant &&
8335 U->getConstantOperandVal(1) == 1) {
8336 SDValue OtherRound = SDValue(*U->user_begin(), 0);
8337 if (OtherRound.getOpcode() == N->getOpcode() &&
8338 OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
8339 OtherRound.getValueType() == MVT::f32) {
8340 SDValue VRound, Chain;
8341 if (N->isStrictFPOpcode()) {
8342 Chain = MergeInputChains(N, OtherRound.getNode());
8343 if (!Chain)
8344 continue;
8345 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
8346 {MVT::v4f32, MVT::Other}, {Chain, Vec});
8347 Chain = VRound.getValue(1);
8348 } else
8349 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
8350 MVT::v4f32, Vec);
8351 DCI.AddToWorklist(VRound.getNode());
8352 SDValue Extract1 =
8353 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
8354 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
8355 DCI.AddToWorklist(Extract1.getNode());
8356 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
8357 if (Chain)
8358 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
8359 SDValue Extract0 =
8360 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
8361 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
8362 if (Chain)
8363 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
8364 N->getVTList(), Extract0, Chain);
8365 return Extract0;
8366 }
8367 }
8368 }
8369 }
8370 return SDValue();
8371}
8372
8373SDValue SystemZTargetLowering::combineFP_EXTEND(
8374 SDNode *N, DAGCombinerInfo &DCI) const {
8375
8376 if (!Subtarget.hasVector())
8377 return SDValue();
8378
8379 // (fpextend (extract_vector_elt X 0))
8380 // (fpextend (extract_vector_elt X 2)) ->
8381 // (extract_vector_elt (VEXTEND X) 0)
8382 // (extract_vector_elt (VEXTEND X) 1)
8383 //
8384 // This is a special case since the target doesn't really support v2f32s.
8385 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
8386 SelectionDAG &DAG = DCI.DAG;
8387 SDValue Op0 = N->getOperand(OpNo);
8388 if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() &&
8389 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8390 Op0.getOperand(0).getValueType() == MVT::v4f32 &&
8391 Op0.getOperand(1).getOpcode() == ISD::Constant &&
8392 Op0.getConstantOperandVal(1) == 0) {
8393 SDValue Vec = Op0.getOperand(0);
8394 for (auto *U : Vec->users()) {
8395 if (U != Op0.getNode() && U->hasOneUse() &&
8396 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8397 U->getOperand(0) == Vec &&
8398 U->getOperand(1).getOpcode() == ISD::Constant &&
8399 U->getConstantOperandVal(1) == 2) {
8400 SDValue OtherExtend = SDValue(*U->user_begin(), 0);
8401 if (OtherExtend.getOpcode() == N->getOpcode() &&
8402 OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
8403 OtherExtend.getValueType() == MVT::f64) {
8404 SDValue VExtend, Chain;
8405 if (N->isStrictFPOpcode()) {
8406 Chain = MergeInputChains(N, OtherExtend.getNode());
8407 if (!Chain)
8408 continue;
8409 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
8410 {MVT::v2f64, MVT::Other}, {Chain, Vec});
8411 Chain = VExtend.getValue(1);
8412 } else
8413 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
8414 MVT::v2f64, Vec);
8415 DCI.AddToWorklist(VExtend.getNode());
8416 SDValue Extract1 =
8417 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
8418 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
8419 DCI.AddToWorklist(Extract1.getNode());
8420 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
8421 if (Chain)
8422 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
8423 SDValue Extract0 =
8424 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
8425 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
8426 if (Chain)
8427 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
8428 N->getVTList(), Extract0, Chain);
8429 return Extract0;
8430 }
8431 }
8432 }
8433 }
8434 return SDValue();
8435}
8436
8437SDValue SystemZTargetLowering::combineINT_TO_FP(
8438 SDNode *N, DAGCombinerInfo &DCI) const {
8439 if (DCI.Level != BeforeLegalizeTypes)
8440 return SDValue();
8441 SelectionDAG &DAG = DCI.DAG;
8442 LLVMContext &Ctx = *DAG.getContext();
8443 unsigned Opcode = N->getOpcode();
8444 EVT OutVT = N->getValueType(0);
8445 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx);
8446 SDValue Op = N->getOperand(0);
8447 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
8448 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
8449
8450 // Insert an extension before type-legalization to avoid scalarization, e.g.:
8451 // v2f64 = uint_to_fp v2i16
8452 // =>
8453 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
8454 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
8455 OutScalarBits <= 64) {
8456 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements();
8457 EVT ExtVT = EVT::getVectorVT(
8458 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts);
8459 unsigned ExtOpcode =
8460 (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
8461 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
8462 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
8463 }
8464 return SDValue();
8465}
8466
8467SDValue SystemZTargetLowering::combineFCOPYSIGN(
8468 SDNode *N, DAGCombinerInfo &DCI) const {
8469 SelectionDAG &DAG = DCI.DAG;
8470 EVT VT = N->getValueType(0);
8471 SDValue ValOp = N->getOperand(0);
8472 SDValue SignOp = N->getOperand(1);
8473
8474 // Remove the rounding which is not needed.
8475 if (SignOp.getOpcode() == ISD::FP_ROUND) {
8476 SDValue WideOp = SignOp.getOperand(0);
8477 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, ValOp, WideOp);
8478 }
8479
8480 return SDValue();
8481}
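// For illustration: in (fcopysign %x, (fp_round %wide)) only the sign bit of
// the second operand matters, and rounding never changes the sign, so the
// combine reads the sign directly from %wide and drops the fp_round.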
8482
8483SDValue SystemZTargetLowering::combineBSWAP(
8484 SDNode *N, DAGCombinerInfo &DCI) const {
8485 SelectionDAG &DAG = DCI.DAG;
8486 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
8487 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
8488 N->getOperand(0).hasOneUse() &&
8489 canLoadStoreByteSwapped(N->getValueType(0))) {
8490 SDValue Load = N->getOperand(0);
8491 LoadSDNode *LD = cast<LoadSDNode>(Load);
8492
8493 // Create the byte-swapping load.
8494 SDValue Ops[] = {
8495 LD->getChain(), // Chain
8496 LD->getBasePtr() // Ptr
8497 };
8498 EVT LoadVT = N->getValueType(0);
8499 if (LoadVT == MVT::i16)
8500 LoadVT = MVT::i32;
8501 SDValue BSLoad =
8502 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
8503 DAG.getVTList(LoadVT, MVT::Other),
8504 Ops, LD->getMemoryVT(), LD->getMemOperand());
8505
8506 // If this is an i16 load, insert the truncate.
8507 SDValue ResVal = BSLoad;
8508 if (N->getValueType(0) == MVT::i16)
8509 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
8510
8511 // First, combine the bswap away. This makes the value produced by the
8512 // load dead.
8513 DCI.CombineTo(N, ResVal);
8514
8515 // Next, combine the load away; we give it a bogus result value but a real
8516 // chain result. The result value is dead because the bswap is dead.
8517 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
8518
8519 // Return N so it doesn't get rechecked!
8520 return SDValue(N, 0);
8521 }
8522
8523 // Look through bitcasts that retain the number of vector elements.
8524 SDValue Op = N->getOperand(0);
8525 if (Op.getOpcode() == ISD::BITCAST &&
8526 Op.getValueType().isVector() &&
8527 Op.getOperand(0).getValueType().isVector() &&
8528 Op.getValueType().getVectorNumElements() ==
8529 Op.getOperand(0).getValueType().getVectorNumElements())
8530 Op = Op.getOperand(0);
8531
8532 // Push BSWAP into a vector insertion if at least one side then simplifies.
8533 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
8534 SDValue Vec = Op.getOperand(0);
8535 SDValue Elt = Op.getOperand(1);
8536 SDValue Idx = Op.getOperand(2);
8537
8538 if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
8539 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
8540 DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
8541 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
8542 (canLoadStoreByteSwapped(N->getValueType(0)) &&
8543 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
8544 EVT VecVT = N->getValueType(0);
8545 EVT EltVT = N->getValueType(0).getVectorElementType();
8546 if (VecVT != Vec.getValueType()) {
8547 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
8548 DCI.AddToWorklist(Vec.getNode());
8549 }
8550 if (EltVT != Elt.getValueType()) {
8551 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
8552 DCI.AddToWorklist(Elt.getNode());
8553 }
8554 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
8555 DCI.AddToWorklist(Vec.getNode());
8556 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
8557 DCI.AddToWorklist(Elt.getNode());
8558 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
8559 Vec, Elt, Idx);
8560 }
8561 }
8562
8563 // Push BSWAP into a vector shuffle if at least one side then simplifies.
8564 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
8565 if (SV && Op.hasOneUse()) {
8566 SDValue Op0 = Op.getOperand(0);
8567 SDValue Op1 = Op.getOperand(1);
8568
8569 if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
8570 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
8571 DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
8572 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
8573 EVT VecVT = N->getValueType(0);
8574 if (VecVT != Op0.getValueType()) {
8575 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
8576 DCI.AddToWorklist(Op0.getNode());
8577 }
8578 if (VecVT != Op1.getValueType()) {
8579 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
8580 DCI.AddToWorklist(Op1.getNode());
8581 }
8582 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
8583 DCI.AddToWorklist(Op0.getNode());
8584 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
8585 DCI.AddToWorklist(Op1.getNode());
8586 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
8587 }
8588 }
8589
8590 return SDValue();
8591}
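// For illustration: (i32 bswap (i32 load %ptr)) becomes a single
// SystemZISD::LRV load-reversed node, and (i16 bswap (i16 load %ptr)) is
// emitted as an i32 LRV followed by a truncate, as constructed above.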
8592
8593SDValue SystemZTargetLowering::combineSETCC(
8594 SDNode *N, DAGCombinerInfo &DCI) const {
8595 SelectionDAG &DAG = DCI.DAG;
8596 const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
8597 const SDValue LHS = N->getOperand(0);
8598 const SDValue RHS = N->getOperand(1);
8599 bool CmpNull = isNullConstant(RHS);
8600 bool CmpAllOnes = isAllOnesConstant(RHS);
8601 EVT VT = N->getValueType(0);
8602 SDLoc DL(N);
8603
8604 // Match icmp_eq/ne(bitcast(icmp(X,Y)),0/-1) reduction patterns, and
8605 // change the outer compare to a i128 compare. This will normally
8606 // allow the reduction to be recognized in adjustICmp128, and even if
8607 // not, the i128 compare will still generate better code.
8608 if ((CC == ISD::SETNE || CC == ISD::SETEQ) && (CmpNull || CmpAllOnes)) {
8609 SDValue Src = peekThroughBitcasts(LHS);
8610 if (Src.getOpcode() == ISD::SETCC &&
8611 Src.getValueType().isFixedLengthVector() &&
8612 Src.getValueType().getScalarType() == MVT::i1) {
8613 EVT CmpVT = Src.getOperand(0).getValueType();
8614 if (CmpVT.getSizeInBits() == 128) {
8615 EVT IntVT = CmpVT.changeVectorElementTypeToInteger();
8616 SDValue LHS =
8617 DAG.getBitcast(MVT::i128, DAG.getSExtOrTrunc(Src, DL, IntVT));
8618 SDValue RHS = CmpNull ? DAG.getConstant(0, DL, MVT::i128)
8619 : DAG.getAllOnesConstant(DL, MVT::i128);
8620 return DAG.getNode(ISD::SETCC, DL, VT, LHS, RHS, N->getOperand(2),
8621 N->getFlags());
8622 }
8623 }
8624 }
8625
8626 return SDValue();
8627}
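// For illustration: an "all lanes equal" reduction such as
//   (setcc (bitcast (v16i1 setcc %a, %b, eq) to i16), -1, eq)
// is rewritten to sign-extend the i1 mask to v16i8, bitcast it to i128, and
// compare that against an all-ones i128 constant, which adjustICmp128 can
// then recognize.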
8628
8629static std::pair<SDValue, int> findCCUse(const SDValue &Val) {
8630 switch (Val.getOpcode()) {
8631 default:
8632 return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
8633 case SystemZISD::IPM:
8634 if (Val.getOperand(0).getOpcode() == SystemZISD::CLC ||
8635 Val.getOperand(0).getOpcode() == SystemZISD::STRCMP)
8636 return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ICMP);
8637 return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ANY);
8638 case SystemZISD::SELECT_CCMASK: {
8639 SDValue Op4CCReg = Val.getOperand(4);
8640 if (Op4CCReg.getOpcode() == SystemZISD::ICMP ||
8641 Op4CCReg.getOpcode() == SystemZISD::TM) {
8642 auto [OpCC, OpCCValid] = findCCUse(Op4CCReg.getOperand(0));
8643 if (OpCC != SDValue())
8644 return std::make_pair(OpCC, OpCCValid);
8645 }
8646 auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2));
8647 if (!CCValid)
8648 return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
8649 int CCValidVal = CCValid->getZExtValue();
8650 return std::make_pair(Op4CCReg, CCValidVal);
8651 }
8652 case ISD::ADD:
8653 case ISD::AND:
8654 case ISD::OR:
8655 case ISD::XOR:
8656 case ISD::SHL:
8657 case ISD::SRA:
8658 case ISD::SRL:
8659 auto [Op0CC, Op0CCValid] = findCCUse(Val.getOperand(0));
8660 if (Op0CC != SDValue())
8661 return std::make_pair(Op0CC, Op0CCValid);
8662 return findCCUse(Val.getOperand(1));
8663 }
8664}
8665
8666static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
8667 SelectionDAG &DAG);
8668
8669static SmallVector<SDValue, 4> simplifyAssumingCCVal(SDValue &Val, SDValue &CC,
8670 SelectionDAG &DAG) {
8671 SDLoc DL(Val);
8672 auto Opcode = Val.getOpcode();
8673 switch (Opcode) {
8674 default:
8675 return {};
8676 case ISD::Constant:
8677 return {Val, Val, Val, Val};
8678 case SystemZISD::IPM: {
8679 SDValue IPMOp0 = Val.getOperand(0);
8680 if (IPMOp0 != CC)
8681 return {};
8682 SmallVector<SDValue, 4> ShiftedCCVals;
8683 for (auto CC : {0, 1, 2, 3})
8684 ShiftedCCVals.emplace_back(
8685 DAG.getConstant((CC << SystemZ::IPM_CC), DL, MVT::i32));
8686 return ShiftedCCVals;
8687 }
8688 case SystemZISD::SELECT_CCMASK: {
8689 SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
8690 auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2));
8691 auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
8692 if (!CCValid || !CCMask)
8693 return {};
8694
8695 int CCValidVal = CCValid->getZExtValue();
8696 int CCMaskVal = CCMask->getZExtValue();
8697 // Pruning search tree early - Moving CC test and combineCCMask ahead of
8698 // recursive call to simplifyAssumingCCVal.
8699 SDValue Op4CCReg = Val.getOperand(4);
8700 if (Op4CCReg != CC)
8701 combineCCMask(Op4CCReg, CCValidVal, CCMaskVal, DAG);
8702 if (Op4CCReg != CC)
8703 return {};
8704 const auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CC, DAG);
8705 const auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CC, DAG);
8706 if (TrueSDVals.empty() || FalseSDVals.empty())
8707 return {};
8708 SmallVector<SDValue, 4> MergedSDVals;
8709 for (auto &CCVal : {0, 1, 2, 3})
8710 MergedSDVals.emplace_back(((CCMaskVal & (1 << (3 - CCVal))) != 0)
8711 ? TrueSDVals[CCVal]
8712 : FalseSDVals[CCVal]);
8713 return MergedSDVals;
8714 }
8715 case ISD::ADD:
8716 case ISD::AND:
8717 case ISD::OR:
8718 case ISD::XOR:
8719 case ISD::SRA:
8720 // Avoid introducing CC spills (because ADD/AND/OR/XOR/SRA
8721 // would clobber CC).
8722 if (!Val.hasOneUse())
8723 return {};
8724 [[fallthrough]];
8725 case ISD::SHL:
8726 case ISD::SRL:
8727 SDValue Op0 = Val.getOperand(0), Op1 = Val.getOperand(1);
8728 const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, CC, DAG);
8729 const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, CC, DAG);
8730 if (Op0SDVals.empty() || Op1SDVals.empty())
8731 return {};
8732 SmallVector<SDValue, 4> BinaryOpSDVals;
8733 for (auto CCVal : {0, 1, 2, 3})
8734 BinaryOpSDVals.emplace_back(DAG.getNode(
8735 Opcode, DL, Val.getValueType(), Op0SDVals[CCVal], Op1SDVals[CCVal]));
8736 return BinaryOpSDVals;
8737 }
8738}
8739
8740static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
8741 SelectionDAG &DAG) {
8742 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
8743 // set by the CCReg instruction using the CCValid / CCMask masks.
8744 // If the CCReg instruction is itself an ICMP / TM testing the condition
8745 // code set by some other instruction, see whether we can directly
8746 // use that condition code.
8747 auto *CCNode = CCReg.getNode();
8748 if (!CCNode)
8749 return false;
8750
8751 if (CCNode->getOpcode() == SystemZISD::TM) {
8752 if (CCValid != SystemZ::CCMASK_TM)
8753 return false;
8754 auto emulateTMCCMask = [](const SDValue &Op0Val, const SDValue &Op1Val) {
8755 auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode());
8756 auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode());
8757 if (!Op0Node || !Op1Node)
8758 return -1;
8759 auto Op0APVal = Op0Node->getAPIntValue();
8760 auto Op1APVal = Op1Node->getAPIntValue();
8761 auto Result = Op0APVal & Op1APVal;
8762 bool AllOnes = Result == Op1APVal;
8763 bool AllZeros = Result == 0;
8764 bool IsLeftMostBitSet = Result[Op1APVal.getActiveBits()] != 0;
8765 return AllZeros ? 0 : AllOnes ? 3 : IsLeftMostBitSet ? 2 : 1;
8766 };
8767 SDValue Op0 = CCNode->getOperand(0);
8768 SDValue Op1 = CCNode->getOperand(1);
8769 auto [Op0CC, Op0CCValid] = findCCUse(Op0);
8770 if (Op0CC == SDValue())
8771 return false;
8772 const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, Op0CC, DAG);
8773 const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, Op0CC, DAG);
8774 if (Op0SDVals.empty() || Op1SDVals.empty())
8775 return false;
8776 int NewCCMask = 0;
8777 for (auto CC : {0, 1, 2, 3}) {
8778 auto CCVal = emulateTMCCMask(Op0SDVals[CC], Op1SDVals[CC]);
8779 if (CCVal < 0)
8780 return false;
8781 NewCCMask <<= 1;
8782 NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
8783 }
8784 NewCCMask &= Op0CCValid;
8785 CCReg = Op0CC;
8786 CCMask = NewCCMask;
8787 CCValid = Op0CCValid;
8788 return true;
8789 }
8790 if (CCNode->getOpcode() != SystemZISD::ICMP ||
8791 CCValid != SystemZ::CCMASK_ICMP)
8792 return false;
8793
8794 SDValue CmpOp0 = CCNode->getOperand(0);
8795 SDValue CmpOp1 = CCNode->getOperand(1);
8796 SDValue CmpOp2 = CCNode->getOperand(2);
8797 auto [Op0CC, Op0CCValid] = findCCUse(CmpOp0);
8798 if (Op0CC != SDValue()) {
8799 const auto &&Op0SDVals = simplifyAssumingCCVal(CmpOp0, Op0CC, DAG);
8800 const auto &&Op1SDVals = simplifyAssumingCCVal(CmpOp1, Op0CC, DAG);
8801 if (Op0SDVals.empty() || Op1SDVals.empty())
8802 return false;
8803
8804 auto *CmpType = dyn_cast<ConstantSDNode>(CmpOp2);
8805 auto CmpTypeVal = CmpType->getZExtValue();
8806 const auto compareCCSigned = [&CmpTypeVal](const SDValue &Op0Val,
8807 const SDValue &Op1Val) {
8808 auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode());
8809 auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode());
8810 if (!Op0Node || !Op1Node)
8811 return -1;
8812 auto Op0APVal = Op0Node->getAPIntValue();
8813 auto Op1APVal = Op1Node->getAPIntValue();
8814 if (CmpTypeVal == SystemZICMP::SignedOnly)
8815 return Op0APVal == Op1APVal ? 0 : Op0APVal.slt(Op1APVal) ? 1 : 2;
8816 return Op0APVal == Op1APVal ? 0 : Op0APVal.ult(Op1APVal) ? 1 : 2;
8817 };
8818 int NewCCMask = 0;
8819 for (auto CC : {0, 1, 2, 3}) {
8820 auto CCVal = compareCCSigned(Op0SDVals[CC], Op1SDVals[CC]);
8821 if (CCVal < 0)
8822 return false;
8823 NewCCMask <<= 1;
8824 NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
8825 }
8826 NewCCMask &= Op0CCValid;
8827 CCMask = NewCCMask;
8828 CCReg = Op0CC;
8829 CCValid = Op0CCValid;
8830 return true;
8831 }
8832
8833 return false;
8834}
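// For illustration: for
//   (icmp (select_ccmask 1, 0, ccvalid, ccmask, %cc), 0, ne)
// both selected values are constants, so the comparison can be evaluated for
// each of the four CC values; the user then tests %cc directly with a
// recomputed mask instead of materializing the 0/1 result first.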
8835
8836// Merging versus split in multiple branches cost.
8837TargetLoweringBase::CondMergingParams
8838SystemZTargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
8839 const Value *Lhs,
8840 const Value *Rhs) const {
8841 const auto isFlagOutOpCC = [](const Value *V) {
8842 using namespace llvm::PatternMatch;
8843 const Value *RHSVal;
8844 const APInt *RHSC;
8845 if (const auto *I = dyn_cast<Instruction>(V)) {
8846 // PatternMatch.h provides concise tree-based pattern match of llvm IR.
8847 if (match(I->getOperand(0), m_And(m_Value(RHSVal), m_APInt(RHSC))) ||
8848 match(I, m_Cmp(m_Value(RHSVal), m_APInt(RHSC)))) {
8849 if (const auto *CB = dyn_cast<CallBase>(RHSVal)) {
8850 if (CB->isInlineAsm()) {
8851 const InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
8852 return IA && IA->getConstraintString().contains("{@cc}");
8853 }
8854 }
8855 }
8856 }
8857 return false;
8858 };
8859 // Pattern (ICmp %asm) or (ICmp (And %asm)).
8860 // The cost of the longest dependency chain (ICmp, And) is 2. CostThreshold or
8861 // BaseCost can be set >= 2. If the cost of the instruction is <= CostThreshold,
8862 // the conditionals will be merged; otherwise they will be split.
8863 if (isFlagOutOpCC(Lhs) && isFlagOutOpCC(Rhs))
8864 return {3, 0, -1};
8865 // Default.
8866 return {-1, -1, -1};
8867}
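// For illustration: when both branch conditions are compares of the CC flag
// output of an inline asm using the "{@cc}" constraint (optionally masked by
// an 'and'), the dependency chain is at most two instructions, so the returned
// parameters {3, 0, -1} allow the two conditionals to be merged into one
// branch instead of being split.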
8868
8869SDValue SystemZTargetLowering::combineBR_CCMASK(SDNode *N,
8870 DAGCombinerInfo &DCI) const {
8871 SelectionDAG &DAG = DCI.DAG;
8872
8873 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
8874 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
8875 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
8876 if (!CCValid || !CCMask)
8877 return SDValue();
8878
8879 int CCValidVal = CCValid->getZExtValue();
8880 int CCMaskVal = CCMask->getZExtValue();
8881 SDValue Chain = N->getOperand(0);
8882 SDValue CCReg = N->getOperand(4);
8883 // If combineCCMask was able to merge or simplify ccvalid or ccmask, re-emit
8884 // the modified BR_CCMASK with the new values.
8885 // In order to avoid conditional branches with full or empty cc masks, do not
8886 // do this if ccmask is 0 or equal to ccvalid.
8887 if (combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG) && CCMaskVal != 0 &&
8888 CCMaskVal != CCValidVal)
8889 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
8890 Chain,
8891 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
8892 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
8893 N->getOperand(3), CCReg);
8894 return SDValue();
8895}
8896
8897SDValue SystemZTargetLowering::combineSELECT_CCMASK(
8898 SDNode *N, DAGCombinerInfo &DCI) const {
8899 SelectionDAG &DAG = DCI.DAG;
8900
8901 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
8902 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
8903 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
8904 if (!CCValid || !CCMask)
8905 return SDValue();
8906
8907 int CCValidVal = CCValid->getZExtValue();
8908 int CCMaskVal = CCMask->getZExtValue();
8909 SDValue CCReg = N->getOperand(4);
8910
8911 bool IsCombinedCCReg = combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG);
8912
8913 // Populate an SDVals vector with the value Val takes for each condition code;
8914 // Val may itself be another nested select_ccmask on the same CC.
8915 const auto constructCCSDValsFromSELECT = [&CCReg](SDValue &Val) {
8916 if (Val.getOpcode() == SystemZISD::SELECT_CCMASK) {
8917 SmallVector<SDValue, 4> Res;
8918 if (Val.getOperand(4) != CCReg)
8919 return SmallVector<SDValue, 4>{};
8920 SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
8921 auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
8922 if (!CCMask)
8923 return SmallVector<SDValue, 4>{};
8924
8925 int CCMaskVal = CCMask->getZExtValue();
8926 for (auto &CC : {0, 1, 2, 3})
8927 Res.emplace_back(((CCMaskVal & (1 << (3 - CC))) != 0) ? TrueVal
8928 : FalseVal);
8929 return Res;
8930 }
8931 return SmallVector<SDValue, 4>{Val, Val, Val, Val};
8932 };
8933 // Attempt to optimize TrueVal/FalseVal in the outermost select_ccmask, either
8934 // with the CCReg found by combineCCMask or with the original CCReg.
8935 SDValue TrueVal = N->getOperand(0);
8936 SDValue FalseVal = N->getOperand(1);
8937 auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CCReg, DAG);
8938 auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CCReg, DAG);
8939 // TrueSDVals/FalseSDVals might be empty in case of non-constant
8940 // TrueVal/FalseVal for select_ccmask, which cannot be optimized further.
8941 if (TrueSDVals.empty())
8942 TrueSDVals = constructCCSDValsFromSELECT(TrueVal);
8943 if (FalseSDVals.empty())
8944 FalseSDVals = constructCCSDValsFromSELECT(FalseVal);
8945 if (!TrueSDVals.empty() && !FalseSDVals.empty()) {
8946 SmallSet<SDValue, 4> MergedSDValsSet;
8947 // Ignore CC values outside CCValid.
8948 for (auto CC : {0, 1, 2, 3}) {
8949 if ((CCValidVal & ((1 << (3 - CC)))) != 0)
8950 MergedSDValsSet.insert(((CCMaskVal & (1 << (3 - CC))) != 0)
8951 ? TrueSDVals[CC]
8952 : FalseSDVals[CC]);
8953 }
8954 if (MergedSDValsSet.size() == 1)
8955 return *MergedSDValsSet.begin();
8956 if (MergedSDValsSet.size() == 2) {
8957 auto BeginIt = MergedSDValsSet.begin();
8958 SDValue NewTrueVal = *BeginIt, NewFalseVal = *next(BeginIt);
8959 if (NewTrueVal == FalseVal || NewFalseVal == TrueVal)
8960 std::swap(NewTrueVal, NewFalseVal);
8961 int NewCCMask = 0;
8962 for (auto CC : {0, 1, 2, 3}) {
8963 NewCCMask <<= 1;
8964 NewCCMask |= ((CCMaskVal & (1 << (3 - CC))) != 0)
8965 ? (TrueSDVals[CC] == NewTrueVal)
8966 : (FalseSDVals[CC] == NewTrueVal);
8967 }
8968 CCMaskVal = NewCCMask;
8969 CCMaskVal &= CCValidVal;
8970 TrueVal = NewTrueVal;
8971 FalseVal = NewFalseVal;
8972 IsCombinedCCReg = true;
8973 }
8974 }
8975 // If the condition is trivially false or trivially true after
8976 // combineCCMask, just collapse this SELECT_CCMASK to the indicated value
8977 // (possibly modified by constructCCSDValsFromSELECT).
8978 if (CCMaskVal == 0)
8979 return FalseVal;
8980 if (CCMaskVal == CCValidVal)
8981 return TrueVal;
8982
8983 if (IsCombinedCCReg)
8984 return DAG.getNode(
8985 SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), TrueVal,
8986 FalseVal, DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
8987 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), CCReg);
8988
8989 return SDValue();
8990}
8991
8992SDValue SystemZTargetLowering::combineGET_CCMASK(
8993 SDNode *N, DAGCombinerInfo &DCI) const {
8994
8995 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
8996 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
8997 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
8998 if (!CCValid || !CCMask)
8999 return SDValue();
9000 int CCValidVal = CCValid->getZExtValue();
9001 int CCMaskVal = CCMask->getZExtValue();
9002
9003 SDValue Select = N->getOperand(0);
9004 if (Select->getOpcode() == ISD::TRUNCATE)
9005 Select = Select->getOperand(0);
9006 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
9007 return SDValue();
9008
9009 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
9010 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
9011 if (!SelectCCValid || !SelectCCMask)
9012 return SDValue();
9013 int SelectCCValidVal = SelectCCValid->getZExtValue();
9014 int SelectCCMaskVal = SelectCCMask->getZExtValue();
9015
9016 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
9017 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
9018 if (!TrueVal || !FalseVal)
9019 return SDValue();
9020 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
9021 ;
9022 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
9023 SelectCCMaskVal ^= SelectCCValidVal;
9024 else
9025 return SDValue();
9026
9027 if (SelectCCValidVal & ~CCValidVal)
9028 return SDValue();
9029 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
9030 return SDValue();
9031
9032 return Select->getOperand(4);
9033}
9034
9035SDValue SystemZTargetLowering::combineIntDIVREM(
9036 SDNode *N, DAGCombinerInfo &DCI) const {
9037 SelectionDAG &DAG = DCI.DAG;
9038 EVT VT = N->getValueType(0);
9039 // In the case where the divisor is a vector of constants a cheaper
9040 // sequence of instructions can replace the divide. BuildSDIV is called to
9041 // do this during DAG combining, but it only succeeds when it can build a
9042 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
9043 // since it is not Legal but Custom it can only happen before
9044 // legalization. Therefore we must scalarize this early before Combine
9045 // 1. For widened vectors, this is already the result of type legalization.
9046 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
9047 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
9048 return DAG.UnrollVectorOp(N);
9049 return SDValue();
9050}
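// For illustration: (sdiv v2i64 %x, <3, 5>) is unrolled here into two scalar
// divisions before type legalization, so that DAG combining can later replace
// each scalar division by constant with a cheaper multiply-based sequence,
// which is not possible for the vector type itself.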
9051
9052
9053// Transform a right shift of a multiply-and-add into a multiply-and-add-high.
9054// This is closely modeled after the common-code combineShiftToMULH.
9055SDValue SystemZTargetLowering::combineShiftToMulAddHigh(
9056 SDNode *N, DAGCombinerInfo &DCI) const {
9057 SelectionDAG &DAG = DCI.DAG;
9058 SDLoc DL(N);
9059
9060 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
9061 "SRL or SRA node is required here!");
9062
9063 if (!Subtarget.hasVector())
9064 return SDValue();
9065
9066 // Check the shift amount. Proceed with the transformation if the shift
9067 // amount is constant.
9068 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
9069 if (!ShiftAmtSrc)
9070 return SDValue();
9071
9072 // The operation feeding into the shift must be an add.
9073 SDValue ShiftOperand = N->getOperand(0);
9074 if (ShiftOperand.getOpcode() != ISD::ADD)
9075 return SDValue();
9076
9077 // One operand of the add must be a multiply.
9078 SDValue MulOp = ShiftOperand.getOperand(0);
9079 SDValue AddOp = ShiftOperand.getOperand(1);
9080 if (MulOp.getOpcode() != ISD::MUL) {
9081 if (AddOp.getOpcode() != ISD::MUL)
9082 return SDValue();
9083 std::swap(MulOp, AddOp);
9084 }
9085
9086 // All operands must be equivalent extend nodes.
9087 SDValue LeftOp = MulOp.getOperand(0);
9088 SDValue RightOp = MulOp.getOperand(1);
9089
9090 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
9091 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
9092
9093 if (!IsSignExt && !IsZeroExt)
9094 return SDValue();
9095
9096 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
9097 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
9098
9099 SDValue MulhRightOp;
9100 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
9101 unsigned ActiveBits = IsSignExt
9102 ? Constant->getAPIntValue().getSignificantBits()
9103 : Constant->getAPIntValue().getActiveBits();
9104 if (ActiveBits > NarrowVTSize)
9105 return SDValue();
9106 MulhRightOp = DAG.getConstant(
9107 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
9108 NarrowVT);
9109 } else {
9110 if (LeftOp.getOpcode() != RightOp.getOpcode())
9111 return SDValue();
9112 // Check that the two extend nodes are the same type.
9113 if (NarrowVT != RightOp.getOperand(0).getValueType())
9114 return SDValue();
9115 MulhRightOp = RightOp.getOperand(0);
9116 }
9117
9118 SDValue MulhAddOp;
9119 if (ConstantSDNode *Constant = isConstOrConstSplat(AddOp)) {
9120 unsigned ActiveBits = IsSignExt
9121 ? Constant->getAPIntValue().getSignificantBits()
9122 : Constant->getAPIntValue().getActiveBits();
9123 if (ActiveBits > NarrowVTSize)
9124 return SDValue();
9125 MulhAddOp = DAG.getConstant(
9126 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
9127 NarrowVT);
9128 } else {
9129 if (LeftOp.getOpcode() != AddOp.getOpcode())
9130 return SDValue();
9131 // Check that the two extend nodes are the same type.
9132 if (NarrowVT != AddOp.getOperand(0).getValueType())
9133 return SDValue();
9134 MulhAddOp = AddOp.getOperand(0);
9135 }
9136
9137 EVT WideVT = LeftOp.getValueType();
9138 // Proceed with the transformation if the wide types match.
9139 assert((WideVT == RightOp.getValueType()) &&
9140 "Cannot have a multiply node with two different operand types.");
9141 assert((WideVT == AddOp.getValueType()) &&
9142 "Cannot have an add node with two different operand types.");
9143
9144 // Proceed with the transformation if the wide type is twice as large
9145 // as the narrow type.
9146 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
9147 return SDValue();
9148
9149 // Check the shift amount with the narrow type size.
9150 // Proceed with the transformation if the shift amount is the width
9151 // of the narrow type.
9152 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
9153 if (ShiftAmt != NarrowVTSize)
9154 return SDValue();
9155
9156 // Proceed if we support the multiply-and-add-high operation.
9157 if (!(NarrowVT == MVT::v16i8 || NarrowVT == MVT::v8i16 ||
9158 NarrowVT == MVT::v4i32 ||
9159 (Subtarget.hasVectorEnhancements3() &&
9160 (NarrowVT == MVT::v2i64 || NarrowVT == MVT::i128))))
9161 return SDValue();
9162
9163 // Emit the VMAH (signed) or VMALH (unsigned) operation.
9164 SDValue Result = DAG.getNode(IsSignExt ? SystemZISD::VMAH : SystemZISD::VMALH,
9165 DL, NarrowVT, LeftOp.getOperand(0),
9166 MulhRightOp, MulhAddOp);
9167 bool IsSigned = N->getOpcode() == ISD::SRA;
9168 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
9169}
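// For illustration:
//   (srl (add (mul (zero_extend v8i16 %a), (zero_extend v8i16 %b)),
//             (zero_extend v8i16 %c)), 16)
// keeps only the high halves of the widening multiply-and-add, so it is
// emitted as VMALH on the narrow v8i16 operands (VMAH when the operands are
// sign-extended) and then extended back to the wide type.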
9170
9171// Op is an operand of a multiplication. Check whether this can be folded
9172// into an even/odd widening operation; if so, return the opcode to be used
9173// and update Op to the appropriate sub-operand. Note that the caller must
9174// verify that *both* operands of the multiplication support the operation.
9175static unsigned detectEvenOddMultiplyOperand(SelectionDAG &DAG,
9176 const SystemZSubtarget &Subtarget,
9177 SDValue &Op) {
9178 EVT VT = Op.getValueType();
9179
9180 // Check for (sign/zero_extend_vector_inreg (vector_shuffle)) corresponding
9181 // to selecting the even or odd vector elements.
9182 if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
9183 (Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
9184 Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG)) {
9185 bool IsSigned = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
9186 unsigned NumElts = VT.getVectorNumElements();
9187 Op = Op.getOperand(0);
9188 if (Op.getValueType().getVectorNumElements() == 2 * NumElts &&
9189 Op.getOpcode() == ISD::VECTOR_SHUFFLE) {
9190 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
9191 ArrayRef<int> ShuffleMask = SVN->getMask();
9192 bool CanUseEven = true, CanUseOdd = true;
9193 for (unsigned Elt = 0; Elt < NumElts; Elt++) {
9194 if (ShuffleMask[Elt] == -1)
9195 continue;
9196 if (unsigned(ShuffleMask[Elt]) != 2 * Elt)
9197 CanUseEven = false;
9198 if (unsigned(ShuffleMask[Elt]) != 2 * Elt + 1)
9199 CanUseOdd = false;
9200 }
9201 Op = Op.getOperand(0);
9202 if (CanUseEven)
9203 return IsSigned ? SystemZISD::VME : SystemZISD::VMLE;
9204 if (CanUseOdd)
9205 return IsSigned ? SystemZISD::VMO : SystemZISD::VMLO;
9206 }
9207 }
9208
9209 // For z17, we can also support the v2i64->i128 case, which looks like
9210 // (sign/zero_extend (extract_vector_elt X 0/1))
9211 if (VT == MVT::i128 && Subtarget.hasVectorEnhancements3() &&
9212 (Op.getOpcode() == ISD::SIGN_EXTEND ||
9213 Op.getOpcode() == ISD::ZERO_EXTEND)) {
9214 bool IsSigned = Op.getOpcode() == ISD::SIGN_EXTEND;
9215 Op = Op.getOperand(0);
9216 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
9217 Op.getOperand(0).getValueType() == MVT::v2i64 &&
9218 Op.getOperand(1).getOpcode() == ISD::Constant) {
9219 unsigned Elem = Op.getConstantOperandVal(1);
9220 Op = Op.getOperand(0);
9221 if (Elem == 0)
9222 return IsSigned ? SystemZISD::VME : SystemZISD::VMLE;
9223 if (Elem == 1)
9224 return IsSigned ? SystemZISD::VMO : SystemZISD::VMLO;
9225 }
9226 }
9227
9228 return 0;
9229}
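// For illustration: for a v4i32 multiply where each operand has the form
//   (sign_extend_vector_inreg (vector_shuffle %v, undef, <0,2,4,6>))
// both operands select the even v8i16 elements, so the multiply becomes a
// single SystemZISD::VME node (VMO for mask <1,3,5,7>, and VMLE/VMLO for the
// zero-extended forms).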
9230
9231SDValue SystemZTargetLowering::combineMUL(
9232 SDNode *N, DAGCombinerInfo &DCI) const {
9233 SelectionDAG &DAG = DCI.DAG;
9234
9235 // Detect even/odd widening multiplication.
9236 SDValue Op0 = N->getOperand(0);
9237 SDValue Op1 = N->getOperand(1);
9238 unsigned OpcodeCand0 = detectEvenOddMultiplyOperand(DAG, Subtarget, Op0);
9239 unsigned OpcodeCand1 = detectEvenOddMultiplyOperand(DAG, Subtarget, Op1);
9240 if (OpcodeCand0 && OpcodeCand0 == OpcodeCand1)
9241 return DAG.getNode(OpcodeCand0, SDLoc(N), N->getValueType(0), Op0, Op1);
9242
9243 return SDValue();
9244}
9245
9246SDValue SystemZTargetLowering::combineINTRINSIC(
9247 SDNode *N, DAGCombinerInfo &DCI) const {
9248 SelectionDAG &DAG = DCI.DAG;
9249
9250 unsigned Id = N->getConstantOperandVal(1);
9251 switch (Id) {
9252 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
9253 // or larger is simply a vector load.
9254 case Intrinsic::s390_vll:
9255 case Intrinsic::s390_vlrl:
9256 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
9257 if (C->getZExtValue() >= 15)
9258 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
9259 N->getOperand(3), MachinePointerInfo());
9260 break;
9261 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
9262 case Intrinsic::s390_vstl:
9263 case Intrinsic::s390_vstrl:
9264 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
9265 if (C->getZExtValue() >= 15)
9266 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
9267 N->getOperand(4), MachinePointerInfo());
9268 break;
9269 }
9270
9271 return SDValue();
9272}
9273
9274SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
9275 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
9276 return N->getOperand(0);
9277 return N;
9278}
9279
9280SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
9281 DAGCombinerInfo &DCI) const {
9282 switch(N->getOpcode()) {
9283 default: break;
9284 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
9285 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
9286 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
9287 case SystemZISD::MERGE_HIGH:
9288 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
9289 case ISD::LOAD: return combineLOAD(N, DCI);
9290 case ISD::STORE: return combineSTORE(N, DCI);
9291 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
9292 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
9293 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
9294 case ISD::STRICT_FP_ROUND:
9295 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
9296 case ISD::STRICT_FP_EXTEND:
9297 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
9298 case ISD::SINT_TO_FP:
9299 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
9300 case ISD::FCOPYSIGN: return combineFCOPYSIGN(N, DCI);
9301 case ISD::BSWAP: return combineBSWAP(N, DCI);
9302 case ISD::SETCC: return combineSETCC(N, DCI);
9303 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
9304 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
9305 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
9306 case ISD::SRL:
9307 case ISD::SRA: return combineShiftToMulAddHigh(N, DCI);
9308 case ISD::MUL: return combineMUL(N, DCI);
9309 case ISD::SDIV:
9310 case ISD::UDIV:
9311 case ISD::SREM:
9312 case ISD::UREM: return combineIntDIVREM(N, DCI);
9313 case ISD::INTRINSIC_W_CHAIN:
9314 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
9315 }
9316
9317 return SDValue();
9318}
9319
9320// Return the demanded elements for the OpNo source operand of Op. DemandedElts
9321// are for Op.
9322static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
9323 unsigned OpNo) {
9324 EVT VT = Op.getValueType();
9325 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
9326 APInt SrcDemE;
9327 unsigned Opcode = Op.getOpcode();
9328 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9329 unsigned Id = Op.getConstantOperandVal(0);
9330 switch (Id) {
9331 case Intrinsic::s390_vpksh: // PACKS
9332 case Intrinsic::s390_vpksf:
9333 case Intrinsic::s390_vpksg:
9334 case Intrinsic::s390_vpkshs: // PACKS_CC
9335 case Intrinsic::s390_vpksfs:
9336 case Intrinsic::s390_vpksgs:
9337 case Intrinsic::s390_vpklsh: // PACKLS
9338 case Intrinsic::s390_vpklsf:
9339 case Intrinsic::s390_vpklsg:
9340 case Intrinsic::s390_vpklshs: // PACKLS_CC
9341 case Intrinsic::s390_vpklsfs:
9342 case Intrinsic::s390_vpklsgs:
9343 // VECTOR PACK truncates the elements of two source vectors into one.
9344 SrcDemE = DemandedElts;
9345 if (OpNo == 2)
9346 SrcDemE.lshrInPlace(NumElts / 2);
9347 SrcDemE = SrcDemE.trunc(NumElts / 2);
9348 break;
9349 // VECTOR UNPACK extends half the elements of the source vector.
9350 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9351 case Intrinsic::s390_vuphh:
9352 case Intrinsic::s390_vuphf:
9353 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
9354 case Intrinsic::s390_vuplhh:
9355 case Intrinsic::s390_vuplhf:
9356 SrcDemE = APInt(NumElts * 2, 0);
9357 SrcDemE.insertBits(DemandedElts, 0);
9358 break;
9359 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9360 case Intrinsic::s390_vuplhw:
9361 case Intrinsic::s390_vuplf:
9362 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
9363 case Intrinsic::s390_vupllh:
9364 case Intrinsic::s390_vupllf:
9365 SrcDemE = APInt(NumElts * 2, 0);
9366 SrcDemE.insertBits(DemandedElts, NumElts);
9367 break;
9368 case Intrinsic::s390_vpdi: {
9369 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
9370 SrcDemE = APInt(NumElts, 0);
9371 if (!DemandedElts[OpNo - 1])
9372 break;
9373 unsigned Mask = Op.getConstantOperandVal(3);
9374 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
9375 // Demand input element 0 or 1, given by the mask bit value.
9376 SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
9377 break;
9378 }
9379 case Intrinsic::s390_vsldb: {
9380 // VECTOR SHIFT LEFT DOUBLE BY BYTE
9381 assert(VT == MVT::v16i8 && "Unexpected type.");
9382 unsigned FirstIdx = Op.getConstantOperandVal(3);
9383 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
9384 unsigned NumSrc0Els = 16 - FirstIdx;
9385 SrcDemE = APInt(NumElts, 0);
9386 if (OpNo == 1) {
9387 APInt DemEls = DemandedElts.trunc(NumSrc0Els);
9388 SrcDemE.insertBits(DemEls, FirstIdx);
9389 } else {
9390 APInt DemEls = DemandedElts.lshr(NumSrc0Els);
9391 SrcDemE.insertBits(DemEls, 0);
9392 }
9393 break;
9394 }
9395 case Intrinsic::s390_vperm:
9396 SrcDemE = APInt::getAllOnes(NumElts);
9397 break;
9398 default:
9399 llvm_unreachable("Unhandled intrinsic.");
9400 break;
9401 }
9402 } else {
9403 switch (Opcode) {
9404 case SystemZISD::JOIN_DWORDS:
9405 // Scalar operand.
9406 SrcDemE = APInt(1, 1);
9407 break;
9408 case SystemZISD::SELECT_CCMASK:
9409 SrcDemE = DemandedElts;
9410 break;
9411 default:
9412 llvm_unreachable("Unhandled opcode.");
9413 break;
9414 }
9415 }
9416 return SrcDemE;
9417}
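// For illustration: for a v8i16 VECTOR PACK built from two v4i32 sources,
// demanding only result elements 0-3 demands all four elements of the first
// source operand and none of the second, while demanding elements 4-7 maps to
// the second source operand only.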
9418
9419static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
9420 const APInt &DemandedElts,
9421 const SelectionDAG &DAG, unsigned Depth,
9422 unsigned OpNo) {
9423 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
9424 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
9425 KnownBits LHSKnown =
9426 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
9427 KnownBits RHSKnown =
9428 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
9429 Known = LHSKnown.intersectWith(RHSKnown);
9430}
9431
9432void
9433SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
9434 KnownBits &Known,
9435 const APInt &DemandedElts,
9436 const SelectionDAG &DAG,
9437 unsigned Depth) const {
9438 Known.resetAll();
9439
9440 // Intrinsic CC result is returned in the two low bits.
9441 unsigned Tmp0, Tmp1; // not used
9442 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, Tmp0, Tmp1)) {
9443 Known.Zero.setBitsFrom(2);
9444 return;
9445 }
9446 EVT VT = Op.getValueType();
9447 if (Op.getResNo() != 0 || VT == MVT::Untyped)
9448 return;
9449 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
9450 "KnownBits does not match VT in bitwidth");
9451 assert ((!VT.isVector() ||
9452 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
9453 "DemandedElts does not match VT number of elements");
9454 unsigned BitWidth = Known.getBitWidth();
9455 unsigned Opcode = Op.getOpcode();
9456 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9457 bool IsLogical = false;
9458 unsigned Id = Op.getConstantOperandVal(0);
9459 switch (Id) {
9460 case Intrinsic::s390_vpksh: // PACKS
9461 case Intrinsic::s390_vpksf:
9462 case Intrinsic::s390_vpksg:
9463 case Intrinsic::s390_vpkshs: // PACKS_CC
9464 case Intrinsic::s390_vpksfs:
9465 case Intrinsic::s390_vpksgs:
9466 case Intrinsic::s390_vpklsh: // PACKLS
9467 case Intrinsic::s390_vpklsf:
9468 case Intrinsic::s390_vpklsg:
9469 case Intrinsic::s390_vpklshs: // PACKLS_CC
9470 case Intrinsic::s390_vpklsfs:
9471 case Intrinsic::s390_vpklsgs:
9472 case Intrinsic::s390_vpdi:
9473 case Intrinsic::s390_vsldb:
9474 case Intrinsic::s390_vperm:
9475 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
9476 break;
9477 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
9478 case Intrinsic::s390_vuplhh:
9479 case Intrinsic::s390_vuplhf:
9480 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
9481 case Intrinsic::s390_vupllh:
9482 case Intrinsic::s390_vupllf:
9483 IsLogical = true;
9484 [[fallthrough]];
9485 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9486 case Intrinsic::s390_vuphh:
9487 case Intrinsic::s390_vuphf:
9488 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9489 case Intrinsic::s390_vuplhw:
9490 case Intrinsic::s390_vuplf: {
9491 SDValue SrcOp = Op.getOperand(1);
9492 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
9493 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
9494 if (IsLogical) {
9495 Known = Known.zext(BitWidth);
9496 } else
9497 Known = Known.sext(BitWidth);
9498 break;
9499 }
9500 default:
9501 break;
9502 }
9503 } else {
9504 switch (Opcode) {
9505 case SystemZISD::JOIN_DWORDS:
9506 case SystemZISD::SELECT_CCMASK:
9507 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
9508 break;
9509 case SystemZISD::REPLICATE: {
9510 SDValue SrcOp = Op.getOperand(0);
9511 Known = DAG.computeKnownBits(SrcOp, Depth + 1);
9512 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
9513 Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
9514 break;
9515 }
9516 default:
9517 break;
9518 }
9519 }
9520
9521 // Known has the width of the source operand(s). Adjust if needed to match
9522 // the passed bitwidth.
9523 if (Known.getBitWidth() != BitWidth)
9524 Known = Known.anyextOrTrunc(BitWidth);
9525}
9526
9527static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
9528 const SelectionDAG &DAG, unsigned Depth,
9529 unsigned OpNo) {
9530 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
9531 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
9532 if (LHS == 1) return 1; // Early out.
9533 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
9534 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
9535 if (RHS == 1) return 1; // Early out.
9536 unsigned Common = std::min(LHS, RHS);
9537 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
9538 EVT VT = Op.getValueType();
9539 unsigned VTBits = VT.getScalarSizeInBits();
9540 if (SrcBitWidth > VTBits) { // PACK
9541 unsigned SrcExtraBits = SrcBitWidth - VTBits;
9542 if (Common > SrcExtraBits)
9543 return (Common - SrcExtraBits);
9544 return 1;
9545 }
9546 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
9547 return Common;
9548}
9549
9550unsigned
9551SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
9552 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
9553 unsigned Depth) const {
9554 if (Op.getResNo() != 0)
9555 return 1;
9556 unsigned Opcode = Op.getOpcode();
9557 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9558 unsigned Id = Op.getConstantOperandVal(0);
9559 switch (Id) {
9560 case Intrinsic::s390_vpksh: // PACKS
9561 case Intrinsic::s390_vpksf:
9562 case Intrinsic::s390_vpksg:
9563 case Intrinsic::s390_vpkshs: // PACKS_CC
9564 case Intrinsic::s390_vpksfs:
9565 case Intrinsic::s390_vpksgs:
9566 case Intrinsic::s390_vpklsh: // PACKLS
9567 case Intrinsic::s390_vpklsf:
9568 case Intrinsic::s390_vpklsg:
9569 case Intrinsic::s390_vpklshs: // PACKLS_CC
9570 case Intrinsic::s390_vpklsfs:
9571 case Intrinsic::s390_vpklsgs:
9572 case Intrinsic::s390_vpdi:
9573 case Intrinsic::s390_vsldb:
9574 case Intrinsic::s390_vperm:
9575 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
9576 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9577 case Intrinsic::s390_vuphh:
9578 case Intrinsic::s390_vuphf:
9579 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9580 case Intrinsic::s390_vuplhw:
9581 case Intrinsic::s390_vuplf: {
9582 SDValue PackedOp = Op.getOperand(1);
9583 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
9584 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
9585 EVT VT = Op.getValueType();
9586 unsigned VTBits = VT.getScalarSizeInBits();
9587 Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
9588 return Tmp;
9589 }
9590 default:
9591 break;
9592 }
9593 } else {
9594 switch (Opcode) {
9595 case SystemZISD::SELECT_CCMASK:
9596 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
9597 default:
9598 break;
9599 }
9600 }
9601
9602 return 1;
9603}
9604
9605bool SystemZTargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
9606 SDValue Op,
9607 const APInt &DemandedElts, const SelectionDAG &DAG,
9608 bool PoisonOnly, unsigned Depth) const {
9609 switch (Op->getOpcode()) {
9610 case SystemZISD::PCREL_WRAPPER:
9611 case SystemZISD::PCREL_OFFSET:
9612 return true;
9613 }
9614 return false;
9615}
9616
9617unsigned
9618SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
9619 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
9620 unsigned StackAlign = TFI->getStackAlignment();
9621 assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
9622 "Unexpected stack alignment");
9623 // The default stack probe size is 4096 if the function has no
9624 // stack-probe-size attribute.
9625 unsigned StackProbeSize =
9626 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
9627 // Round down to the stack alignment.
9628 StackProbeSize &= ~(StackAlign - 1);
9629 return StackProbeSize ? StackProbeSize : StackAlign;
9630}
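// For illustration: with a function attribute "stack-probe-size"="100" and the
// 8-byte SystemZ stack alignment, the probe size is rounded down to 96; any
// value smaller than the alignment rounds down to zero and the alignment
// itself is used instead.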
9631
9632//===----------------------------------------------------------------------===//
9633// Custom insertion
9634//===----------------------------------------------------------------------===//
9635
9636// Force base value Base into a register before MI. Return the register.
9637static Register forceReg(MachineInstr &MI, MachineOperand &Base,
9638 const SystemZInstrInfo *TII) {
9639 MachineBasicBlock *MBB = MI.getParent();
9640 MachineFunction &MF = *MBB->getParent();
9641 MachineRegisterInfo &MRI = MF.getRegInfo();
9642
9643 if (Base.isReg()) {
9644 // Copy Base into a new virtual register to help register coalescing in
9645 // cases with multiple uses.
9646 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9647 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
9648 .add(Base);
9649 return Reg;
9650 }
9651
9652 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9653 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
9654 .add(Base)
9655 .addImm(0)
9656 .addReg(0);
9657 return Reg;
9658}
9659
9660// The CC operand of MI might be missing a kill marker because there
9661// were multiple uses of CC, and ISel didn't know which to mark.
9662// Figure out whether MI should have had a kill marker.
9663static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
9664 // Scan forward through BB for a use/def of CC.
9665 MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
9666 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
9667 const MachineInstr &MI = *miI;
9668 if (MI.readsRegister(SystemZ::CC, /*TRI=*/nullptr))
9669 return false;
9670 if (MI.definesRegister(SystemZ::CC, /*TRI=*/nullptr))
9671 break; // Should have kill-flag - update below.
9672 }
9673
9674 // If we hit the end of the block, check whether CC is live into a
9675 // successor.
9676 if (miI == MBB->end()) {
9677 for (const MachineBasicBlock *Succ : MBB->successors())
9678 if (Succ->isLiveIn(SystemZ::CC))
9679 return false;
9680 }
9681
9682 return true;
9683}
9684
9685// Return true if it is OK for this Select pseudo-opcode to be cascaded
9686// together with other Select pseudo-opcodes into a single basic-block with
9687// a conditional jump around it.
9688static bool isSelectPseudo(MachineInstr &MI) {
9689 switch (MI.getOpcode()) {
9690 case SystemZ::Select32:
9691 case SystemZ::Select64:
9692 case SystemZ::Select128:
9693 case SystemZ::SelectF32:
9694 case SystemZ::SelectF64:
9695 case SystemZ::SelectF128:
9696 case SystemZ::SelectVR32:
9697 case SystemZ::SelectVR64:
9698 case SystemZ::SelectVR128:
9699 return true;
9700
9701 default:
9702 return false;
9703 }
9704}
9705
9706// Helper function, which inserts PHI functions into SinkMBB:
9707// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
9708// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
9709static void createPHIsForSelects(SmallVectorImpl<MachineInstr*> &Selects,
9710 MachineBasicBlock *TrueMBB,
9711 MachineBasicBlock *FalseMBB,
9712 MachineBasicBlock *SinkMBB) {
9713 MachineFunction *MF = TrueMBB->getParent();
9715
9716 MachineInstr *FirstMI = Selects.front();
9717 unsigned CCValid = FirstMI->getOperand(3).getImm();
9718 unsigned CCMask = FirstMI->getOperand(4).getImm();
9719
9720 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
9721
9722 // As we are creating the PHIs, we have to be careful if there is more than
9723 // one. Later Selects may reference the results of earlier Selects, but later
9724 // PHIs have to reference the individual true/false inputs from earlier PHIs.
9725 // That also means that PHI construction must work forward from earlier to
9726 // later, and that the code must maintain a mapping from earlier PHI's
9727 // destination registers, and the registers that went into the PHI.
9728 DenseMap<Register, std::pair<Register, Register>> RegRewriteTable;
9729
9730 for (auto *MI : Selects) {
9731 Register DestReg = MI->getOperand(0).getReg();
9732 Register TrueReg = MI->getOperand(1).getReg();
9733 Register FalseReg = MI->getOperand(2).getReg();
9734
9735 // If this Select we are generating is the opposite condition from
9736 // the jump we generated, then we have to swap the operands for the
9737 // PHI that is going to be generated.
9738 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
9739 std::swap(TrueReg, FalseReg);
9740
9741 if (auto It = RegRewriteTable.find(TrueReg); It != RegRewriteTable.end())
9742 TrueReg = It->second.first;
9743
9744 if (auto It = RegRewriteTable.find(FalseReg); It != RegRewriteTable.end())
9745 FalseReg = It->second.second;
9746
9747 DebugLoc DL = MI->getDebugLoc();
9748 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
9749 .addReg(TrueReg).addMBB(TrueMBB)
9750 .addReg(FalseReg).addMBB(FalseMBB);
9751
9752 // Add this PHI to the rewrite table.
9753 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
9754 }
9755
9756 MF->getProperties().resetNoPHIs();
9757}
9758
9759MachineBasicBlock *
9760SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
9761 MachineBasicBlock *BB) const {
9762 MachineFunction &MF = *BB->getParent();
9763 MachineFrameInfo &MFI = MF.getFrameInfo();
9764 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
9765 assert(TFL->hasReservedCallFrame(MF) &&
9766 "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
9767 (void)TFL;
9768 // Get the MaxCallFrameSize value and erase MI since it serves no further
9769 // purpose as the call frame is statically reserved in the prolog. Set
9770 // AdjustsStack as MI is *not* mapped as a frame instruction.
9771 uint32_t NumBytes = MI.getOperand(0).getImm();
9772 if (NumBytes > MFI.getMaxCallFrameSize())
9773 MFI.setMaxCallFrameSize(NumBytes);
9774 MFI.setAdjustsStack(true);
9775
9776 MI.eraseFromParent();
9777 return BB;
9778}
9779
9780// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
9781MachineBasicBlock *
9782SystemZTargetLowering::emitSelect(MachineInstr &MI,
9783 MachineBasicBlock *MBB) const {
9784 assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
9785 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9786
9787 unsigned CCValid = MI.getOperand(3).getImm();
9788 unsigned CCMask = MI.getOperand(4).getImm();
9789
9790 // If we have a sequence of Select* pseudo instructions using the
9791 // same condition code value, we want to expand all of them into
9792 // a single pair of basic blocks using the same condition.
9793 SmallVector<MachineInstr*, 8> Selects;
9794 SmallVector<MachineInstr*, 8> DbgValues;
9795 Selects.push_back(&MI);
9796 unsigned Count = 0;
9797 for (MachineInstr &NextMI : llvm::make_range(
9798 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) {
9799 if (isSelectPseudo(NextMI)) {
9800 assert(NextMI.getOperand(3).getImm() == CCValid &&
9801 "Bad CCValid operands since CC was not redefined.");
9802 if (NextMI.getOperand(4).getImm() == CCMask ||
9803 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) {
9804 Selects.push_back(&NextMI);
9805 continue;
9806 }
9807 break;
9808 }
9809 if (NextMI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) ||
9810 NextMI.usesCustomInsertionHook())
9811 break;
9812 bool User = false;
9813 for (auto *SelMI : Selects)
9814 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) {
9815 User = true;
9816 break;
9817 }
9818 if (NextMI.isDebugInstr()) {
9819 if (User) {
9820 assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
9821 DbgValues.push_back(&NextMI);
9822 }
9823 } else if (User || ++Count > 20)
9824 break;
9825 }
9826
9827 MachineInstr *LastMI = Selects.back();
9828 bool CCKilled = (LastMI->killsRegister(SystemZ::CC, /*TRI=*/nullptr) ||
9829 checkCCKill(*LastMI, MBB));
9830 MachineBasicBlock *StartMBB = MBB;
9831 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
9832 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
9833
9834 // Unless CC was killed in the last Select instruction, mark it as
9835 // live-in to both FalseMBB and JoinMBB.
9836 if (!CCKilled) {
9837 FalseMBB->addLiveIn(SystemZ::CC);
9838 JoinMBB->addLiveIn(SystemZ::CC);
9839 }
9840
9841 // StartMBB:
9842 // BRC CCMask, JoinMBB
9843 // # fallthrough to FalseMBB
9844 MBB = StartMBB;
9845 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
9846 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
9847 MBB->addSuccessor(JoinMBB);
9848 MBB->addSuccessor(FalseMBB);
9849
9850 // FalseMBB:
9851 // # fallthrough to JoinMBB
9852 MBB = FalseMBB;
9853 MBB->addSuccessor(JoinMBB);
9854
9855 // JoinMBB:
9856 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
9857 // ...
9858 MBB = JoinMBB;
9859 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
9860 for (auto *SelMI : Selects)
9861 SelMI->eraseFromParent();
9862
9863 MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
9864 for (auto *DbgMI : DbgValues)
9865 MBB->splice(InsertPos, StartMBB, DbgMI);
9866
9867 return JoinMBB;
9868}
9869
9870// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
9871// StoreOpcode is the store to use and Invert says whether the store should
9872// happen when the condition is false rather than true. If a STORE ON
9873// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
9874MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
9876 unsigned StoreOpcode,
9877 unsigned STOCOpcode,
9878 bool Invert) const {
9879 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9880
9881 Register SrcReg = MI.getOperand(0).getReg();
9882 MachineOperand Base = MI.getOperand(1);
9883 int64_t Disp = MI.getOperand(2).getImm();
9884 Register IndexReg = MI.getOperand(3).getReg();
9885 unsigned CCValid = MI.getOperand(4).getImm();
9886 unsigned CCMask = MI.getOperand(5).getImm();
9887 DebugLoc DL = MI.getDebugLoc();
9888
9889 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
9890
9891 // ISel pattern matching also adds a load memory operand of the same
9892 // address, so take special care to find the storing memory operand.
9893 MachineMemOperand *MMO = nullptr;
9894 for (auto *I : MI.memoperands())
9895 if (I->isStore()) {
9896 MMO = I;
9897 break;
9898 }
9899
9900 // Use STOCOpcode if possible. We could use different store patterns in
9901 // order to avoid matching the index register, but the performance trade-offs
9902 // might be more complicated in that case.
9903 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
9904 if (Invert)
9905 CCMask ^= CCValid;
9906
9907 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
9908 .addReg(SrcReg)
9909 .add(Base)
9910 .addImm(Disp)
9911 .addImm(CCValid)
9912 .addImm(CCMask)
9913 .addMemOperand(MMO);
9914
9915 MI.eraseFromParent();
9916 return MBB;
9917 }
9918
9919 // Get the condition needed to branch around the store.
9920 if (!Invert)
9921 CCMask ^= CCValid;
9922
9923 MachineBasicBlock *StartMBB = MBB;
9924 MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
9925 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
9926
9927 // Unless CC was killed in the CondStore instruction, mark it as
9928 // live-in to both FalseMBB and JoinMBB.
9929 if (!MI.killsRegister(SystemZ::CC, /*TRI=*/nullptr) &&
9930 !checkCCKill(MI, JoinMBB)) {
9931 FalseMBB->addLiveIn(SystemZ::CC);
9932 JoinMBB->addLiveIn(SystemZ::CC);
9933 }
9934
9935 // StartMBB:
9936 // BRC CCMask, JoinMBB
9937 // # fallthrough to FalseMBB
9938 MBB = StartMBB;
9939 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9940 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
9941 MBB->addSuccessor(JoinMBB);
9942 MBB->addSuccessor(FalseMBB);
9943
9944 // FalseMBB:
9945 // store %SrcReg, %Disp(%Index,%Base)
9946 // # fallthrough to JoinMBB
9947 MBB = FalseMBB;
9948 BuildMI(MBB, DL, TII->get(StoreOpcode))
9949 .addReg(SrcReg)
9950 .add(Base)
9951 .addImm(Disp)
9952 .addReg(IndexReg)
9953 .addMemOperand(MMO);
9954 MBB->addSuccessor(JoinMBB);
9955
9956 MI.eraseFromParent();
9957 return JoinMBB;
9958}
9959
9960// Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI.
9961MachineBasicBlock *
9962SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI,
9963 MachineBasicBlock *MBB,
9964 bool Unsigned) const {
9965 MachineFunction &MF = *MBB->getParent();
9966 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9967 MachineRegisterInfo &MRI = MF.getRegInfo();
9968
9969 // Synthetic instruction to compare 128-bit values.
9970 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise.
9971 Register Op0 = MI.getOperand(0).getReg();
9972 Register Op1 = MI.getOperand(1).getReg();
9973
9974 MachineBasicBlock *StartMBB = MBB;
9975 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI, MBB);
9976 MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);
9977
9978 // StartMBB:
9979 //
9980 // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
9981 // Swap the inputs to get:
9982 // CC 1 if high(Op0) > high(Op1)
9983 // CC 2 if high(Op0) < high(Op1)
9984 // CC 0 if high(Op0) == high(Op1)
9985 //
9986 // If CC != 0, we're done, so jump over the next instruction.
9987 //
9988 // VEC[L]G Op1, Op0
9989 // JNE JoinMBB
9990 // # fallthrough to HiEqMBB
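// For example, comparing Op0 = 2^64 (high half 1, low half 0) with Op1 = 1
// (high half 0, low half 1): the swapped VEC[L]G sees high(Op0) > high(Op1),
// sets CC 1 and branches straight to JoinMBB; only when the high halves are
// equal does the VCHLGS on the low halves in HiEqMBB decide the final CC.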
9991 MBB = StartMBB;
9992 int HiOpcode = Unsigned? SystemZ::VECLG : SystemZ::VECG;
9993 BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
9994 .addReg(Op1).addReg(Op0);
9995 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
9996 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(JoinMBB);
9997 MBB->addSuccessor(JoinMBB);
9998 MBB->addSuccessor(HiEqMBB);
9999
10000 // HiEqMBB:
10001 //
10002 // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
10003 // Since we already know the high parts are equal, the CC
10004 // result will only depend on the low parts:
10005 // CC 1 if low(Op0) > low(Op1)
10006 // CC 3 if low(Op0) <= low(Op1)
10007 //
10008 // VCHLGS Tmp, Op0, Op1
10009 // # fallthrough to JoinMBB
10010 MBB = HiEqMBB;
10011 Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
10012 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
10013 .addReg(Op0).addReg(Op1);
10014 MBB->addSuccessor(JoinMBB);
10015
10016 // Mark CC as live-in to JoinMBB.
10017 JoinMBB->addLiveIn(SystemZ::CC);
10018
10019 MI.eraseFromParent();
10020 return JoinMBB;
10021}
10022
10023// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
10024// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
10025// the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
10026// whether the field should be inverted after performing BinOpcode (e.g. for
10027// NAND).
10028MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
10029 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
10030 bool Invert) const {
10031 MachineFunction &MF = *MBB->getParent();
10032 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10033 MachineRegisterInfo &MRI = MF.getRegInfo();
10034
10035 // Extract the operands. Base can be a register or a frame index.
10036 // Src2 can be a register or immediate.
10037 Register Dest = MI.getOperand(0).getReg();
10038 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10039 int64_t Disp = MI.getOperand(2).getImm();
10040 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
10041 Register BitShift = MI.getOperand(4).getReg();
10042 Register NegBitShift = MI.getOperand(5).getReg();
10043 unsigned BitSize = MI.getOperand(6).getImm();
10044 DebugLoc DL = MI.getDebugLoc();
10045
10046 // Get the right opcodes for the displacement.
10047 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10048 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10049 assert(LOpcode && CSOpcode && "Displacement out of range");
10050
10051 // Create virtual registers for temporary results.
10052 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10053 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10054 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10055 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10056 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10057
10058 // Insert a basic block for the main loop.
10059 MachineBasicBlock *StartMBB = MBB;
10060 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10061 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10062
10063 // StartMBB:
10064 // ...
10065 // %OrigVal = L Disp(%Base)
10066 // # fall through to LoopMBB
10067 MBB = StartMBB;
10068 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
10069 MBB->addSuccessor(LoopMBB);
10070
10071 // LoopMBB:
10072 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
10073 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
10074 // %RotatedNewVal = OP %RotatedOldVal, %Src2
10075 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
10076 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
10077 // JNE LoopMBB
10078 // # fall through to DoneMBB
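// Roughly: the first RLL rotates the loaded word so that the subword field
// (e.g. a halfword when BitSize is 16) lands in the most-significant bits,
// the operation is applied there, and the second RLL by %NegBitShift rotates
// the result back into place before the CS retry.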
10079 MBB = LoopMBB;
10080 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10081 .addReg(OrigVal).addMBB(StartMBB)
10082 .addReg(Dest).addMBB(LoopMBB);
10083 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
10084 .addReg(OldVal).addReg(BitShift).addImm(0);
10085 if (Invert) {
10086 // Perform the operation normally and then invert every bit of the field.
10087 Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10088 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
10089 // XILF with the upper BitSize bits set.
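// For example, with BitSize == 8 the immediate is -1U << 24 == 0xFF000000,
// so the XILF flips exactly the 8-bit field sitting in the top byte of the
// rotated word (yielding e.g. NAND when BinOpcode is AND) while leaving the
// neighbouring bytes untouched.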
10090 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
10091 .addReg(Tmp).addImm(-1U << (32 - BitSize));
10092 } else if (BinOpcode)
10093 // A simple binary operation.
10094 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
10095 .addReg(RotatedOldVal)
10096 .add(Src2);
10097 else
10098 // Use RISBG to rotate Src2 into position and use it to replace the
10099 // field in RotatedOldVal.
10100 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
10101 .addReg(RotatedOldVal).addReg(Src2.getReg())
10102 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
10103 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
10104 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
10105 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
10106 .addReg(OldVal)
10107 .addReg(NewVal)
10108 .add(Base)
10109 .addImm(Disp);
10110 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10111 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(LoopMBB);
10112 MBB->addSuccessor(LoopMBB);
10113 MBB->addSuccessor(DoneMBB);
10114
10115 MI.eraseFromParent();
10116 return DoneMBB;
10117}
10118
10119// Implement EmitInstrWithCustomInserter for subword pseudo
10120// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
10121// instruction that should be used to compare the current field with the
10122// minimum or maximum value. KeepOldMask is the BRC condition-code mask
10123// for when the current field should be kept.
10124MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
10125 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
10126 unsigned KeepOldMask) const {
10127 MachineFunction &MF = *MBB->getParent();
10128 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10129 MachineRegisterInfo &MRI = MF.getRegInfo();
10130
10131 // Extract the operands. Base can be a register or a frame index.
10132 Register Dest = MI.getOperand(0).getReg();
10133 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10134 int64_t Disp = MI.getOperand(2).getImm();
10135 Register Src2 = MI.getOperand(3).getReg();
10136 Register BitShift = MI.getOperand(4).getReg();
10137 Register NegBitShift = MI.getOperand(5).getReg();
10138 unsigned BitSize = MI.getOperand(6).getImm();
10139 DebugLoc DL = MI.getDebugLoc();
10140
10141 // Get the right opcodes for the displacement.
10142 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10143 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10144 assert(LOpcode && CSOpcode && "Displacement out of range");
10145
10146 // Create virtual registers for temporary results.
10147 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10148 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10149 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10150 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10151 Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10152 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10153
10154 // Insert 3 basic blocks for the loop.
10155 MachineBasicBlock *StartMBB = MBB;
10156 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10157 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10158 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
10159 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
10160
10161 // StartMBB:
10162 // ...
10163 // %OrigVal = L Disp(%Base)
10164 // # fall through to LoopMBB
10165 MBB = StartMBB;
10166 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
10167 MBB->addSuccessor(LoopMBB);
10168
10169 // LoopMBB:
10170 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
10171 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
10172 // CompareOpcode %RotatedOldVal, %Src2
10173 // BRC KeepOldMask, UpdateMBB
10174 MBB = LoopMBB;
10175 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10176 .addReg(OrigVal).addMBB(StartMBB)
10177 .addReg(Dest).addMBB(UpdateMBB);
10178 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
10179 .addReg(OldVal).addReg(BitShift).addImm(0);
10180 BuildMI(MBB, DL, TII->get(CompareOpcode))
10181 .addReg(RotatedOldVal).addReg(Src2);
10182 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10183 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
10184 MBB->addSuccessor(UpdateMBB);
10185 MBB->addSuccessor(UseAltMBB);
10186
10187 // UseAltMBB:
10188 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
10189 // # fall through to UpdateMBB
10190 MBB = UseAltMBB;
10191 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
10192 .addReg(RotatedOldVal).addReg(Src2)
10193 .addImm(32).addImm(31 + BitSize).addImm(0);
10194 MBB->addSuccessor(UpdateMBB);
10195
10196 // UpdateMBB:
10197 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
10198 // [ %RotatedAltVal, UseAltMBB ]
10199 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
10200 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
10201 // JNE LoopMBB
10202 // # fall through to DoneMBB
10203 MBB = UpdateMBB;
10204 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
10205 .addReg(RotatedOldVal).addMBB(LoopMBB)
10206 .addReg(RotatedAltVal).addMBB(UseAltMBB);
10207 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
10208 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
10209 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
10210 .addReg(OldVal)
10211 .addReg(NewVal)
10212 .add(Base)
10213 .addImm(Disp);
10214 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10215 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(LoopMBB);
10216 MBB->addSuccessor(LoopMBB);
10217 MBB->addSuccessor(DoneMBB);
10218
10219 MI.eraseFromParent();
10220 return DoneMBB;
10221}
10222
10223// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW
10224// instruction MI.
10225MachineBasicBlock *
10226SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
10227 MachineBasicBlock *MBB) const {
10228 MachineFunction &MF = *MBB->getParent();
10229 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10230 MachineRegisterInfo &MRI = MF.getRegInfo();
10231
10232 // Extract the operands. Base can be a register or a frame index.
10233 Register Dest = MI.getOperand(0).getReg();
10234 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10235 int64_t Disp = MI.getOperand(2).getImm();
10236 Register CmpVal = MI.getOperand(3).getReg();
10237 Register OrigSwapVal = MI.getOperand(4).getReg();
10238 Register BitShift = MI.getOperand(5).getReg();
10239 Register NegBitShift = MI.getOperand(6).getReg();
10240 int64_t BitSize = MI.getOperand(7).getImm();
10241 DebugLoc DL = MI.getDebugLoc();
10242
10243 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
10244
10245 // Get the right opcodes for the displacement and zero-extension.
10246 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10247 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10248 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
10249 assert(LOpcode && CSOpcode && "Displacement out of range");
10250
10251 // Create virtual registers for temporary results.
10252 Register OrigOldVal = MRI.createVirtualRegister(RC);
10253 Register OldVal = MRI.createVirtualRegister(RC);
10254 Register SwapVal = MRI.createVirtualRegister(RC);
10255 Register StoreVal = MRI.createVirtualRegister(RC);
10256 Register OldValRot = MRI.createVirtualRegister(RC);
10257 Register RetryOldVal = MRI.createVirtualRegister(RC);
10258 Register RetrySwapVal = MRI.createVirtualRegister(RC);
10259
10260 // Insert 2 basic blocks for the loop.
10261 MachineBasicBlock *StartMBB = MBB;
10262 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10263 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10264 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
10265
10266 // StartMBB:
10267 // ...
10268 // %OrigOldVal = L Disp(%Base)
10269 // # fall through to LoopMBB
10270 MBB = StartMBB;
10271 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
10272 .add(Base)
10273 .addImm(Disp)
10274 .addReg(0);
10275 MBB->addSuccessor(LoopMBB);
10276
10277 // LoopMBB:
10278 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
10279 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
10280 // %OldValRot = RLL %OldVal, BitSize(%BitShift)
10281 // ^^ The low BitSize bits contain the field
10282 // of interest.
10283 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
10284 // ^^ Replace the upper 32-BitSize bits of the
10285 // swap value with those that we loaded and rotated.
10286 // %Dest = LL[CH] %OldValRot
10287 // CR %Dest, %CmpVal
10288 // JNE DoneMBB
10289 // # Fall through to SetMBB
10290 MBB = LoopMBB;
10291 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10292 .addReg(OrigOldVal).addMBB(StartMBB)
10293 .addReg(RetryOldVal).addMBB(SetMBB);
10294 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
10295 .addReg(OrigSwapVal).addMBB(StartMBB)
10296 .addReg(RetrySwapVal).addMBB(SetMBB);
10297 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot)
10298 .addReg(OldVal).addReg(BitShift).addImm(BitSize);
10299 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
10300 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0);
10301 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest)
10302 .addReg(OldValRot);
10303 BuildMI(MBB, DL, TII->get(SystemZ::CR))
10304 .addReg(Dest).addReg(CmpVal);
10305 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10306 .addImm(SystemZ::CCMASK_ICMP)
10307 .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB);
10308 MBB->addSuccessor(DoneMBB);
10309 MBB->addSuccessor(SetMBB);
10310
10311 // SetMBB:
10312 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
10313 // ^^ Rotate the new field to its proper position.
10314 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
10315 // JNE LoopMBB
10316 // # fall through to ExitMBB
10317 MBB = SetMBB;
10318 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
10319 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
10320 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
10321 .addReg(OldVal)
10322 .addReg(StoreVal)
10323 .add(Base)
10324 .addImm(Disp);
10325 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10326 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(LoopMBB);
10327 MBB->addSuccessor(LoopMBB);
10328 MBB->addSuccessor(DoneMBB);
10329
10330 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
10331 // to the block after the loop. At this point, CC may have been defined
10332 // either by the CR in LoopMBB or by the CS in SetMBB.
10333 if (!MI.registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr))
10334 DoneMBB->addLiveIn(SystemZ::CC);
10335
10336 MI.eraseFromParent();
10337 return DoneMBB;
10338}
10339
10340// Emit a move from two GR64s to a GR128.
10341MachineBasicBlock *
10342SystemZTargetLowering::emitPair128(MachineInstr &MI,
10343 MachineBasicBlock *MBB) const {
10344 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10345 const DebugLoc &DL = MI.getDebugLoc();
10346
10347 Register Dest = MI.getOperand(0).getReg();
10348 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest)
10349 .add(MI.getOperand(1))
10350 .addImm(SystemZ::subreg_h64)
10351 .add(MI.getOperand(2))
10352 .addImm(SystemZ::subreg_l64);
10353 MI.eraseFromParent();
10354 return MBB;
10355}
10356
10357// Emit an extension from a GR64 to a GR128. ClearEven is true
10358// if the high register of the GR128 value must be cleared or false if
10359// it's "don't care".
10360MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
10361 MachineBasicBlock *MBB,
10362 bool ClearEven) const {
10363 MachineFunction &MF = *MBB->getParent();
10364 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10365 MachineRegisterInfo &MRI = MF.getRegInfo();
10366 DebugLoc DL = MI.getDebugLoc();
10367
10368 Register Dest = MI.getOperand(0).getReg();
10369 Register Src = MI.getOperand(1).getReg();
10370 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
10371
10372 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
10373 if (ClearEven) {
10374 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
10375 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
10376
10377 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
10378 .addImm(0);
10379 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
10380 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
10381 In128 = NewIn128;
10382 }
10383 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
10384 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
10385
10386 MI.eraseFromParent();
10387 return MBB;
10388}
10389
10390MachineBasicBlock *
10391SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
10392 MachineBasicBlock *MBB,
10393 unsigned Opcode, bool IsMemset) const {
10394 MachineFunction &MF = *MBB->getParent();
10395 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10396 MachineRegisterInfo &MRI = MF.getRegInfo();
10397 DebugLoc DL = MI.getDebugLoc();
10398
10399 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
10400 uint64_t DestDisp = MI.getOperand(1).getImm();
10401 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
10402 uint64_t SrcDisp;
10403
10404 // Fold the displacement Disp if it is out of range.
10405 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
10406 if (!isUInt<12>(Disp)) {
10407 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10408 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
10409 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
10410 .add(Base).addImm(Disp).addReg(0);
10411 Base = MachineOperand::CreateReg(Reg, false);
10412 Disp = 0;
10413 }
10414 };
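// For example, a displacement of 4096 does not fit the unsigned 12-bit field
// used by MVC/CLC, so the lambda above would emit an LA/LAY of Base + 4096
// into a fresh register, make that the new base and reset the displacement
// to 0 before the memory operation is built.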
10415
10416 if (!IsMemset) {
10417 SrcBase = earlyUseOperand(MI.getOperand(2));
10418 SrcDisp = MI.getOperand(3).getImm();
10419 } else {
10420 SrcBase = DestBase;
10421 SrcDisp = DestDisp++;
10422 foldDisplIfNeeded(DestBase, DestDisp);
10423 }
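// For a memset the source overlaps the destination shifted by one byte: the
// first byte is stored explicitly (MVI/STC below) and each MVC then copies
// from Disp(Dest) onto Disp+1(Dest), so the stored byte propagates through
// the whole range.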
10424
10425 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
10426 bool IsImmForm = LengthMO.isImm();
10427 bool IsRegForm = !IsImmForm;
10428
10429 // Build and insert one Opcode of Length, with special treatment for memset.
10430 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
10431 MachineBasicBlock::iterator InsPos,
10432 MachineOperand DBase, uint64_t DDisp,
10433 MachineOperand SBase, uint64_t SDisp,
10434 unsigned Length) -> void {
10435 assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
10436 if (IsMemset) {
10437 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
10438 if (ByteMO.isImm())
10439 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
10440 .add(SBase).addImm(SDisp).add(ByteMO);
10441 else
10442 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
10443 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
10444 if (--Length == 0)
10445 return;
10446 }
10447 BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
10448 .add(DBase).addImm(DDisp).addImm(Length)
10449 .add(SBase).addImm(SDisp)
10450 .setMemRefs(MI.memoperands());
10451 };
10452
10453 bool NeedsLoop = false;
10454 uint64_t ImmLength = 0;
10455 Register LenAdjReg = SystemZ::NoRegister;
10456 if (IsImmForm) {
10457 ImmLength = LengthMO.getImm();
10458 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
10459 if (ImmLength == 0) {
10460 MI.eraseFromParent();
10461 return MBB;
10462 }
10463 if (Opcode == SystemZ::CLC) {
10464 if (ImmLength > 3 * 256)
10465 // A two-CLC sequence is a clear win over a loop, not least because
10466 // it needs only one branch. A three-CLC sequence needs the same
10467 // number of branches as a loop (i.e. 2), but is shorter. That
10468 // brings us to lengths greater than 768 bytes. It seems relatively
10469 // likely that a difference will be found within the first 768 bytes,
10470 // so we just optimize for the smallest number of branch
10471 // instructions, in order to avoid polluting the prediction buffer
10472 // too much.
10473 NeedsLoop = true;
10474 } else if (ImmLength > 6 * 256)
10475 // The heuristic we use is to prefer loops for anything that would
10476 // require 7 or more MVCs. With these kinds of sizes there isn't much
10477 // to choose between straight-line code and looping code, since the
10478 // time will be dominated by the MVCs themselves.
10479 NeedsLoop = true;
10480 } else {
10481 NeedsLoop = true;
10482 LenAdjReg = LengthMO.getReg();
10483 }
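// In other words, straight-line code is kept up to 3 * 256 = 768 bytes for
// CLC (at most three CLCs) and up to 6 * 256 = 1536 bytes for MVC and the
// other block operations; anything longer, or any register-length form, goes
// through the loop emitted below.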
10484
10485 // When generating more than one CLC, all but the last will need to
10486 // branch to the end when a difference is found.
10487 MachineBasicBlock *EndMBB =
10488 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
10489 ? SystemZ::splitBlockAfter(MI, MBB)
10490 : nullptr);
10491
10492 if (NeedsLoop) {
10493 Register StartCountReg =
10494 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
10495 if (IsImmForm) {
10496 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
10497 ImmLength &= 255;
10498 } else {
10499 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
10500 .addReg(LenAdjReg)
10501 .addReg(0)
10502 .addImm(8);
10503 }
10504
10505 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
10506 auto loadZeroAddress = [&]() -> MachineOperand {
10507 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10508 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
10509 return MachineOperand::CreateReg(Reg, false);
10510 };
10511 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
10512 DestBase = loadZeroAddress();
10513 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
10514 SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();
10515
10516 MachineBasicBlock *StartMBB = nullptr;
10517 MachineBasicBlock *LoopMBB = nullptr;
10518 MachineBasicBlock *NextMBB = nullptr;
10519 MachineBasicBlock *DoneMBB = nullptr;
10520 MachineBasicBlock *AllDoneMBB = nullptr;
10521
10522 Register StartSrcReg = forceReg(MI, SrcBase, TII);
10523 Register StartDestReg =
10524 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
10525
10526 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
10527 Register ThisSrcReg = MRI.createVirtualRegister(RC);
10528 Register ThisDestReg =
10529 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
10530 Register NextSrcReg = MRI.createVirtualRegister(RC);
10531 Register NextDestReg =
10532 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
10533 RC = &SystemZ::GR64BitRegClass;
10534 Register ThisCountReg = MRI.createVirtualRegister(RC);
10535 Register NextCountReg = MRI.createVirtualRegister(RC);
10536
10537 if (IsRegForm) {
10538 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10539 StartMBB = SystemZ::emitBlockAfter(MBB);
10540 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10541 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
10542 DoneMBB = SystemZ::emitBlockAfter(NextMBB);
10543
10544 // MBB:
10545 // # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB.
10546 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10547 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
10548 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10549 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
10550 .addMBB(AllDoneMBB);
10551 MBB->addSuccessor(AllDoneMBB);
10552 if (!IsMemset)
10553 MBB->addSuccessor(StartMBB);
10554 else {
10555 // MemsetOneCheckMBB:
10556 // # Jump to MemsetOneMBB for a memset of length 1, or
10557 // # fall thru to StartMBB.
10558 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
10559 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
10560 MBB->addSuccessor(MemsetOneCheckMBB);
10561 MBB = MemsetOneCheckMBB;
10562 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10563 .addReg(LenAdjReg).addImm(-1);
10564 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10565 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
10566 .addMBB(MemsetOneMBB);
10567 MBB->addSuccessor(MemsetOneMBB, {10, 100});
10568 MBB->addSuccessor(StartMBB, {90, 100});
10569
10570 // MemsetOneMBB:
10571 // # Jump back to AllDoneMBB after a single MVI or STC.
10572 MBB = MemsetOneMBB;
10573 insertMemMemOp(MBB, MBB->end(),
10574 MachineOperand::CreateReg(StartDestReg, false), DestDisp,
10575 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
10576 1);
10577 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
10578 MBB->addSuccessor(AllDoneMBB);
10579 }
10580
10581 // StartMBB:
10582 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
10583 MBB = StartMBB;
10584 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10585 .addReg(StartCountReg).addImm(0);
10586 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10587 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
10588 .addMBB(DoneMBB);
10589 MBB->addSuccessor(DoneMBB);
10590 MBB->addSuccessor(LoopMBB);
10591 }
10592 else {
10593 StartMBB = MBB;
10594 DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10595 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10596 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
10597
10598 // StartMBB:
10599 // # fall through to LoopMBB
10600 MBB->addSuccessor(LoopMBB);
10601
10602 DestBase = MachineOperand::CreateReg(NextDestReg, false);
10603 SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
10604 if (EndMBB && !ImmLength)
10605 // If the loop handled the whole CLC range, DoneMBB will be empty with
10606 // CC live-through into EndMBB, so add it as live-in.
10607 DoneMBB->addLiveIn(SystemZ::CC);
10608 }
10609
10610 // LoopMBB:
10611 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
10612 // [ %NextDestReg, NextMBB ]
10613 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
10614 // [ %NextSrcReg, NextMBB ]
10615 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
10616 // [ %NextCountReg, NextMBB ]
10617 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
10618 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
10619 // ( JLH EndMBB )
10620 //
10621 // The prefetch is used only for MVC. The JLH is used only for CLC.
10622 MBB = LoopMBB;
10623 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
10624 .addReg(StartDestReg).addMBB(StartMBB)
10625 .addReg(NextDestReg).addMBB(NextMBB);
10626 if (!HaveSingleBase)
10627 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
10628 .addReg(StartSrcReg).addMBB(StartMBB)
10629 .addReg(NextSrcReg).addMBB(NextMBB);
10630 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
10631 .addReg(StartCountReg).addMBB(StartMBB)
10632 .addReg(NextCountReg).addMBB(NextMBB);
10633 if (Opcode == SystemZ::MVC)
10634 BuildMI(MBB, DL, TII->get(SystemZ::PFD))
10635 .addImm(SystemZ::PFD_WRITE)
10636 .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
10637 insertMemMemOp(MBB, MBB->end(),
10638 MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
10639 MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
10640 if (EndMBB) {
10641 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10642 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
10643 .addMBB(EndMBB);
10644 MBB->addSuccessor(EndMBB);
10645 MBB->addSuccessor(NextMBB);
10646 }
10647
10648 // NextMBB:
10649 // %NextDestReg = LA 256(%ThisDestReg)
10650 // %NextSrcReg = LA 256(%ThisSrcReg)
10651 // %NextCountReg = AGHI %ThisCountReg, -1
10652 // CGHI %NextCountReg, 0
10653 // JLH LoopMBB
10654 // # fall through to DoneMBB
10655 //
10656 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
10657 MBB = NextMBB;
10658 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
10659 .addReg(ThisDestReg).addImm(256).addReg(0);
10660 if (!HaveSingleBase)
10661 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
10662 .addReg(ThisSrcReg).addImm(256).addReg(0);
10663 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
10664 .addReg(ThisCountReg).addImm(-1);
10665 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10666 .addReg(NextCountReg).addImm(0);
10667 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10668 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
10669 .addMBB(LoopMBB);
10670 MBB->addSuccessor(LoopMBB);
10671 MBB->addSuccessor(DoneMBB);
10672
10673 MBB = DoneMBB;
10674 if (IsRegForm) {
10675 // DoneMBB:
10676 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
10677 // # Use EXecute Relative Long for the remainder of the bytes. The target
10678 // instruction of the EXRL will have a length field of 1 since 0 is an
10679 // illegal value. The number of bytes processed becomes (%LenAdjReg &
10680 // 0xff) + 1.
10681 // # Fall through to AllDoneMBB.
10682 Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10683 Register RemDestReg = HaveSingleBase ? RemSrcReg
10684 : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10685 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
10686 .addReg(StartDestReg).addMBB(StartMBB)
10687 .addReg(NextDestReg).addMBB(NextMBB);
10688 if (!HaveSingleBase)
10689 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
10690 .addReg(StartSrcReg).addMBB(StartMBB)
10691 .addReg(NextSrcReg).addMBB(NextMBB);
10692 if (IsMemset)
10693 insertMemMemOp(MBB, MBB->end(),
10694 MachineOperand::CreateReg(RemDestReg, false), DestDisp,
10695 MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
10696 MachineInstrBuilder EXRL_MIB =
10697 BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
10698 .addImm(Opcode)
10699 .addReg(LenAdjReg)
10700 .addReg(RemDestReg).addImm(DestDisp)
10701 .addReg(RemSrcReg).addImm(SrcDisp);
10702 MBB->addSuccessor(AllDoneMBB);
10703 MBB = AllDoneMBB;
10704 if (Opcode != SystemZ::MVC) {
10705 EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
10706 if (EndMBB)
10707 MBB->addLiveIn(SystemZ::CC);
10708 }
10709 }
10710 MF.getProperties().resetNoPHIs();
10711 }
10712
10713 // Handle any remaining bytes with straight-line code.
10714 while (ImmLength > 0) {
10715 uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
10716 // The previous iteration might have created out-of-range displacements.
10717 // Apply them using LA/LAY if so.
10718 foldDisplIfNeeded(DestBase, DestDisp);
10719 foldDisplIfNeeded(SrcBase, SrcDisp);
10720 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
10721 DestDisp += ThisLength;
10722 SrcDisp += ThisLength;
10723 ImmLength -= ThisLength;
10724 // If there's another CLC to go, branch to the end if a difference
10725 // was found.
10726 if (EndMBB && ImmLength > 0) {
10727 MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
10728 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10729 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
10730 .addMBB(EndMBB);
10731 MBB->addSuccessor(EndMBB);
10732 MBB->addSuccessor(NextMBB);
10733 MBB = NextMBB;
10734 }
10735 }
10736 if (EndMBB) {
10737 MBB->addSuccessor(EndMBB);
10738 MBB = EndMBB;
10739 MBB->addLiveIn(SystemZ::CC);
10740 }
10741
10742 MI.eraseFromParent();
10743 return MBB;
10744}
10745
10746// Decompose string pseudo-instruction MI into a loop that continually performs
10747// Opcode until CC != 3.
10748MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
10749 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
10750 MachineFunction &MF = *MBB->getParent();
10751 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10752 MachineRegisterInfo &MRI = MF.getRegInfo();
10753 DebugLoc DL = MI.getDebugLoc();
10754
10755 uint64_t End1Reg = MI.getOperand(0).getReg();
10756 uint64_t Start1Reg = MI.getOperand(1).getReg();
10757 uint64_t Start2Reg = MI.getOperand(2).getReg();
10758 uint64_t CharReg = MI.getOperand(3).getReg();
10759
10760 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
10761 uint64_t This1Reg = MRI.createVirtualRegister(RC);
10762 uint64_t This2Reg = MRI.createVirtualRegister(RC);
10763 uint64_t End2Reg = MRI.createVirtualRegister(RC);
10764
10765 MachineBasicBlock *StartMBB = MBB;
10766 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10767 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10768
10769 // StartMBB:
10770 // # fall through to LoopMBB
10771 MBB->addSuccessor(LoopMBB);
10772
10773 // LoopMBB:
10774 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
10775 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
10776 // R0L = %CharReg
10777 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
10778 // JO LoopMBB
10779 // # fall through to DoneMBB
10780 //
10781 // The load of R0L can be hoisted by post-RA LICM.
10782 MBB = LoopMBB;
10783
10784 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
10785 .addReg(Start1Reg).addMBB(StartMBB)
10786 .addReg(End1Reg).addMBB(LoopMBB);
10787 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
10788 .addReg(Start2Reg).addMBB(StartMBB)
10789 .addReg(End2Reg).addMBB(LoopMBB);
10790 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
10791 BuildMI(MBB, DL, TII->get(Opcode))
10792 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
10793 .addReg(This1Reg).addReg(This2Reg);
10794 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10795 .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
10796 MBB->addSuccessor(LoopMBB);
10797 MBB->addSuccessor(DoneMBB);
10798
10799 DoneMBB->addLiveIn(SystemZ::CC);
10800
10801 MI.eraseFromParent();
10802 return DoneMBB;
10803}
10804
10805// Update TBEGIN instruction with final opcode and register clobbers.
10806MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
10807 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
10808 bool NoFloat) const {
10809 MachineFunction &MF = *MBB->getParent();
10810 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
10811 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10812
10813 // Update opcode.
10814 MI.setDesc(TII->get(Opcode));
10815
10816 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
10817 // Make sure to add the corresponding GRSM bits if they are missing.
10818 uint64_t Control = MI.getOperand(2).getImm();
10819 static const unsigned GPRControlBit[16] = {
10820 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
10821 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
10822 };
10823 Control |= GPRControlBit[15];
10824 if (TFI->hasFP(MF))
10825 Control |= GPRControlBit[11];
10826 MI.getOperand(2).setImm(Control);
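// Each GRSM bit covers an even/odd register pair; for example
// GPRControlBit[15] == 0x0100 is the bit for the r14/r15 pair, which is
// always set above so the stack pointer survives the transaction, and
// GPRControlBit[11] == 0x0400 additionally protects r10/r11 when a frame
// pointer is in use.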
10827
10828 // Add GPR clobbers.
10829 for (int I = 0; I < 16; I++) {
10830 if ((Control & GPRControlBit[I]) == 0) {
10831 unsigned Reg = SystemZMC::GR64Regs[I];
10832 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10833 }
10834 }
10835
10836 // Add FPR/VR clobbers.
10837 if (!NoFloat && (Control & 4) != 0) {
10838 if (Subtarget.hasVector()) {
10839 for (unsigned Reg : SystemZMC::VR128Regs) {
10840 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10841 }
10842 } else {
10843 for (unsigned Reg : SystemZMC::FP64Regs) {
10844 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10845 }
10846 }
10847 }
10848
10849 return MBB;
10850}
10851
10852MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
10853 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
10854 MachineFunction &MF = *MBB->getParent();
10855 MachineRegisterInfo *MRI = &MF.getRegInfo();
10856 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10857 DebugLoc DL = MI.getDebugLoc();
10858
10859 Register SrcReg = MI.getOperand(0).getReg();
10860
10861 // Create new virtual register of the same class as source.
10862 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
10863 Register DstReg = MRI->createVirtualRegister(RC);
10864
10865 // Replace pseudo with a normal load-and-test that models the def as
10866 // well.
10867 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
10868 .addReg(SrcReg)
10869 .setMIFlags(MI.getFlags());
10870 MI.eraseFromParent();
10871
10872 return MBB;
10873}
10874
10875MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
10876 MachineInstr &MI, MachineBasicBlock *MBB) const {
10877 MachineFunction &MF = *MBB->getParent();
10878 MachineRegisterInfo *MRI = &MF.getRegInfo();
10879 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10880 DebugLoc DL = MI.getDebugLoc();
10881 const unsigned ProbeSize = getStackProbeSize(MF);
10882 Register DstReg = MI.getOperand(0).getReg();
10883 Register SizeReg = MI.getOperand(2).getReg();
10884
10885 MachineBasicBlock *StartMBB = MBB;
10886 MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB);
10887 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
10888 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
10889 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
10890 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
10891
10892 MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(),
10893 MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));
10894
10895 Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10896 Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10897
10898 // LoopTestMBB
10899 // BRC TailTestMBB
10900 // # fallthrough to LoopBodyMBB
10901 StartMBB->addSuccessor(LoopTestMBB);
10902 MBB = LoopTestMBB;
10903 BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
10904 .addReg(SizeReg)
10905 .addMBB(StartMBB)
10906 .addReg(IncReg)
10907 .addMBB(LoopBodyMBB);
10908 BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
10909 .addReg(PHIReg)
10910 .addImm(ProbeSize);
10911 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10912 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT)
10913 .addMBB(TailTestMBB);
10914 MBB->addSuccessor(LoopBodyMBB);
10915 MBB->addSuccessor(TailTestMBB);
10916
10917 // LoopBodyMBB: Allocate and probe by means of a volatile compare.
10918 // J LoopTestMBB
10919 MBB = LoopBodyMBB;
10920 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
10921 .addReg(PHIReg)
10922 .addImm(ProbeSize);
10923 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
10924 .addReg(SystemZ::R15D)
10925 .addImm(ProbeSize);
10926 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
10927 .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
10928 .setMemRefs(VolLdMMO);
10929 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
10930 MBB->addSuccessor(LoopTestMBB);
10931
10932 // TailTestMBB
10933 // BRC DoneMBB
10934 // # fallthrough to TailMBB
10935 MBB = TailTestMBB;
10936 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10937 .addReg(PHIReg)
10938 .addImm(0);
10939 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10940 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
10941 .addMBB(DoneMBB);
10942 MBB->addSuccessor(TailMBB);
10943 MBB->addSuccessor(DoneMBB);
10944
10945 // TailMBB
10946 // # fallthrough to DoneMBB
10947 MBB = TailMBB;
10948 BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
10949 .addReg(SystemZ::R15D)
10950 .addReg(PHIReg);
10951 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
10952 .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
10953 .setMemRefs(VolLdMMO);
10954 MBB->addSuccessor(DoneMBB);
10955
10956 // DoneMBB
10957 MBB = DoneMBB;
10958 BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
10959 .addReg(SystemZ::R15D);
10960
10961 MI.eraseFromParent();
10962 return DoneMBB;
10963}
10964
10965SDValue SystemZTargetLowering::
10966getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
10967 MachineFunction &MF = DAG.getMachineFunction();
10968 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
10969 SDLoc DL(SP);
10970 return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
10971 DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
10972}
10973
10974MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
10975 MachineInstr &MI, MachineBasicBlock *MBB) const {
10976 switch (MI.getOpcode()) {
10977 case SystemZ::ADJCALLSTACKDOWN:
10978 case SystemZ::ADJCALLSTACKUP:
10979 return emitAdjCallStack(MI, MBB);
10980
10981 case SystemZ::Select32:
10982 case SystemZ::Select64:
10983 case SystemZ::Select128:
10984 case SystemZ::SelectF32:
10985 case SystemZ::SelectF64:
10986 case SystemZ::SelectF128:
10987 case SystemZ::SelectVR32:
10988 case SystemZ::SelectVR64:
10989 case SystemZ::SelectVR128:
10990 return emitSelect(MI, MBB);
10991
10992 case SystemZ::CondStore8Mux:
10993 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
10994 case SystemZ::CondStore8MuxInv:
10995 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
10996 case SystemZ::CondStore16Mux:
10997 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
10998 case SystemZ::CondStore16MuxInv:
10999 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
11000 case SystemZ::CondStore32Mux:
11001 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
11002 case SystemZ::CondStore32MuxInv:
11003 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
11004 case SystemZ::CondStore8:
11005 return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
11006 case SystemZ::CondStore8Inv:
11007 return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
11008 case SystemZ::CondStore16:
11009 return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
11010 case SystemZ::CondStore16Inv:
11011 return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
11012 case SystemZ::CondStore32:
11013 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
11014 case SystemZ::CondStore32Inv:
11015 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
11016 case SystemZ::CondStore64:
11017 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
11018 case SystemZ::CondStore64Inv:
11019 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
11020 case SystemZ::CondStoreF32:
11021 return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
11022 case SystemZ::CondStoreF32Inv:
11023 return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
11024 case SystemZ::CondStoreF64:
11025 return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
11026 case SystemZ::CondStoreF64Inv:
11027 return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
11028
11029 case SystemZ::SCmp128Hi:
11030 return emitICmp128Hi(MI, MBB, false);
11031 case SystemZ::UCmp128Hi:
11032 return emitICmp128Hi(MI, MBB, true);
11033
11034 case SystemZ::PAIR128:
11035 return emitPair128(MI, MBB);
11036 case SystemZ::AEXT128:
11037 return emitExt128(MI, MBB, false);
11038 case SystemZ::ZEXT128:
11039 return emitExt128(MI, MBB, true);
11040
11041 case SystemZ::ATOMIC_SWAPW:
11042 return emitAtomicLoadBinary(MI, MBB, 0);
11043
11044 case SystemZ::ATOMIC_LOADW_AR:
11045 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
11046 case SystemZ::ATOMIC_LOADW_AFI:
11047 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);
11048
11049 case SystemZ::ATOMIC_LOADW_SR:
11050 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);
11051
11052 case SystemZ::ATOMIC_LOADW_NR:
11053 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
11054 case SystemZ::ATOMIC_LOADW_NILH:
11055 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);
11056
11057 case SystemZ::ATOMIC_LOADW_OR:
11058 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
11059 case SystemZ::ATOMIC_LOADW_OILH:
11060 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);
11061
11062 case SystemZ::ATOMIC_LOADW_XR:
11063 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
11064 case SystemZ::ATOMIC_LOADW_XILF:
11065 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);
11066
11067 case SystemZ::ATOMIC_LOADW_NRi:
11068 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
11069 case SystemZ::ATOMIC_LOADW_NILHi:
11070 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);
11071
11072 case SystemZ::ATOMIC_LOADW_MIN:
11073 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
11074 case SystemZ::ATOMIC_LOADW_MAX:
11075 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
11076 case SystemZ::ATOMIC_LOADW_UMIN:
11077 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
11078 case SystemZ::ATOMIC_LOADW_UMAX:
11079 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);
11080
11081 case SystemZ::ATOMIC_CMP_SWAPW:
11082 return emitAtomicCmpSwapW(MI, MBB);
11083 case SystemZ::MVCImm:
11084 case SystemZ::MVCReg:
11085 return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
11086 case SystemZ::NCImm:
11087 return emitMemMemWrapper(MI, MBB, SystemZ::NC);
11088 case SystemZ::OCImm:
11089 return emitMemMemWrapper(MI, MBB, SystemZ::OC);
11090 case SystemZ::XCImm:
11091 case SystemZ::XCReg:
11092 return emitMemMemWrapper(MI, MBB, SystemZ::XC);
11093 case SystemZ::CLCImm:
11094 case SystemZ::CLCReg:
11095 return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
11096 case SystemZ::MemsetImmImm:
11097 case SystemZ::MemsetImmReg:
11098 case SystemZ::MemsetRegImm:
11099 case SystemZ::MemsetRegReg:
11100 return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
11101 case SystemZ::CLSTLoop:
11102 return emitStringWrapper(MI, MBB, SystemZ::CLST);
11103 case SystemZ::MVSTLoop:
11104 return emitStringWrapper(MI, MBB, SystemZ::MVST);
11105 case SystemZ::SRSTLoop:
11106 return emitStringWrapper(MI, MBB, SystemZ::SRST);
11107 case SystemZ::TBEGIN:
11108 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
11109 case SystemZ::TBEGIN_nofloat:
11110 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
11111 case SystemZ::TBEGINC:
11112 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
11113 case SystemZ::LTEBRCompare_Pseudo:
11114 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
11115 case SystemZ::LTDBRCompare_Pseudo:
11116 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
11117 case SystemZ::LTXBRCompare_Pseudo:
11118 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
11119
11120 case SystemZ::PROBED_ALLOCA:
11121 return emitProbedAlloca(MI, MBB);
11122 case SystemZ::EH_SjLj_SetJmp:
11123 return emitEHSjLjSetJmp(MI, MBB);
11124 case SystemZ::EH_SjLj_LongJmp:
11125 return emitEHSjLjLongJmp(MI, MBB);
11126
11127 case TargetOpcode::STACKMAP:
11128 case TargetOpcode::PATCHPOINT:
11129 return emitPatchPoint(MI, MBB);
11130
11131 default:
11132 llvm_unreachable("Unexpected instr type to insert");
11133 }
11134}
11135
11136// This is only used by the isel schedulers, and is needed only to prevent
11137// compiler from crashing when list-ilp is used.
11138const TargetRegisterClass *
11139SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
11140 if (VT == MVT::Untyped)
11141 return &SystemZ::ADDR128BitRegClass;
11142 return TargetLowering::getRepRegClassFor(VT);
11143}
11144
11145SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
11146 SelectionDAG &DAG) const {
11147 SDLoc dl(Op);
11148 /*
11149 The rounding method is in FPC Byte 3 bits 6-7, and has the following
11150 settings:
11151 00 Round to nearest
11152 01 Round to 0
11153 10 Round to +inf
11154 11 Round to -inf
11155
11156 FLT_ROUNDS, on the other hand, expects the following:
11157 -1 Undefined
11158 0 Round to 0
11159 1 Round to nearest
11160 2 Round to +inf
11161 3 Round to -inf
11162 */
11163
11164 // Save FPC to register.
11165 SDValue Chain = Op.getOperand(0);
11166 SDValue EFPC(
11167 DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
11168 Chain = EFPC.getValue(1);
11169
11170 // Transform as necessary
11171 SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
11172 DAG.getConstant(3, dl, MVT::i32));
11173 // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
11174 SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
11175 DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
11176 DAG.getConstant(1, dl, MVT::i32)));
11177
11178 SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
11179 DAG.getConstant(1, dl, MVT::i32));
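// Worked through the four FPC values: 0 -> (0^0)^1 = 1 (nearest),
// 1 -> (1^0)^1 = 0 (toward zero), 2 -> (2^1)^1 = 2 (+inf) and
// 3 -> (3^1)^1 = 3 (-inf), matching the FLT_ROUNDS encoding listed above.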
11180 RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());
11181
11182 return DAG.getMergeValues({RetVal, Chain}, dl);
11183}
11184
11185SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
11186 SelectionDAG &DAG) const {
11187 EVT VT = Op.getValueType();
11188 Op = Op.getOperand(0);
11189 EVT OpVT = Op.getValueType();
11190
11191 assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector.");
11192
11193 SDLoc DL(Op);
11194
11195 // load a 0 vector for the third operand of VSUM.
11196 SDValue Zero = DAG.getSplatBuildVector(OpVT, DL, DAG.getConstant(0, DL, VT));
11197
11198 // execute VSUM.
11199 switch (OpVT.getScalarSizeInBits()) {
11200 case 8:
11201 case 16:
11202 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero);
11203 [[fallthrough]];
11204 case 32:
11205 case 64:
11206 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op,
11207 DAG.getBitcast(Op.getValueType(), Zero));
11208 break;
11209 case 128:
11210 break; // VSUM over v1i128 should not happen and would be a noop
11211 default:
11212 llvm_unreachable("Unexpected scalar size.");
11213 }
11214 // Cast to original vector type, retrieve last element.
11215 return DAG.getNode(
11216 ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op),
11217 DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32));
11218}
11219
11220static void printFunctionArgExts(const Function *F, raw_fd_ostream &OS) {
11221 FunctionType *FT = F->getFunctionType();
11222 const AttributeList &Attrs = F->getAttributes();
11223 if (Attrs.hasRetAttrs())
11224 OS << Attrs.getAsString(AttributeList::ReturnIndex) << " ";
11225 OS << *F->getReturnType() << " @" << F->getName() << "(";
11226 for (unsigned I = 0, E = FT->getNumParams(); I != E; ++I) {
11227 if (I)
11228 OS << ", ";
11229 OS << *FT->getParamType(I);
11230 AttributeSet ArgAttrs = Attrs.getParamAttrs(I);
11231 for (auto A : {Attribute::SExt, Attribute::ZExt, Attribute::NoExt})
11232 if (ArgAttrs.hasAttribute(A))
11233 OS << " " << Attribute::getNameFromAttrKind(A);
11234 }
11235 OS << ")\n";
11236}
11237
11238bool SystemZTargetLowering::isInternal(const Function *Fn) const {
11239 std::map<const Function *, bool>::iterator Itr = IsInternalCache.find(Fn);
11240 if (Itr == IsInternalCache.end())
11241 Itr = IsInternalCache
11242 .insert(std::pair<const Function *, bool>(
11243 Fn, (Fn->hasLocalLinkage() && !Fn->hasAddressTaken())))
11244 .first;
11245 return Itr->second;
11246}
11247
11248void SystemZTargetLowering::
11249verifyNarrowIntegerArgs_Call(const SmallVectorImpl<ISD::OutputArg> &Outs,
11250 const Function *F, SDValue Callee) const {
11251 // Temporarily only do the check when explicitly requested, until it can be
11252 // enabled by default.
11253 if (!EnableIntArgExtCheck)
11254 return;
11255
11256 bool IsInternal = false;
11257 const Function *CalleeFn = nullptr;
11258 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
11259 if ((CalleeFn = dyn_cast<Function>(G->getGlobal())))
11260 IsInternal = isInternal(CalleeFn);
11261 if (!IsInternal && !verifyNarrowIntegerArgs(Outs)) {
11262 errs() << "ERROR: Missing extension attribute of passed "
11263 << "value in call to function:\n" << "Callee: ";
11264 if (CalleeFn != nullptr)
11265 printFunctionArgExts(CalleeFn, errs());
11266 else
11267 errs() << "-\n";
11268 errs() << "Caller: ";
11269 printFunctionArgExts(F, errs());
11270 llvm_unreachable("");
11271 }
11272}
11273
11274void SystemZTargetLowering::
11275verifyNarrowIntegerArgs_Ret(const SmallVectorImpl<ISD::OutputArg> &Outs,
11276 const Function *F) const {
11277 // Temporarily only do the check when explicitly requested, until it can be
11278 // enabled by default.
11279 if (!EnableIntArgExtCheck)
11280 return;
11281
11282 if (!isInternal(F) && !verifyNarrowIntegerArgs(Outs)) {
11283 errs() << "ERROR: Missing extension attribute of returned "
11284 << "value from function:\n";
11285 printFunctionArgExts(F, errs());
11286 llvm_unreachable("");
11287 }
11288}
11289
11290// Verify that narrow integer arguments are extended as required by the ABI.
11291// Return false if an error is found.
11292bool SystemZTargetLowering::verifyNarrowIntegerArgs(
11293 const SmallVectorImpl<ISD::OutputArg> &Outs) const {
11294 if (!Subtarget.isTargetELF())
11295 return true;
11296
11297 if (EnableIntArgExtCheck.getNumOccurrences()) {
11298 if (!EnableIntArgExtCheck)
11299 return true;
11300 } else if (!getTargetMachine().Options.VerifyArgABICompliance)
11301 return true;
11302
11303 for (unsigned i = 0; i < Outs.size(); ++i) {
11304 MVT VT = Outs[i].VT;
11305 ISD::ArgFlagsTy Flags = Outs[i].Flags;
11306 if (VT.isInteger()) {
11307 assert((VT == MVT::i32 || VT.getSizeInBits() >= 64) &&
11308 "Unexpected integer argument VT.");
11309 if (VT == MVT::i32 &&
11310 !Flags.isSExt() && !Flags.isZExt() && !Flags.isNoExt())
11311 return false;
11312 }
11313 }
11314
11315 return true;
11316}
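
// Editor's note (illustrative sketch, not LLVM API): the per-value predicate
// that the loop above applies to each outgoing integer argument or return
// value on ELF targets.  Values of 64 bits or more need no attribute; an i32
// (the only narrower VT expected here) must be marked signext, zeroext, or
// noext.  ExtFlags and isProperlyExtended are hypothetical names.
struct ExtFlags { bool SExt, ZExt, NoExt; };

static bool isProperlyExtended(unsigned SizeInBits, ExtFlags Flags) {
  if (SizeInBits >= 64)
    return true;
  return Flags.SExt || Flags.ZExt || Flags.NoExt;
}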
Definition ISDOpcodes.h:703
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:764
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:644
@ STRICT_FMAXIMUM
Definition ISDOpcodes.h:465
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:571
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:840
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:801
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum/maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:889
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:878
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:726
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:968
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:795
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:323
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:479
@ STRICT_FROUNDEVEN
Definition ISDOpcodes.h:459
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:473
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:495
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:472
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:916
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:500
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:738
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:422
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:560
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:949
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:911
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
Definition ISDOpcodes.h:987
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:453
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:157
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:846
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:823
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:529
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:551
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
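As a hedged sketch of how the condition-code helpers listed above are typically used together (header paths and the wrapper name normalizeCond are assumptions, not taken from this file):
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;
// Illustrative only: swap the operand order of a comparison and then invert it.
static ISD::CondCode normalizeCond(ISD::CondCode CC, EVT VT, bool SwapOps) {
  if (SwapOps)
    CC = ISD::getSetCCSwappedOperands(CC); // condition for (Y op X)
  return ISD::getSetCCInverse(CC, VT);     // condition for !(X op Y)
}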
Flag
These should be considered private to the implementation of the MCInstrDesc class.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignores it.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
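A minimal example (header path assumed) of the PatternMatch helpers above, binding the operands of an "and" whose right-hand side is a constant integer or splat:
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
using namespace llvm;
using namespace llvm::PatternMatch;
// Illustrative only: returns true and binds X and C when V is "and X, C".
static bool matchAndWithConstant(Value *V, Value *&X, const APInt *&C) {
  return match(V, m_And(m_Value(X), m_APInt(C)));
}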
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Define
Register definition.
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
const unsigned GR64Regs[16]
const unsigned VR128Regs[32]
const unsigned VR16Regs[32]
const unsigned GR128Regs[16]
const unsigned FP32Regs[16]
const unsigned FP16Regs[16]
const unsigned GR32Regs[16]
const unsigned FP64Regs[16]
const int64_t ELFCallFrameSize
const unsigned VR64Regs[32]
const unsigned FP128Regs[16]
const unsigned VR32Regs[32]
unsigned odd128(bool Is32bit)
const unsigned CCMASK_CMP_GE
Definition SystemZ.h:41
static bool isImmHH(uint64_t Val)
Definition SystemZ.h:177
const unsigned CCMASK_TEND
Definition SystemZ.h:98
const unsigned CCMASK_CS_EQ
Definition SystemZ.h:68
const unsigned CCMASK_TBEGIN
Definition SystemZ.h:93
const unsigned CCMASK_0
Definition SystemZ.h:28
const MCPhysReg ELFArgFPRs[ELFNumArgFPRs]
MachineBasicBlock * splitBlockBefore(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_TM_SOME_1
Definition SystemZ.h:83
const unsigned CCMASK_LOGICAL_CARRY
Definition SystemZ.h:61
const unsigned TDCMASK_NORMAL_MINUS
Definition SystemZ.h:123
const unsigned CCMASK_TDC
Definition SystemZ.h:110
const unsigned CCMASK_FCMP
Definition SystemZ.h:49
const unsigned CCMASK_TM_SOME_0
Definition SystemZ.h:82
static bool isImmHL(uint64_t Val)
Definition SystemZ.h:172
const unsigned TDCMASK_SUBNORMAL_MINUS
Definition SystemZ.h:125
const unsigned PFD_READ
Definition SystemZ.h:116
const unsigned CCMASK_1
Definition SystemZ.h:29
const unsigned TDCMASK_NORMAL_PLUS
Definition SystemZ.h:122
const unsigned PFD_WRITE
Definition SystemZ.h:117
const unsigned CCMASK_CMP_GT
Definition SystemZ.h:38
const unsigned TDCMASK_QNAN_MINUS
Definition SystemZ.h:129
const unsigned CCMASK_CS
Definition SystemZ.h:70
const unsigned CCMASK_ANY
Definition SystemZ.h:32
const unsigned CCMASK_ARITH
Definition SystemZ.h:56
const unsigned CCMASK_TM_MIXED_MSB_0
Definition SystemZ.h:79
const unsigned TDCMASK_SUBNORMAL_PLUS
Definition SystemZ.h:124
static bool isImmLL(uint64_t Val)
Definition SystemZ.h:162
const unsigned VectorBits
Definition SystemZ.h:155
static bool isImmLH(uint64_t Val)
Definition SystemZ.h:167
MachineBasicBlock * emitBlockAfter(MachineBasicBlock *MBB)
const unsigned TDCMASK_INFINITY_PLUS
Definition SystemZ.h:126
unsigned reverseCCMask(unsigned CCMask)
const unsigned CCMASK_TM_ALL_0
Definition SystemZ.h:78
const unsigned IPM_CC
Definition SystemZ.h:113
const unsigned CCMASK_CMP_LE
Definition SystemZ.h:40
const unsigned CCMASK_CMP_O
Definition SystemZ.h:45
const unsigned CCMASK_CMP_EQ
Definition SystemZ.h:36
const unsigned VectorBytes
Definition SystemZ.h:159
const unsigned TDCMASK_INFINITY_MINUS
Definition SystemZ.h:127
const unsigned CCMASK_ICMP
Definition SystemZ.h:48
const unsigned CCMASK_VCMP_ALL
Definition SystemZ.h:102
const unsigned CCMASK_VCMP_NONE
Definition SystemZ.h:104
MachineBasicBlock * splitBlockAfter(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_VCMP
Definition SystemZ.h:105
const unsigned CCMASK_TM_MIXED_MSB_1
Definition SystemZ.h:80
const unsigned CCMASK_TM_MSB_0
Definition SystemZ.h:84
const unsigned CCMASK_ARITH_OVERFLOW
Definition SystemZ.h:55
const unsigned CCMASK_CS_NE
Definition SystemZ.h:69
const unsigned TDCMASK_SNAN_PLUS
Definition SystemZ.h:130
const unsigned CCMASK_TM
Definition SystemZ.h:86
const unsigned CCMASK_3
Definition SystemZ.h:31
const unsigned CCMASK_NONE
Definition SystemZ.h:27
const unsigned CCMASK_CMP_LT
Definition SystemZ.h:37
const unsigned CCMASK_CMP_NE
Definition SystemZ.h:39
const unsigned TDCMASK_ZERO_PLUS
Definition SystemZ.h:120
const unsigned TDCMASK_QNAN_PLUS
Definition SystemZ.h:128
const unsigned TDCMASK_ZERO_MINUS
Definition SystemZ.h:121
unsigned even128(bool Is32bit)
const unsigned CCMASK_TM_ALL_1
Definition SystemZ.h:81
const unsigned CCMASK_LOGICAL_BORROW
Definition SystemZ.h:63
const unsigned ELFNumArgFPRs
const unsigned CCMASK_CMP_UO
Definition SystemZ.h:44
const unsigned CCMASK_LOGICAL
Definition SystemZ.h:65
const unsigned CCMASK_TM_MSB_1
Definition SystemZ.h:85
const unsigned TDCMASK_SNAN_MINUS
Definition SystemZ.h:131
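As a hedged illustration of the SystemZ::isImmLL/isImmLH/isImmHL/isImmHH predicates referenced above, the enum and helper below (hypothetical, not part of the backend) classify which 16-bit halfword of a 64-bit immediate is populated:
#include <cstdint>
#include <optional>
enum class ImmChunk { LL, LH, HL, HH };
// Illustrative only: mirrors the halfword tests the predicates above perform.
static std::optional<ImmChunk> classifyImm(uint64_t Val) {
  if ((Val & ~0x000000000000ffffULL) == 0) return ImmChunk::LL; // bits 0-15
  if ((Val & ~0x00000000ffff0000ULL) == 0) return ImmChunk::LH; // bits 16-31
  if ((Val & ~0x0000ffff00000000ULL) == 0) return ImmChunk::HL; // bits 32-47
  if ((Val & ~0xffff000000000000ULL) == 0) return ImmChunk::HH; // bits 48-63
  return std::nullopt;
}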
initializer< Ty > init(const Ty &Val)
support::ulittle32_t Word
Definition IRSymtab.h:53
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
NodeAddr< CodeNode * > Code
Definition RDFGraph.h:388
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:532
@ Length
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
@ Done
Definition Threading.h:60
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
constexpr T maskLeadingOnes(unsigned N)
Create a bitmask with the N left-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:88
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
LLVM_ABI void dumpBytes(ArrayRef< uint8_t > Bytes, raw_ostream &OS)
Convert ‘Bytes’ to a hex string and output to ‘OS’.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:345
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most, stopping at the first 1.
Definition bit.h:202
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least, stopping at the first 1.
Definition bit.h:236
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Success
The lock was released successfully.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
@ BeforeLegalizeTypes
Definition DAGCombine.h:16
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:330
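A short, hedged sketch (include paths assumed) of the MathExtras/bit helpers listed above, showing the kind of mask and width arithmetic they are used for:
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cstdint>
// Illustrative only: typical immediate/mask computations with the helpers above.
static void mathHelpersDemo() {
  uint64_t Low12 = llvm::maskTrailingOnes<uint64_t>(12); // 0xFFF
  bool Fits16 = llvm::isUInt<16>(0xABCDu);               // true
  int64_t Sext = llvm::SignExtend64<20>(0x80000);        // negative: bit 19 set
  int TrailingZeros = llvm::countr_zero(0x100u);         // 8
  (void)Low12; (void)Fits16; (void)Sext; (void)TrailingZeros;
}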
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
#define EQ(a, b)
Definition regexec.c:65
AddressingMode(bool LongDispl, bool IdxReg)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition ValueTypes.h:248
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
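A hedged example (include paths assumed) using the EVT accessors above: build a v4i32 type and query its shape.
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;
// Illustrative only: construct an EVT and inspect it with the accessors above.
static void evtDemo(LLVMContext &Ctx) {
  EVT VT = EVT::getVectorVT(Ctx, MVT::i32, 4);     // v4i32
  unsigned NumElts = VT.getVectorNumElements();    // 4
  uint64_t EltBits = VT.getScalarSizeInBits();     // 32
  bool IsIntVec = VT.isVector() && VT.isInteger(); // true
  (void)NumElts; (void)EltBits; (void)IsIntVec;
}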
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:186
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:172
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:311
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:180
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:145
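A hedged sketch (include path assumed) of the KnownBits operations above, assuming both inputs are at most 64 bits wide:
#include "llvm/Support/KnownBits.h"
using namespace llvm;
// Illustrative only: widen two KnownBits values and keep what both agree on.
static KnownBits combineKnown(const KnownBits &A, const KnownBits &B) {
  KnownBits AZ = A.zext(64);   // zero-extended view of A
  KnownBits BS = B.sext(64);   // sign-extended view of B
  return AZ.intersectWith(BS); // bits known in both views
}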
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SmallVector< unsigned, 2 > OpVals
bool isVectorConstantLegal(const SystemZSubtarget &Subtarget)
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
This structure is used to pass arguments to makeLibCall function.