1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
16#include "llvm/ADT/APSInt.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34// https://github.com/llvm/llvm-project/issues/114425
35#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
36#pragma inline_depth(0)
37#endif
38
39//===--------------------------------------------------------------------===//
40/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
41/// instructions for SelectionDAG operations.
42///
43namespace {
44
45class AArch64DAGToDAGISel : public SelectionDAGISel {
46
47 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
48 /// make the right decision when generating code for different targets.
49 const AArch64Subtarget *Subtarget;
50
51public:
52 AArch64DAGToDAGISel() = delete;
53
54 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
55 CodeGenOptLevel OptLevel)
56 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
57
58 bool runOnMachineFunction(MachineFunction &MF) override {
59 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
 60 return SelectionDAGISel::runOnMachineFunction(MF);
 61 }
62
63 void Select(SDNode *Node) override;
64
65 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
66 /// inline asm expressions.
67 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
68 InlineAsm::ConstraintCode ConstraintID,
69 std::vector<SDValue> &OutOps) override;
70
71 template <signed Low, signed High, signed Scale>
72 bool SelectRDVLImm(SDValue N, SDValue &Imm);
73
74 template <signed Low, signed High>
75 bool SelectRDSVLShiftImm(SDValue N, SDValue &Imm);
76
77 bool SelectAddUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
78
79 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
80 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
81 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
82 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
83 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
84 return SelectShiftedRegister(N, false, Reg, Shift);
85 }
86 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
87 return SelectShiftedRegister(N, true, Reg, Shift);
88 }
89 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
90 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
91 }
92 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
93 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
94 }
95 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
96 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
97 }
98 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
99 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
100 }
101 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
102 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
103 }
104 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
105 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
106 }
107 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
108 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
109 }
110 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
111 return SelectAddrModeIndexed(N, 1, Base, OffImm);
112 }
113 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
114 return SelectAddrModeIndexed(N, 2, Base, OffImm);
115 }
116 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
117 return SelectAddrModeIndexed(N, 4, Base, OffImm);
118 }
119 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
120 return SelectAddrModeIndexed(N, 8, Base, OffImm);
121 }
122 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
123 return SelectAddrModeIndexed(N, 16, Base, OffImm);
124 }
125 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
126 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
127 }
128 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
129 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
130 }
131 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
132 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
133 }
134 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
135 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
136 }
137 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
138 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
139 }
140 template <unsigned Size, unsigned Max>
141 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
142 // Test if there is an appropriate addressing mode and check if the
143 // immediate fits.
144 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
145 if (Found) {
146 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
147 int64_t C = CI->getSExtValue();
148 if (C <= Max)
149 return true;
150 }
151 }
152
153 // Otherwise, base only, materialize address in register.
154 Base = N;
155 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
156 return true;
157 }
158
159 template<int Width>
160 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
161 SDValue &SignExtend, SDValue &DoShift) {
162 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
163 }
164
165 template<int Width>
166 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
167 SDValue &SignExtend, SDValue &DoShift) {
168 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
169 }
170
171 bool SelectExtractHigh(SDValue N, SDValue &Res) {
172 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
173 N = N->getOperand(0);
174 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
175 !isa<ConstantSDNode>(N->getOperand(1)))
176 return false;
177 EVT VT = N->getValueType(0);
178 EVT LVT = N->getOperand(0).getValueType();
179 unsigned Index = N->getConstantOperandVal(1);
180 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
181 Index != VT.getVectorNumElements())
182 return false;
183 Res = N->getOperand(0);
184 return true;
185 }
186
187 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
188 if (N.getOpcode() != AArch64ISD::VLSHR)
189 return false;
190 SDValue Op = N->getOperand(0);
191 EVT VT = Op.getValueType();
192 unsigned ShtAmt = N->getConstantOperandVal(1);
193 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
194 return false;
195
196 APInt Imm;
197 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
198 Imm = APInt(VT.getScalarSizeInBits(),
199 Op.getOperand(1).getConstantOperandVal(0)
200 << Op.getOperand(1).getConstantOperandVal(1));
201 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
202 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
203 Imm = APInt(VT.getScalarSizeInBits(),
204 Op.getOperand(1).getConstantOperandVal(0));
205 else
206 return false;
207
208 if (Imm != 1ULL << (ShtAmt - 1))
209 return false;
210
211 Res1 = Op.getOperand(0);
212 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
213 return true;
214 }
215
216 bool SelectDupZeroOrUndef(SDValue N) {
217 switch(N->getOpcode()) {
218 case ISD::UNDEF:
219 return true;
220 case AArch64ISD::DUP:
221 case ISD::SPLAT_VECTOR: {
222 auto Opnd0 = N->getOperand(0);
223 if (isNullConstant(Opnd0))
224 return true;
225 if (isNullFPConstant(Opnd0))
226 return true;
227 break;
228 }
229 default:
230 break;
231 }
232
233 return false;
234 }
235
236 bool SelectAny(SDValue) { return true; }
237
238 bool SelectDupZero(SDValue N) {
239 switch(N->getOpcode()) {
240 case AArch64ISD::DUP:
241 case ISD::SPLAT_VECTOR: {
242 auto Opnd0 = N->getOperand(0);
243 if (isNullConstant(Opnd0))
244 return true;
245 if (isNullFPConstant(Opnd0))
246 return true;
247 break;
248 }
249 }
250
251 return false;
252 }
253
254 template <MVT::SimpleValueType VT, bool Negate>
255 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
256 return SelectSVEAddSubImm(N, VT, Imm, Shift, Negate);
257 }
258
259 template <MVT::SimpleValueType VT, bool Negate>
260 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
261 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
262 }
263
264 template <MVT::SimpleValueType VT>
265 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
266 return SelectSVECpyDupImm(N, VT, Imm, Shift);
267 }
268
269 template <MVT::SimpleValueType VT, bool Invert = false>
270 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
271 return SelectSVELogicalImm(N, VT, Imm, Invert);
272 }
273
274 template <MVT::SimpleValueType VT>
275 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
276 return SelectSVEArithImm(N, VT, Imm);
277 }
278
279 template <unsigned Low, unsigned High, bool AllowSaturation = false>
280 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
281 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
282 }
283
284 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
285 if (N->getOpcode() != ISD::SPLAT_VECTOR)
286 return false;
287
288 EVT EltVT = N->getValueType(0).getVectorElementType();
289 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
290 /* High */ EltVT.getFixedSizeInBits(),
291 /* AllowSaturation */ true, Imm);
292 }
293
294 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
295 template<signed Min, signed Max, signed Scale, bool Shift>
296 bool SelectCntImm(SDValue N, SDValue &Imm) {
 297 if (!isa<ConstantSDNode>(N))
 298 return false;
299
300 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
301 if (Shift)
302 MulImm = 1LL << MulImm;
303
304 if ((MulImm % std::abs(Scale)) != 0)
305 return false;
306
307 MulImm /= Scale;
308 if ((MulImm >= Min) && (MulImm <= Max)) {
309 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
310 return true;
311 }
312
313 return false;
314 }
315
316 template <signed Max, signed Scale>
317 bool SelectEXTImm(SDValue N, SDValue &Imm) {
 318 if (!isa<ConstantSDNode>(N))
 319 return false;
320
321 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
322
323 if (MulImm >= 0 && MulImm <= Max) {
324 MulImm *= Scale;
325 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
326 return true;
327 }
328
329 return false;
330 }
331
332 template <unsigned BaseReg, unsigned Max>
333 bool ImmToReg(SDValue N, SDValue &Imm) {
334 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
335 uint64_t C = CI->getZExtValue();
336
337 if (C > Max)
338 return false;
339
340 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
341 return true;
342 }
343 return false;
344 }
345
346 /// Form sequences of consecutive 64/128-bit registers for use in NEON
347 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
 348 /// between 1 and 4 elements. If it contains a single element, that element
 349 /// is returned unchanged; otherwise a REG_SEQUENCE value is returned.
 350 SDValue createDTuple(ArrayRef<SDValue> Vecs);
 351 SDValue createQTuple(ArrayRef<SDValue> Vecs);
352 // Form a sequence of SVE registers for instructions using list of vectors,
353 // e.g. structured loads and stores (ldN, stN).
354 SDValue createZTuple(ArrayRef<SDValue> Vecs);
355
356 // Similar to above, except the register must start at a multiple of the
357 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
358 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
359
360 /// Generic helper for the createDTuple/createQTuple
361 /// functions. Those should almost always be called instead.
362 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
363 const unsigned SubRegs[]);
364
365 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
366
367 bool tryIndexedLoad(SDNode *N);
368
369 void SelectPtrauthAuth(SDNode *N);
370 void SelectPtrauthResign(SDNode *N);
371
372 bool trySelectStackSlotTagP(SDNode *N);
373 void SelectTagP(SDNode *N);
374
375 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
376 unsigned SubRegIdx);
377 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
378 unsigned SubRegIdx);
379 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
380 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
381 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
382 unsigned Opc_rr, unsigned Opc_ri,
383 bool IsIntr = false);
384 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
385 unsigned Scale, unsigned Opc_ri,
386 unsigned Opc_rr);
387 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
388 bool IsZmMulti, unsigned Opcode,
389 bool HasPred = false);
390 void SelectPExtPair(SDNode *N, unsigned Opc);
391 void SelectWhilePair(SDNode *N, unsigned Opc);
392 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
393 void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
394 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
395 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
396 bool IsTupleInput, unsigned Opc);
397 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
398
399 template <unsigned MaxIdx, unsigned Scale>
400 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
401 unsigned Op);
402 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
403 unsigned Op, unsigned MaxIdx, unsigned Scale,
404 unsigned BaseReg = 0);
405 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
406 /// SVE Reg+Imm addressing mode.
407 template <int64_t Min, int64_t Max>
408 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
409 SDValue &OffImm);
410 /// SVE Reg+Reg address mode.
411 template <unsigned Scale>
412 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
413 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
414 }
415
416 void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
417 unsigned Opc, uint32_t MaxImm);
418
419 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);
420
421 template <unsigned MaxIdx, unsigned Scale>
422 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
423 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
424 }
425
426 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
427 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
428 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
429 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
430 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
431 unsigned Opc_rr, unsigned Opc_ri);
432 std::tuple<unsigned, SDValue, SDValue>
433 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
434 const SDValue &OldBase, const SDValue &OldOffset,
435 unsigned Scale);
436
437 bool tryBitfieldExtractOp(SDNode *N);
438 bool tryBitfieldExtractOpFromSExt(SDNode *N);
439 bool tryBitfieldInsertOp(SDNode *N);
440 bool tryBitfieldInsertInZeroOp(SDNode *N);
441 bool tryShiftAmountMod(SDNode *N);
442
443 bool tryReadRegister(SDNode *N);
444 bool tryWriteRegister(SDNode *N);
445
446 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
447 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
448
449 bool trySelectXAR(SDNode *N);
450
451// Include the pieces autogenerated from the target description.
452#include "AArch64GenDAGISel.inc"
453
454private:
455 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
456 SDValue &Shift);
457 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
458 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
459 SDValue &OffImm) {
460 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
461 }
462 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
463 unsigned Size, SDValue &Base,
464 SDValue &OffImm);
465 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
466 SDValue &OffImm);
467 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
468 SDValue &OffImm);
469 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
470 SDValue &Offset, SDValue &SignExtend,
471 SDValue &DoShift);
472 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
473 SDValue &Offset, SDValue &SignExtend,
474 SDValue &DoShift);
475 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
476 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
477 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
478 SDValue &Offset, SDValue &SignExtend);
479
480 template<unsigned RegWidth>
481 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
482 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
483 }
484
485 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
486
487 template<unsigned RegWidth>
488 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
489 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
490 }
491
492 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
493 unsigned Width);
494
495 bool SelectCMP_SWAP(SDNode *N);
496
497 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
498 bool Negate);
499 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
500 bool Negate);
501 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
502 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
503
504 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
505 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
506 bool AllowSaturation, SDValue &Imm);
507
508 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
509 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
510 SDValue &Offset);
511 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
512 SDValue &Offset, unsigned Scale = 1);
513
514 bool SelectAllActivePredicate(SDValue N);
515 bool SelectAnyPredicate(SDValue N);
516
517 bool SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, SDValue &Imm);
518
519 template <bool MatchCBB>
520 bool SelectCmpBranchExtOperand(SDValue N, SDValue &Reg, SDValue &ExtType);
521};
522
523class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
524public:
525 static char ID;
526 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
527 CodeGenOptLevel OptLevel)
 528 : SelectionDAGISelLegacy(
 529 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
530};
531} // end anonymous namespace
532
533char AArch64DAGToDAGISelLegacy::ID = 0;
534
535INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
536
537/// isIntImmediate - This method tests to see if the node is a constant
538/// operand. If so Imm will receive the 32-bit value.
539static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
 540 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
 541 Imm = C->getZExtValue();
542 return true;
543 }
544 return false;
545}
546
547 // isIntImmediate - This method tests to see if N is a constant operand.
548 // If so, Imm will receive the value.
549static bool isIntImmediate(SDValue N, uint64_t &Imm) {
550 return isIntImmediate(N.getNode(), Imm);
551}
552
553// isOpcWithIntImmediate - This method tests to see if the node is a specific
554 // opcode and that it has an immediate integer right operand.
555 // If so, Imm will receive the 32-bit value.
556static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
557 uint64_t &Imm) {
558 return N->getOpcode() == Opc &&
559 isIntImmediate(N->getOperand(1).getNode(), Imm);
560}
561
562// isIntImmediateEq - This method tests to see if N is a constant operand that
563// is equivalent to 'ImmExpected'.
564#ifndef NDEBUG
565static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
566 uint64_t Imm;
567 if (!isIntImmediate(N.getNode(), Imm))
568 return false;
569 return Imm == ImmExpected;
570}
571#endif
572
573bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
574 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
575 std::vector<SDValue> &OutOps) {
576 switch(ConstraintID) {
577 default:
578 llvm_unreachable("Unexpected asm memory constraint");
579 case InlineAsm::ConstraintCode::m:
580 case InlineAsm::ConstraintCode::o:
581 case InlineAsm::ConstraintCode::Q:
582 // We need to make sure that this one operand does not end up in XZR, thus
583 // require the address to be in a PointerRegClass register.
584 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
585 const TargetRegisterClass *TRC = TRI->getPointerRegClass();
586 SDLoc dl(Op);
587 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
588 SDValue NewOp =
589 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
590 dl, Op.getValueType(),
591 Op, RC), 0);
592 OutOps.push_back(NewOp);
593 return false;
594 }
595 return true;
596}
597
598/// SelectArithImmed - Select an immediate value that can be represented as
599/// a 12-bit value shifted left by either 0 or 12. If so, return true with
600/// Val set to the 12-bit value and Shift set to the shifter operand.
601bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
602 SDValue &Shift) {
603 // This function is called from the addsub_shifted_imm ComplexPattern,
 604 // which lists [imm] as the list of opcodes it is interested in; however,
605 // we still need to check whether the operand is actually an immediate
606 // here because the ComplexPattern opcode list is only used in
607 // root-level opcode matching.
608 if (!isa<ConstantSDNode>(N.getNode()))
609 return false;
610
611 uint64_t Immed = N.getNode()->getAsZExtVal();
612 unsigned ShiftAmt;
613
614 if (Immed >> 12 == 0) {
615 ShiftAmt = 0;
616 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
617 ShiftAmt = 12;
618 Immed = Immed >> 12;
619 } else
620 return false;
621
622 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
623 SDLoc dl(N);
624 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
625 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
626 return true;
627}
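// Worked example (illustrative): the add/sub immediate form is a 12-bit value
// optionally shifted left by 12, so for instance
//   N = 0xABC      -> Val = 0xABC, Shift = LSL #0
//   N = 0xABC000   -> Val = 0xABC, Shift = LSL #12
//   N = 0xABC001   -> no match (low 12 bits set and value exceeds 12 bits)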
628
629/// SelectNegArithImmed - As above, but negates the value before trying to
630/// select it.
631bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
632 SDValue &Shift) {
633 // This function is called from the addsub_shifted_imm ComplexPattern,
 634 // which lists [imm] as the list of opcodes it is interested in; however,
635 // we still need to check whether the operand is actually an immediate
636 // here because the ComplexPattern opcode list is only used in
637 // root-level opcode matching.
638 if (!isa<ConstantSDNode>(N.getNode()))
639 return false;
640
641 // The immediate operand must be a 24-bit zero-extended immediate.
642 uint64_t Immed = N.getNode()->getAsZExtVal();
643
644 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
645 // have the opposite effect on the C flag, so this pattern mustn't match under
646 // those circumstances.
647 if (Immed == 0)
648 return false;
649
650 if (N.getValueType() == MVT::i32)
651 Immed = ~((uint32_t)Immed) + 1;
652 else
653 Immed = ~Immed + 1ULL;
654 if (Immed & 0xFFFFFFFFFF000000ULL)
655 return false;
656
657 Immed &= 0xFFFFFFULL;
658 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
659 Shift);
660}
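// Worked example (illustrative): for a 32-bit N = -5, the negation 5 fits the
// 12-bit form, so Val = 5 and Shift = LSL #0 are returned and the pattern can
// use the complementary ADD/SUB (or CMN/CMP) instruction.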
661
662/// getShiftTypeForNode - Translate a shift node to the corresponding
663/// ShiftType value.
 664 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
 665 switch (N.getOpcode()) {
 666 default:
 667 return AArch64_AM::InvalidShiftExtend;
668 case ISD::SHL:
669 return AArch64_AM::LSL;
670 case ISD::SRL:
671 return AArch64_AM::LSR;
672 case ISD::SRA:
673 return AArch64_AM::ASR;
674 case ISD::ROTR:
675 return AArch64_AM::ROR;
676 }
677}
678
 679 static bool isMemOpOrPrefetch(SDNode *N) {
 680 return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH;
681}
682
683/// Determine whether it is worth it to fold SHL into the addressing
684/// mode.
 685 static bool isWorthFoldingSHL(SDValue V) {
 686 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
687 // It is worth folding logical shift of up to three places.
688 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
689 if (!CSD)
690 return false;
691 unsigned ShiftVal = CSD->getZExtValue();
692 if (ShiftVal > 3)
693 return false;
694
695 // Check if this particular node is reused in any non-memory related
696 // operation. If yes, do not try to fold this node into the address
697 // computation, since the computation will be kept.
698 const SDNode *Node = V.getNode();
699 for (SDNode *UI : Node->users())
700 if (!isMemOpOrPrefetch(UI))
701 for (SDNode *UII : UI->users())
702 if (!isMemOpOrPrefetch(UII))
703 return false;
704 return true;
705}
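// Illustrative example: (add x1, (shl x2, #3)) whose only users are 8-byte
// loads/stores can become the scaled register-offset form
//   ldr x0, [x1, x2, lsl #3]
// whereas a shift amount above 3 has no such addressing mode and is rejected.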
706
 707 /// Determine whether it is worth folding V into an extended register
 708 /// addressing mode.
709bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
710 // Trivial if we are optimizing for code size or if there is only
711 // one use of the value.
712 if (CurDAG->shouldOptForSize() || V.hasOneUse())
713 return true;
714
715 // If a subtarget has a slow shift, folding a shift into multiple loads
716 // costs additional micro-ops.
717 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
718 return false;
719
720 // Check whether we're going to emit the address arithmetic anyway because
721 // it's used by a non-address operation.
722 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
723 return true;
724 if (V.getOpcode() == ISD::ADD) {
725 const SDValue LHS = V.getOperand(0);
726 const SDValue RHS = V.getOperand(1);
727 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
728 return true;
729 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
730 return true;
731 }
732
733 // It hurts otherwise, since the value will be reused.
734 return false;
735}
736
737/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
 738 /// to select more shifted-register patterns.
739bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
740 SDValue &Shift) {
741 EVT VT = N.getValueType();
742 if (VT != MVT::i32 && VT != MVT::i64)
743 return false;
744
745 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
746 return false;
747 SDValue LHS = N.getOperand(0);
748 if (!LHS->hasOneUse())
749 return false;
750
751 unsigned LHSOpcode = LHS->getOpcode();
752 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
753 return false;
754
755 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
756 if (!ShiftAmtNode)
757 return false;
758
759 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
760 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
761 if (!RHSC)
762 return false;
763
764 APInt AndMask = RHSC->getAPIntValue();
765 unsigned LowZBits, MaskLen;
766 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
767 return false;
768
769 unsigned BitWidth = N.getValueSizeInBits();
770 SDLoc DL(LHS);
771 uint64_t NewShiftC;
772 unsigned NewShiftOp;
773 if (LHSOpcode == ISD::SHL) {
774 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
775 // BitWidth != LowZBits + MaskLen doesn't match the pattern
776 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
777 return false;
778
779 NewShiftC = LowZBits - ShiftAmtC;
780 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
781 } else {
782 if (LowZBits == 0)
783 return false;
784
785 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
786 NewShiftC = LowZBits + ShiftAmtC;
787 if (NewShiftC >= BitWidth)
788 return false;
789
 790 // SRA needs all the high bits
791 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
792 return false;
793
794 // SRL high bits can be 0 or 1
795 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
796 return false;
797
798 if (LHSOpcode == ISD::SRL)
799 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
800 else
801 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
802 }
803
804 assert(NewShiftC < BitWidth && "Invalid shift amount");
805 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
806 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
807 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
808 NewShiftAmt, BitWidthMinus1),
809 0);
810 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
811 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
812 return true;
813}
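// Worked example (illustrative), for an i32 value:
//   (and (shl x, 2), 0xFFFFFF00) has a shifted mask with LowZBits = 8 and
//   MaskLen = 24, so it is rebuilt as (srl x, 6) used as "Reg, LSL #8",
//   i.e. Reg = UBFMWri(x, 6, 31) and Shift = LSL #8.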
814
815/// getExtendTypeForNode - Translate an extend node to the corresponding
816/// ExtendType value.
 817 static AArch64_AM::ShiftExtendType
 818 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
819 if (N.getOpcode() == ISD::SIGN_EXTEND ||
820 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
821 EVT SrcVT;
822 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
823 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
824 else
825 SrcVT = N.getOperand(0).getValueType();
826
827 if (!IsLoadStore && SrcVT == MVT::i8)
828 return AArch64_AM::SXTB;
829 else if (!IsLoadStore && SrcVT == MVT::i16)
830 return AArch64_AM::SXTH;
831 else if (SrcVT == MVT::i32)
832 return AArch64_AM::SXTW;
833 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
834
 835 return AArch64_AM::InvalidShiftExtend;
 836 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
837 N.getOpcode() == ISD::ANY_EXTEND) {
838 EVT SrcVT = N.getOperand(0).getValueType();
839 if (!IsLoadStore && SrcVT == MVT::i8)
840 return AArch64_AM::UXTB;
841 else if (!IsLoadStore && SrcVT == MVT::i16)
842 return AArch64_AM::UXTH;
843 else if (SrcVT == MVT::i32)
844 return AArch64_AM::UXTW;
845 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
846
 847 return AArch64_AM::InvalidShiftExtend;
 848 } else if (N.getOpcode() == ISD::AND) {
849 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
850 if (!CSD)
 851 return AArch64_AM::InvalidShiftExtend;
 852 uint64_t AndMask = CSD->getZExtValue();
853
854 switch (AndMask) {
855 default:
 856 return AArch64_AM::InvalidShiftExtend;
 857 case 0xFF:
858 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
859 case 0xFFFF:
860 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
861 case 0xFFFFFFFF:
862 return AArch64_AM::UXTW;
863 }
864 }
865
 866 return AArch64_AM::InvalidShiftExtend;
 867}
868
 869 /// Determine whether it is worth folding V into an extended register of an
 870 /// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
 871 /// instruction, and the shift should be treated as worth folding even if it
 872 /// has multiple uses.
873bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
874 // Trivial if we are optimizing for code size or if there is only
875 // one use of the value.
876 if (CurDAG->shouldOptForSize() || V.hasOneUse())
877 return true;
878
879 // If a subtarget has a fastpath LSL we can fold a logical shift into
880 // the add/sub and save a cycle.
881 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
882 V.getConstantOperandVal(1) <= 4 &&
 883 !(isa<LoadSDNode>(V.getOperand(0)) || isa<StoreSDNode>(V.getOperand(0))))
 884 return true;
885
886 // It hurts otherwise, since the value will be reused.
887 return false;
888}
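// Illustrative example: on a subtarget with ALULSLFast,
//   (add x0, x1, (shl x2, 2))  ->  add x0, x1, x2, lsl #2
// is considered worth folding even if the shifted value has other users.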
889
890/// SelectShiftedRegister - Select a "shifted register" operand. If the value
 891 /// is not shifted, set the Shift operand to the default of "LSL 0". The logical
892/// instructions allow the shifted register to be rotated, but the arithmetic
893/// instructions do not. The AllowROR parameter specifies whether ROR is
894/// supported.
895bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
896 SDValue &Reg, SDValue &Shift) {
897 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
898 return true;
899
 900 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
 901 if (ShType == AArch64_AM::InvalidShiftExtend)
902 return false;
903 if (!AllowROR && ShType == AArch64_AM::ROR)
904 return false;
905
906 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
907 unsigned BitSize = N.getValueSizeInBits();
908 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
909 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
910
911 Reg = N.getOperand(0);
912 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
913 return isWorthFoldingALU(N, true);
914 }
915
916 return false;
917}
918
919/// Instructions that accept extend modifiers like UXTW expect the register
920/// being extended to be a GPR32, but the incoming DAG might be acting on a
921/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
922/// this is the case.
 923 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
 924 if (N.getValueType() == MVT::i32)
925 return N;
926
927 SDLoc dl(N);
928 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
929}
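// Illustrative note: for an i64 value feeding a UXTW/SXTW operand this emits
// the equivalent of EXTRACT_SUBREG %x, sub_32, so the extend-capable
// instruction sees a GPR32 register operand.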
930
931// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
932template<signed Low, signed High, signed Scale>
933bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
 934 if (!isa<ConstantSDNode>(N))
 935 return false;
936
937 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
938 if ((MulImm % std::abs(Scale)) == 0) {
939 int64_t RDVLImm = MulImm / Scale;
940 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
941 Imm = CurDAG->getSignedTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
942 return true;
943 }
944 }
945
946 return false;
947}
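// Worked example (illustrative), assuming the common instantiation
// Low = -32, High = 31, Scale = 16: a (vscale * 32) operand gives
// MulImm = 32, which is divisible by 16 and yields RDVL #2.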
948
949// Returns a suitable RDSVL multiplier from a left shift.
950template <signed Low, signed High>
951bool AArch64DAGToDAGISel::SelectRDSVLShiftImm(SDValue N, SDValue &Imm) {
 952 if (!isa<ConstantSDNode>(N))
 953 return false;
954
955 int64_t MulImm = 1LL << cast<ConstantSDNode>(N)->getSExtValue();
956 if (MulImm >= Low && MulImm <= High) {
957 Imm = CurDAG->getSignedTargetConstant(MulImm, SDLoc(N), MVT::i32);
958 return true;
959 }
960
961 return false;
962}
963
964/// SelectAddUXTXRegister - Select a "UXTX register" operand. This
 965 /// operand is used by instructions that take an SP operand.
966bool AArch64DAGToDAGISel::SelectAddUXTXRegister(SDValue N, SDValue &Reg,
967 SDValue &Shift) {
968 // TODO: Relax condition to apply to more scenarios
969 if (N.getOpcode() != ISD::LOAD)
970 return false;
971 Reg = N;
972 Shift = CurDAG->getTargetConstant(getArithExtendImm(AArch64_AM::UXTX, 0),
973 SDLoc(N), MVT::i32);
974 return true;
975}
976
 977 /// SelectArithExtendedRegister - Select an "extended register" operand. This
978/// operand folds in an extend followed by an optional left shift.
979bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
980 SDValue &Shift) {
 981 unsigned ShiftVal = 0;
 982 AArch64_AM::ShiftExtendType Ext;
983
984 if (N.getOpcode() == ISD::SHL) {
985 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
986 if (!CSD)
987 return false;
988 ShiftVal = CSD->getZExtValue();
989 if (ShiftVal > 4)
990 return false;
991
992 Ext = getExtendTypeForNode(N.getOperand(0));
 993 if (Ext == AArch64_AM::InvalidShiftExtend)
 994 return false;
995
996 Reg = N.getOperand(0).getOperand(0);
997 } else {
998 Ext = getExtendTypeForNode(N);
 999 if (Ext == AArch64_AM::InvalidShiftExtend)
 1000 return false;
1001
1002 Reg = N.getOperand(0);
1003
1004 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
1005 // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
1006 auto isDef32 = [](SDValue N) {
1007 unsigned Opc = N.getOpcode();
1008 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
 1009 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
 1010 Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
 1011 Opc != ISD::FREEZE;
1012 };
1013 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
1014 isDef32(Reg))
1015 return false;
1016 }
1017
1018 // AArch64 mandates that the RHS of the operation must use the smallest
1019 // register class that could contain the size being extended from. Thus,
1020 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
1021 // there might not be an actual 32-bit value in the program. We can
 1022 // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
1023 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
1024 Reg = narrowIfNeeded(CurDAG, Reg);
1025 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1026 MVT::i32);
1027 return isWorthFoldingALU(N);
1028}
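// Illustrative example: an operand such as (shl (sext_inreg x, i16), 2)
// selects to the extended-register form "Wreg, SXTH #2", e.g.
//   add x0, x1, w2, sxth #2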
1029
1030/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
 1031 /// operand is used by instructions that take an SP operand.
1032bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
1033 SDValue &Shift) {
 1034 unsigned ShiftVal = 0;
 1035 AArch64_AM::ShiftExtendType Ext;
1036
1037 if (N.getOpcode() != ISD::SHL)
1038 return false;
1039
1040 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1041 if (!CSD)
1042 return false;
1043 ShiftVal = CSD->getZExtValue();
1044 if (ShiftVal > 4)
1045 return false;
1046
1047 Ext = AArch64_AM::UXTX;
1048 Reg = N.getOperand(0);
1049 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1050 MVT::i32);
1051 return isWorthFoldingALU(N);
1052}
1053
1054/// If there's a use of this ADDlow that's not itself a load/store then we'll
1055/// need to create a real ADD instruction from it anyway and there's no point in
1056/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1057/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1058/// leads to duplicated ADRP instructions.
 1059 static bool isWorthFoldingADDlow(SDValue N) {
 1060 for (auto *User : N->users()) {
1061 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
1062 User->getOpcode() != ISD::ATOMIC_LOAD &&
1063 User->getOpcode() != ISD::ATOMIC_STORE)
1064 return false;
1065
1066 // ldar and stlr have much more restrictive addressing modes (just a
1067 // register).
1068 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
1069 return false;
1070 }
1071
1072 return true;
1073}
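// Illustrative note: ADDlow is the low-12-bit half of an ADRP/ADD pair for a
// global address. Folding is only profitable when every user is a load/store
// that can absorb the :lo12: offset, e.g.
//   adrp x8, sym
//   ldr  x0, [x8, :lo12:sym]
// otherwise the ADD has to be materialized anyway.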
1074
1075/// Check if the immediate offset is valid as a scaled immediate.
1076static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1077 unsigned Size) {
1078 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1079 Offset < (Range << Log2_32(Size)))
1080 return true;
1081 return false;
1082}
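// Worked example (illustrative): with Range = 0x1000 and Size = 8, the valid
// offsets are the multiples of 8 in [0, 32760], matching the unsigned scaled
// form "ldr x0, [xN, #offset]" where the encoded immediate is offset / 8.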
1083
1084/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1085/// immediate" address. The "Size" argument is the size in bytes of the memory
1086/// reference, which determines the scale.
1087bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1088 unsigned BW, unsigned Size,
1089 SDValue &Base,
1090 SDValue &OffImm) {
1091 SDLoc dl(N);
1092 const DataLayout &DL = CurDAG->getDataLayout();
1093 const TargetLowering *TLI = getTargetLowering();
1094 if (N.getOpcode() == ISD::FrameIndex) {
1095 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1096 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1097 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1098 return true;
1099 }
1100
 1101 // As opposed to the (12-bit) indexed addressing mode below, the 7/9-bit signed
 1102 // form selected here doesn't support labels/immediates, only base+offset.
1103 if (CurDAG->isBaseWithConstantOffset(N)) {
1104 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1105 if (IsSignedImm) {
1106 int64_t RHSC = RHS->getSExtValue();
1107 unsigned Scale = Log2_32(Size);
1108 int64_t Range = 0x1LL << (BW - 1);
1109
1110 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1111 RHSC < (Range << Scale)) {
1112 Base = N.getOperand(0);
1113 if (Base.getOpcode() == ISD::FrameIndex) {
1114 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1115 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1116 }
1117 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1118 return true;
1119 }
1120 } else {
1121 // unsigned Immediate
1122 uint64_t RHSC = RHS->getZExtValue();
1123 unsigned Scale = Log2_32(Size);
1124 uint64_t Range = 0x1ULL << BW;
1125
1126 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1127 Base = N.getOperand(0);
1128 if (Base.getOpcode() == ISD::FrameIndex) {
1129 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1130 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1131 }
1132 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1133 return true;
1134 }
1135 }
1136 }
1137 }
1138 // Base only. The address will be materialized into a register before
1139 // the memory is accessed.
1140 // add x0, Xbase, #offset
1141 // stp x1, x2, [x0]
1142 Base = N;
1143 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1144 return true;
1145}
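// Illustrative example: with IsSignedImm, BW = 7 and Size = 8 this accepts
// base + a multiple of 8 in [-512, 504] (the LDP/STP X-register range) and
// emits OffImm as the scaled value offset / 8.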
1146
1147/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1148/// immediate" address. The "Size" argument is the size in bytes of the memory
1149/// reference, which determines the scale.
1150bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1151 SDValue &Base, SDValue &OffImm) {
1152 SDLoc dl(N);
1153 const DataLayout &DL = CurDAG->getDataLayout();
1154 const TargetLowering *TLI = getTargetLowering();
1155 if (N.getOpcode() == ISD::FrameIndex) {
1156 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1157 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1158 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1159 return true;
1160 }
1161
1162 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1163 GlobalAddressSDNode *GAN =
1164 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1165 Base = N.getOperand(0);
1166 OffImm = N.getOperand(1);
1167 if (!GAN)
1168 return true;
1169
1170 if (GAN->getOffset() % Size == 0 &&
 1171 GAN->getGlobal()->getPointerAlignment(DL) >= Size)
 1172 return true;
1173 }
1174
1175 if (CurDAG->isBaseWithConstantOffset(N)) {
1176 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1177 int64_t RHSC = (int64_t)RHS->getZExtValue();
1178 unsigned Scale = Log2_32(Size);
1179 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1180 Base = N.getOperand(0);
1181 if (Base.getOpcode() == ISD::FrameIndex) {
1182 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1183 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1184 }
1185 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1186 return true;
1187 }
1188 }
1189 }
1190
1191 // Before falling back to our general case, check if the unscaled
1192 // instructions can handle this. If so, that's preferable.
1193 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1194 return false;
1195
1196 // Base only. The address will be materialized into a register before
1197 // the memory is accessed.
1198 // add x0, Xbase, #offset
1199 // ldr x0, [x0]
1200 Base = N;
1201 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1202 return true;
1203}
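// Illustrative example: for Size = 8 this accepts base + a multiple of 8 in
// [0, 32760], e.g.
//   ldr x0, [x1, #16]   ; OffImm = 2
// Unaligned or out-of-range offsets fall back to the unscaled form below or
// to base-only addressing.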
1204
1205/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1206/// immediate" address. This should only match when there is an offset that
1207/// is not valid for a scaled immediate addressing mode. The "Size" argument
1208/// is the size in bytes of the memory reference, which is needed here to know
1209/// what is valid for a scaled immediate.
1210bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1211 SDValue &Base,
1212 SDValue &OffImm) {
1213 if (!CurDAG->isBaseWithConstantOffset(N))
1214 return false;
1215 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1216 int64_t RHSC = RHS->getSExtValue();
1217 if (RHSC >= -256 && RHSC < 256) {
1218 Base = N.getOperand(0);
1219 if (Base.getOpcode() == ISD::FrameIndex) {
1220 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1221 const TargetLowering *TLI = getTargetLowering();
1222 Base = CurDAG->getTargetFrameIndex(
1223 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1224 }
1225 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1226 return true;
1227 }
1228 }
1229 return false;
1230}
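// Illustrative example: this covers the LDUR/STUR forms with a signed 9-bit
// byte offset in [-256, 255], e.g.
//   ldur x0, [x1, #-8]
// which the scaled 12-bit form above cannot encode.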
1231
 1232 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
 1233 SDLoc dl(N);
1234 SDValue ImpDef = SDValue(
1235 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1236 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1237 N);
1238}
1239
 1240 /// Check if the given SHL node (\p N) can be used to form an
1241/// extended register for an addressing mode.
1242bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1243 bool WantExtend, SDValue &Offset,
1244 SDValue &SignExtend) {
1245 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1246 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1247 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1248 return false;
1249
1250 SDLoc dl(N);
1251 if (WantExtend) {
 1252 AArch64_AM::ShiftExtendType Ext =
 1253 getExtendTypeForNode(N.getOperand(0), true);
 1254 if (Ext == AArch64_AM::InvalidShiftExtend)
 1255 return false;
1256
1257 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1258 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1259 MVT::i32);
1260 } else {
1261 Offset = N.getOperand(0);
1262 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1263 }
1264
1265 unsigned LegalShiftVal = Log2_32(Size);
1266 unsigned ShiftVal = CSD->getZExtValue();
1267
1268 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1269 return false;
1270
1271 return isWorthFoldingAddr(N, Size);
1272}
1273
1274bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
 1275 SDValue &Base, SDValue &Offset,
 1276 SDValue &SignExtend,
1277 SDValue &DoShift) {
1278 if (N.getOpcode() != ISD::ADD)
1279 return false;
1280 SDValue LHS = N.getOperand(0);
1281 SDValue RHS = N.getOperand(1);
1282 SDLoc dl(N);
1283
1284 // We don't want to match immediate adds here, because they are better lowered
1285 // to the register-immediate addressing modes.
 1286 if (isa<ConstantSDNode>(RHS))
 1287 return false;
1288
1289 // Check if this particular node is reused in any non-memory related
1290 // operation. If yes, do not try to fold this node into the address
1291 // computation, since the computation will be kept.
1292 const SDNode *Node = N.getNode();
1293 for (SDNode *UI : Node->users()) {
1294 if (!isMemOpOrPrefetch(UI))
1295 return false;
1296 }
1297
1298 // Remember if it is worth folding N when it produces extended register.
1299 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1300
1301 // Try to match a shifted extend on the RHS.
1302 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1303 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1304 Base = LHS;
1305 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1306 return true;
1307 }
1308
1309 // Try to match a shifted extend on the LHS.
1310 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1311 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1312 Base = RHS;
1313 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1314 return true;
1315 }
1316
1317 // There was no shift, whatever else we find.
1318 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1319
 1320 AArch64_AM::ShiftExtendType Ext;
 1321 // Try to match an unshifted extend on the LHS.
1322 if (IsExtendedRegisterWorthFolding &&
1323 (Ext = getExtendTypeForNode(LHS, true)) !=
 1324 AArch64_AM::InvalidShiftExtend) {
 1325 Base = RHS;
1326 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1327 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1328 MVT::i32);
1329 if (isWorthFoldingAddr(LHS, Size))
1330 return true;
1331 }
1332
1333 // Try to match an unshifted extend on the RHS.
1334 if (IsExtendedRegisterWorthFolding &&
1335 (Ext = getExtendTypeForNode(RHS, true)) !=
 1336 AArch64_AM::InvalidShiftExtend) {
 1337 Base = LHS;
1338 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1339 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1340 MVT::i32);
1341 if (isWorthFoldingAddr(RHS, Size))
1342 return true;
1343 }
1344
1345 return false;
1346}
1347
1348// Check if the given immediate is preferred by ADD. If an immediate can be
 1349 // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
 1350 // encoded by a single MOVZ, return true.
1351static bool isPreferredADD(int64_t ImmOff) {
1352 // Constant in [0x0, 0xfff] can be encoded in ADD.
1353 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1354 return true;
1355 // Check if it can be encoded in an "ADD LSL #12".
1356 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
 1357 // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants.
1358 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1359 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1360 return false;
1361}
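// Worked examples (illustrative):
//   isPreferredADD(0x123)    -> true   (plain 12-bit ADD immediate)
//   isPreferredADD(0x123000) -> true   (ADD LSL #12; not a single MOVZ)
//   isPreferredADD(0x120000) -> false  (MOVZ #0x12, LSL #16 is cheaper)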
1362
1363bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
 1364 SDValue &Base, SDValue &Offset,
 1365 SDValue &SignExtend,
1366 SDValue &DoShift) {
1367 if (N.getOpcode() != ISD::ADD)
1368 return false;
1369 SDValue LHS = N.getOperand(0);
1370 SDValue RHS = N.getOperand(1);
1371 SDLoc DL(N);
1372
1373 // Check if this particular node is reused in any non-memory related
1374 // operation. If yes, do not try to fold this node into the address
1375 // computation, since the computation will be kept.
1376 const SDNode *Node = N.getNode();
1377 for (SDNode *UI : Node->users()) {
1378 if (!isMemOpOrPrefetch(UI))
1379 return false;
1380 }
1381
 1382 // Watch out if RHS is a wide immediate: it cannot be selected into the
 1383 // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB.
 1384 // In that case the [BaseReg + 0] addressing mode would be used, generating
 1385 // instructions like:
1386 // MOV X0, WideImmediate
1387 // ADD X1, BaseReg, X0
1388 // LDR X2, [X1, 0]
1389 // For such situation, using [BaseReg, XReg] addressing mode can save one
1390 // ADD/SUB:
1391 // MOV X0, WideImmediate
1392 // LDR X2, [BaseReg, X0]
1393 if (isa<ConstantSDNode>(RHS)) {
1394 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
 1395 // Skip if the immediate can be selected by the load/store addressing mode,
 1396 // or if it can be encoded by a single ADD (SUB is also checked by using
 1397 // -ImmOff).
1398 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1399 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1400 return false;
1401
1402 SDValue Ops[] = { RHS };
1403 SDNode *MOVI =
1404 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1405 SDValue MOVIV = SDValue(MOVI, 0);
1406 // This ADD of two X register will be selected into [Reg+Reg] mode.
1407 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1408 }
1409
1410 // Remember if it is worth folding N when it produces extended register.
1411 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1412
1413 // Try to match a shifted extend on the RHS.
1414 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1415 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1416 Base = LHS;
1417 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1418 return true;
1419 }
1420
1421 // Try to match a shifted extend on the LHS.
1422 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1423 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1424 Base = RHS;
1425 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1426 return true;
1427 }
1428
1429 // Match any non-shifted, non-extend, non-immediate add expression.
1430 Base = LHS;
1431 Offset = RHS;
1432 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1433 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1434 // Reg1 + Reg2 is free: no check needed.
1435 return true;
1436}
1437
1438SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1439 static const unsigned RegClassIDs[] = {
1440 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1441 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1442 AArch64::dsub2, AArch64::dsub3};
1443
1444 return createTuple(Regs, RegClassIDs, SubRegs);
1445}
1446
1447SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1448 static const unsigned RegClassIDs[] = {
1449 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1450 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1451 AArch64::qsub2, AArch64::qsub3};
1452
1453 return createTuple(Regs, RegClassIDs, SubRegs);
1454}
1455
1456SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1457 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1458 AArch64::ZPR3RegClassID,
1459 AArch64::ZPR4RegClassID};
1460 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1461 AArch64::zsub2, AArch64::zsub3};
1462
1463 return createTuple(Regs, RegClassIDs, SubRegs);
1464}
1465
1466SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1467 assert(Regs.size() == 2 || Regs.size() == 4);
1468
1469 // The createTuple interface requires 3 RegClassIDs for each possible
1470 // tuple type even though we only have them for ZPR2 and ZPR4.
1471 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1472 AArch64::ZPR4Mul4RegClassID};
1473 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1474 AArch64::zsub2, AArch64::zsub3};
1475 return createTuple(Regs, RegClassIDs, SubRegs);
1476}
1477
1478SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1479 const unsigned RegClassIDs[],
1480 const unsigned SubRegs[]) {
1481 // There's no special register-class for a vector-list of 1 element: it's just
1482 // a vector.
1483 if (Regs.size() == 1)
1484 return Regs[0];
1485
1486 assert(Regs.size() >= 2 && Regs.size() <= 4);
1487
1488 SDLoc DL(Regs[0]);
1489
 1490 SmallVector<SDValue, 4> Ops;
 1491
1492 // First operand of REG_SEQUENCE is the desired RegClass.
1493 Ops.push_back(
1494 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1495
1496 // Then we get pairs of source & subregister-position for the components.
1497 for (unsigned i = 0; i < Regs.size(); ++i) {
1498 Ops.push_back(Regs[i]);
1499 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1500 }
1501
1502 SDNode *N =
1503 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1504 return SDValue(N, 0);
1505}
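// Illustrative note: for a 2-element Q tuple the operand list built here is
//   { QQRegClassID, Regs[0], qsub0, Regs[1], qsub1 }
// which REG_SEQUENCE turns into a single untyped QQ tuple value.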
1506
1507void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1508 bool isExt) {
1509 SDLoc dl(N);
1510 EVT VT = N->getValueType(0);
1511
1512 unsigned ExtOff = isExt;
1513
1514 // Form a REG_SEQUENCE to force register allocation.
1515 unsigned Vec0Off = ExtOff + 1;
1516 SmallVector<SDValue, 4> Regs(N->ops().slice(Vec0Off, NumVecs));
1517 SDValue RegSeq = createQTuple(Regs);
1518
 1519 SmallVector<SDValue, 6> Ops;
 1520 if (isExt)
1521 Ops.push_back(N->getOperand(1));
1522 Ops.push_back(RegSeq);
1523 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1524 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1525}
1526
1527static std::tuple<SDValue, SDValue>
 1528 extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
 1529 SDLoc DL(Disc);
1530 SDValue AddrDisc;
1531 SDValue ConstDisc;
1532
1533 // If this is a blend, remember the constant and address discriminators.
1534 // Otherwise, it's either a constant discriminator, or a non-blended
1535 // address discriminator.
1536 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1537 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1538 AddrDisc = Disc->getOperand(1);
1539 ConstDisc = Disc->getOperand(2);
1540 } else {
1541 ConstDisc = Disc;
1542 }
1543
1544 // If the constant discriminator (either the blend RHS, or the entire
1545 // discriminator value) isn't a 16-bit constant, bail out, and let the
1546 // discriminator be computed separately.
1547 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1548 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1549 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1550
1551 // If there's no address discriminator, use XZR directly.
1552 if (!AddrDisc)
1553 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1554
1555 return std::make_tuple(
1556 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1557 AddrDisc);
1558}
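// Illustrative examples: a discriminator of the form
// @llvm.ptrauth.blend(%addr, 1234) yields (1234, %addr); a bare constant
// C <= 0xffff yields (C, XZR); anything else yields (0, Disc) so the
// discriminator is computed separately.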
1559
1560void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1561 SDLoc DL(N);
1562 // IntrinsicID is operand #0
1563 SDValue Val = N->getOperand(1);
1564 SDValue AUTKey = N->getOperand(2);
1565 SDValue AUTDisc = N->getOperand(3);
1566
1567 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1568 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1569
1570 SDValue AUTAddrDisc, AUTConstDisc;
1571 std::tie(AUTConstDisc, AUTAddrDisc) =
1572 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1573
1574 if (!Subtarget->isX16X17Safer()) {
1575 std::vector<SDValue> Ops = {Val, AUTKey, AUTConstDisc, AUTAddrDisc};
1576 // Copy deactivation symbol if present.
1577 if (N->getNumOperands() > 4)
1578 Ops.push_back(N->getOperand(4));
1579
1580 SDNode *AUT =
1581 CurDAG->getMachineNode(AArch64::AUTxMxN, DL, MVT::i64, MVT::i64, Ops);
1582 ReplaceNode(N, AUT);
1583 } else {
1584 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1585 AArch64::X16, Val, SDValue());
1586 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1587
1588 SDNode *AUT = CurDAG->getMachineNode(AArch64::AUTx16x17, DL, MVT::i64, Ops);
1589 ReplaceNode(N, AUT);
1590 }
1591}
1592
1593void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1594 SDLoc DL(N);
1595 // IntrinsicID is operand #0
1596 SDValue Val = N->getOperand(1);
1597 SDValue AUTKey = N->getOperand(2);
1598 SDValue AUTDisc = N->getOperand(3);
1599 SDValue PACKey = N->getOperand(4);
1600 SDValue PACDisc = N->getOperand(5);
1601
1602 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1603 unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();
1604
1605 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1606 PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
1607
1608 SDValue AUTAddrDisc, AUTConstDisc;
1609 std::tie(AUTConstDisc, AUTAddrDisc) =
1610 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1611
1612 SDValue PACAddrDisc, PACConstDisc;
1613 std::tie(PACConstDisc, PACAddrDisc) =
1614 extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
1615
1616 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1617 AArch64::X16, Val, SDValue());
1618
1619 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1620 PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};
1621
1622 SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
1623 ReplaceNode(N, AUTPAC);
1624}
1625
1626bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1627 LoadSDNode *LD = cast<LoadSDNode>(N);
1628 if (LD->isUnindexed())
1629 return false;
1630 EVT VT = LD->getMemoryVT();
1631 EVT DstVT = N->getValueType(0);
1632 ISD::MemIndexedMode AM = LD->getAddressingMode();
1633 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1634 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1635 int OffsetVal = (int)OffsetOp->getZExtValue();
1636
1637 // We're not doing validity checking here. That was done when checking
1638 // if we should mark the load as indexed or not. We're just selecting
1639 // the right instruction.
1640 unsigned Opcode = 0;
1641
1642 ISD::LoadExtType ExtType = LD->getExtensionType();
1643 bool InsertTo64 = false;
1644 if (VT == MVT::i64)
1645 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1646 else if (VT == MVT::i32) {
1647 if (ExtType == ISD::NON_EXTLOAD)
1648 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1649 else if (ExtType == ISD::SEXTLOAD)
1650 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1651 else {
1652 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1653 InsertTo64 = true;
1654 // The result of the load is only i32. It's the subreg_to_reg that makes
1655 // it into an i64.
1656 DstVT = MVT::i32;
1657 }
1658 } else if (VT == MVT::i16) {
1659 if (ExtType == ISD::SEXTLOAD) {
1660 if (DstVT == MVT::i64)
1661 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1662 else
1663 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1664 } else {
1665 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1666 InsertTo64 = DstVT == MVT::i64;
1667 // The result of the load is only i32. It's the subreg_to_reg that makes
1668 // it into an i64.
1669 DstVT = MVT::i32;
1670 }
1671 } else if (VT == MVT::i8) {
1672 if (ExtType == ISD::SEXTLOAD) {
1673 if (DstVT == MVT::i64)
1674 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1675 else
1676 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1677 } else {
1678 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1679 InsertTo64 = DstVT == MVT::i64;
1680 // The result of the load is only i32. It's the subreg_to_reg that makes
1681 // it into an i64.
1682 DstVT = MVT::i32;
1683 }
1684 } else if (VT == MVT::f16) {
1685 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1686 } else if (VT == MVT::bf16) {
1687 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1688 } else if (VT == MVT::f32) {
1689 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1690 } else if (VT == MVT::f64 ||
1691 (VT.is64BitVector() && Subtarget->isLittleEndian())) {
1692 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1693 } else if (VT.is128BitVector() && Subtarget->isLittleEndian()) {
1694 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1695 } else if (VT.is64BitVector()) {
1696 if (IsPre || OffsetVal != 8)
1697 return false;
1698 switch (VT.getScalarSizeInBits()) {
1699 case 8:
1700 Opcode = AArch64::LD1Onev8b_POST;
1701 break;
1702 case 16:
1703 Opcode = AArch64::LD1Onev4h_POST;
1704 break;
1705 case 32:
1706 Opcode = AArch64::LD1Onev2s_POST;
1707 break;
1708 case 64:
1709 Opcode = AArch64::LD1Onev1d_POST;
1710 break;
1711 default:
1712 llvm_unreachable("Expected vector element to be a power of 2");
1713 }
1714 } else if (VT.is128BitVector()) {
1715 if (IsPre || OffsetVal != 16)
1716 return false;
1717 switch (VT.getScalarSizeInBits()) {
1718 case 8:
1719 Opcode = AArch64::LD1Onev16b_POST;
1720 break;
1721 case 16:
1722 Opcode = AArch64::LD1Onev8h_POST;
1723 break;
1724 case 32:
1725 Opcode = AArch64::LD1Onev4s_POST;
1726 break;
1727 case 64:
1728 Opcode = AArch64::LD1Onev2d_POST;
1729 break;
1730 default:
1731 llvm_unreachable("Expected vector element to be a power of 2");
1732 }
1733 } else
1734 return false;
1735 SDValue Chain = LD->getChain();
1736 SDValue Base = LD->getBasePtr();
1737 SDLoc dl(N);
1738 // LD1 encodes an immediate offset by using XZR as the offset register.
1739 SDValue Offset = (VT.isVector() && !Subtarget->isLittleEndian())
1740 ? CurDAG->getRegister(AArch64::XZR, MVT::i64)
1741 : CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1742 SDValue Ops[] = { Base, Offset, Chain };
1743 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1744 MVT::Other, Ops);
1745
1746 // Transfer memoperands.
1747 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1748 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1749
1750 // Either way, we're replacing the node, so tell the caller that.
1751 SDValue LoadedVal = SDValue(Res, 1);
1752 if (InsertTo64) {
1753 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1754 LoadedVal =
1755 SDValue(CurDAG->getMachineNode(
1756 AArch64::SUBREG_TO_REG, dl, MVT::i64,
1757 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1758 SubReg),
1759 0);
1760 }
1761
1762 ReplaceUses(SDValue(N, 0), LoadedVal);
1763 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1764 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1765 CurDAG->RemoveDeadNode(N);
1766 return true;
1767}
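// Illustrative example (registers arbitrary): a post-indexed non-extending
// i32 load with offset 4 is selected to "LDRWpost wDst, [xBase], #4", and a
// pre-indexed i64 load with offset -8 to "LDRXpre xDst, [xBase, #-8]!". For
// the extending cases, the extra SUBREG_TO_REG widens the i32 result to i64.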
1768
1769void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1770 unsigned SubRegIdx) {
1771 SDLoc dl(N);
1772 EVT VT = N->getValueType(0);
1773 SDValue Chain = N->getOperand(0);
1774
1775 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1776 Chain};
1777
1778 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1779
1780 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1781 SDValue SuperReg = SDValue(Ld, 0);
1782 for (unsigned i = 0; i < NumVecs; ++i)
1783 ReplaceUses(SDValue(N, i),
1784 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1785
1786 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1787
1788 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1789 // because it's too simple to have needed special treatment during lowering.
1790 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1791 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1792 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1793 }
1794
1795 CurDAG->RemoveDeadNode(N);
1796}
1797
1798void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1799 unsigned Opc, unsigned SubRegIdx) {
1800 SDLoc dl(N);
1801 EVT VT = N->getValueType(0);
1802 SDValue Chain = N->getOperand(0);
1803
1804 SDValue Ops[] = {N->getOperand(1), // Mem operand
1805 N->getOperand(2), // Incremental
1806 Chain};
1807
1808 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1809 MVT::Untyped, MVT::Other};
1810
1811 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1812
1813 // Update uses of write back register
1814 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1815
1816 // Update uses of vector list
1817 SDValue SuperReg = SDValue(Ld, 1);
1818 if (NumVecs == 1)
1819 ReplaceUses(SDValue(N, 0), SuperReg);
1820 else
1821 for (unsigned i = 0; i < NumVecs; ++i)
1822 ReplaceUses(SDValue(N, i),
1823 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1824
1825 // Update the chain
1826 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1827 CurDAG->RemoveDeadNode(N);
1828}
1829
1830/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1831/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1832/// new Base and an SDValue representing the new offset.
1833std::tuple<unsigned, SDValue, SDValue>
1834AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1835 unsigned Opc_ri,
1836 const SDValue &OldBase,
1837 const SDValue &OldOffset,
1838 unsigned Scale) {
1839 SDValue NewBase = OldBase;
1840 SDValue NewOffset = OldOffset;
1841 // Detect a possible Reg+Imm addressing mode.
1842 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1843 N, OldBase, NewBase, NewOffset);
1844
1845 // Detect a possible reg+reg addressing mode, but only if we haven't already
1846 // detected a Reg+Imm one.
1847 const bool IsRegReg =
1848 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1849
1850 // Select the instruction.
1851 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1852}
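// Illustrative example (operands arbitrary): with Scale == 2 (32-bit
// elements), a base that folds to reg+imm within [-8, 7] vector-lengths
// picks the _ri form, e.g. "ld1w { z0.s }, p0/z, [x0, #1, mul vl]", while a
// reg+reg base picks the _rr form, e.g. "[x0, x1, lsl #2]".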
1853
1854enum class SelectTypeKind {
1855 Int1 = 0,
1856 Int = 1,
1857 FP = 2,
1858   AnyType = 3,
1859 };
1860
1861/// This function selects an opcode from a list of opcodes, which is
1862 /// expected to contain the opcodes for { 8-bit, 16-bit, 32-bit, 64-bit }
1863/// element types, in this order.
1864template <SelectTypeKind Kind>
1865static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1866 // Only match scalable vector VTs
1867 if (!VT.isScalableVector())
1868 return 0;
1869
1870 EVT EltVT = VT.getVectorElementType();
1871 unsigned Key = VT.getVectorMinNumElements();
1872 switch (Kind) {
1873   case SelectTypeKind::AnyType:
1874     break;
1875   case SelectTypeKind::Int:
1876     if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1877 EltVT != MVT::i64)
1878 return 0;
1879 break;
1880   case SelectTypeKind::Int1:
1881     if (EltVT != MVT::i1)
1882 return 0;
1883 break;
1884 case SelectTypeKind::FP:
1885 if (EltVT == MVT::bf16)
1886 Key = 16;
1887 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
1888 EltVT != MVT::f64)
1889 return 0;
1890 break;
1891 }
1892
1893 unsigned Offset;
1894 switch (Key) {
1895 case 16: // 8-bit or bf16
1896 Offset = 0;
1897 break;
1898 case 8: // 16-bit
1899 Offset = 1;
1900 break;
1901 case 4: // 32-bit
1902 Offset = 2;
1903 break;
1904 case 2: // 64-bit
1905 Offset = 3;
1906 break;
1907 default:
1908 return 0;
1909 }
1910
1911 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1912}
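// Illustrative example: nxv16i8 has a minimum of 16 elements, so Offset == 0
// and the 8-bit opcode is returned; nxv4i32 gives Offset == 2 (the 32-bit
// opcode); nxv8bf16 has its Key forced to 16, so it shares slot 0 with the
// 8-bit entry.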
1913
1914// This function is almost identical to SelectWhilePair, but has an
1915// extra check on the range of the immediate operand.
1916// TODO: Merge these two functions together at some point?
1917void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1918 // Immediate can be either 0 or 1.
1919 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1920 if (Imm->getZExtValue() > 1)
1921 return;
1922
1923 SDLoc DL(N);
1924 EVT VT = N->getValueType(0);
1925 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1926 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1927 SDValue SuperReg = SDValue(WhilePair, 0);
1928
1929 for (unsigned I = 0; I < 2; ++I)
1930 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1931 AArch64::psub0 + I, DL, VT, SuperReg));
1932
1933 CurDAG->RemoveDeadNode(N);
1934}
1935
1936void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1937 SDLoc DL(N);
1938 EVT VT = N->getValueType(0);
1939
1940 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1941
1942 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1943 SDValue SuperReg = SDValue(WhilePair, 0);
1944
1945 for (unsigned I = 0; I < 2; ++I)
1946 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1947 AArch64::psub0 + I, DL, VT, SuperReg));
1948
1949 CurDAG->RemoveDeadNode(N);
1950}
1951
1952void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1953 unsigned Opcode) {
1954 EVT VT = N->getValueType(0);
1955 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
1956 SDValue Ops = createZTuple(Regs);
1957 SDLoc DL(N);
1958 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1959 SDValue SuperReg = SDValue(Intrinsic, 0);
1960 for (unsigned i = 0; i < NumVecs; ++i)
1961 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1962 AArch64::zsub0 + i, DL, VT, SuperReg));
1963
1964 CurDAG->RemoveDeadNode(N);
1965}
1966
1967void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
1968 unsigned Opcode) {
1969 SDLoc DL(N);
1970 EVT VT = N->getValueType(0);
1971 SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
1972 Ops.push_back(/*Chain*/ N->getOperand(0));
1973
1974 SDNode *Instruction =
1975 CurDAG->getMachineNode(Opcode, DL, {MVT::Untyped, MVT::Other}, Ops);
1976 SDValue SuperReg = SDValue(Instruction, 0);
1977
1978 for (unsigned i = 0; i < NumVecs; ++i)
1979 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1980 AArch64::zsub0 + i, DL, VT, SuperReg));
1981
1982 // Copy chain
1983 unsigned ChainIdx = NumVecs;
1984 ReplaceUses(SDValue(N, ChainIdx), SDValue(Instruction, 1));
1985 CurDAG->RemoveDeadNode(N);
1986}
1987
1988void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1989 unsigned NumVecs,
1990 bool IsZmMulti,
1991 unsigned Opcode,
1992 bool HasPred) {
1993 assert(Opcode != 0 && "Unexpected opcode");
1994
1995 SDLoc DL(N);
1996 EVT VT = N->getValueType(0);
1997 unsigned FirstVecIdx = HasPred ? 2 : 1;
1998
1999 auto GetMultiVecOperand = [=](unsigned StartIdx) {
2000 SmallVector<SDValue, 4> Regs(N->ops().slice(StartIdx, NumVecs));
2001 return createZMulTuple(Regs);
2002 };
2003
2004 SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
2005
2006 SDValue Zm;
2007 if (IsZmMulti)
2008 Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
2009 else
2010 Zm = N->getOperand(NumVecs + FirstVecIdx);
2011
2012 SDNode *Intrinsic;
2013 if (HasPred)
2014 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
2015 N->getOperand(1), Zdn, Zm);
2016 else
2017 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
2018 SDValue SuperReg = SDValue(Intrinsic, 0);
2019 for (unsigned i = 0; i < NumVecs; ++i)
2020 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2021 AArch64::zsub0 + i, DL, VT, SuperReg));
2022
2023 CurDAG->RemoveDeadNode(N);
2024}
2025
2026void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
2027 unsigned Scale, unsigned Opc_ri,
2028 unsigned Opc_rr, bool IsIntr) {
2029 assert(Scale < 5 && "Invalid scaling value.");
2030 SDLoc DL(N);
2031 EVT VT = N->getValueType(0);
2032 SDValue Chain = N->getOperand(0);
2033
2034 // Optimize addressing mode.
2035   SDValue Base, Offset;
2036   unsigned Opc;
2037 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2038 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
2039 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
2040
2041 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
2042 Base, // Memory operand
2043 Offset, Chain};
2044
2045 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2046
2047 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2048 SDValue SuperReg = SDValue(Load, 0);
2049 for (unsigned i = 0; i < NumVecs; ++i)
2050 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2051 AArch64::zsub0 + i, DL, VT, SuperReg));
2052
2053 // Copy chain
2054 unsigned ChainIdx = NumVecs;
2055 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2056 CurDAG->RemoveDeadNode(N);
2057}
2058
2059void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
2060 unsigned NumVecs,
2061 unsigned Scale,
2062 unsigned Opc_ri,
2063 unsigned Opc_rr) {
2064 assert(Scale < 4 && "Invalid scaling value.");
2065 SDLoc DL(N);
2066 EVT VT = N->getValueType(0);
2067 SDValue Chain = N->getOperand(0);
2068
2069 SDValue PNg = N->getOperand(2);
2070 SDValue Base = N->getOperand(3);
2071 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
2072 unsigned Opc;
2073 std::tie(Opc, Base, Offset) =
2074 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
2075
2076 SDValue Ops[] = {PNg, // Predicate-as-counter
2077 Base, // Memory operand
2078 Offset, Chain};
2079
2080 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2081
2082 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2083 SDValue SuperReg = SDValue(Load, 0);
2084 for (unsigned i = 0; i < NumVecs; ++i)
2085 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2086 AArch64::zsub0 + i, DL, VT, SuperReg));
2087
2088 // Copy chain
2089 unsigned ChainIdx = NumVecs;
2090 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2091 CurDAG->RemoveDeadNode(N);
2092}
2093
2094void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
2095 unsigned Opcode) {
2096 if (N->getValueType(0) != MVT::nxv4f32)
2097 return;
2098 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
2099}
2100
2101void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
2102 unsigned NumOutVecs,
2103 unsigned Opc,
2104 uint32_t MaxImm) {
2105 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
2106 if (Imm->getZExtValue() > MaxImm)
2107 return;
2108
2109 SDValue ZtValue;
2110 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2111 return;
2112
2113 SDValue Chain = Node->getOperand(0);
2114 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4), Chain};
2115 SDLoc DL(Node);
2116 EVT VT = Node->getValueType(0);
2117
2118 SDNode *Instruction =
2119 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2120 SDValue SuperReg = SDValue(Instruction, 0);
2121
2122 for (unsigned I = 0; I < NumOutVecs; ++I)
2123 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2124 AArch64::zsub0 + I, DL, VT, SuperReg));
2125
2126 // Copy chain
2127 unsigned ChainIdx = NumOutVecs;
2128 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2129 CurDAG->RemoveDeadNode(Node);
2130}
2131
2132void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
2133 unsigned NumOutVecs,
2134 unsigned Opc) {
2135 SDValue ZtValue;
2136 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2137 return;
2138
2139 SDValue Chain = Node->getOperand(0);
2140 SDValue Ops[] = {ZtValue,
2141 createZMulTuple({Node->getOperand(3), Node->getOperand(4)}),
2142 Chain};
2143
2144 SDLoc DL(Node);
2145 EVT VT = Node->getValueType(0);
2146
2147 SDNode *Instruction =
2148 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2149 SDValue SuperReg = SDValue(Instruction, 0);
2150
2151 for (unsigned I = 0; I < NumOutVecs; ++I)
2152 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2153 AArch64::zsub0 + I, DL, VT, SuperReg));
2154
2155 // Copy chain
2156 unsigned ChainIdx = NumOutVecs;
2157 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2158 CurDAG->RemoveDeadNode(Node);
2159}
2160
2161void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2162 unsigned Op) {
2163 SDLoc DL(N);
2164 EVT VT = N->getValueType(0);
2165
2166 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2167 SDValue Zd = createZMulTuple(Regs);
2168 SDValue Zn = N->getOperand(1 + NumVecs);
2169 SDValue Zm = N->getOperand(2 + NumVecs);
2170
2171 SDValue Ops[] = {Zd, Zn, Zm};
2172
2173 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2174 SDValue SuperReg = SDValue(Intrinsic, 0);
2175 for (unsigned i = 0; i < NumVecs; ++i)
2176 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2177 AArch64::zsub0 + i, DL, VT, SuperReg));
2178
2179 CurDAG->RemoveDeadNode(N);
2180}
2181
2182bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2183 switch (BaseReg) {
2184 default:
2185 return false;
2186 case AArch64::ZA:
2187 case AArch64::ZAB0:
2188 if (TileNum == 0)
2189 break;
2190 return false;
2191 case AArch64::ZAH0:
2192 if (TileNum <= 1)
2193 break;
2194 return false;
2195 case AArch64::ZAS0:
2196 if (TileNum <= 3)
2197 break;
2198 return false;
2199 case AArch64::ZAD0:
2200 if (TileNum <= 7)
2201 break;
2202 return false;
2203 }
2204
2205 BaseReg += TileNum;
2206 return true;
2207}
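// Illustrative example: BaseReg == AArch64::ZAS0 with TileNum == 2 yields
// ZAS2 (tiles 0..3 exist for 32-bit elements), whereas AArch64::ZAB0 with
// TileNum == 1 is rejected because there is only one byte tile.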
2208
2209template <unsigned MaxIdx, unsigned Scale>
2210void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
2211 unsigned BaseReg, unsigned Op) {
2212 unsigned TileNum = 0;
2213 if (BaseReg != AArch64::ZA)
2214 TileNum = N->getConstantOperandVal(2);
2215
2216 if (!SelectSMETile(BaseReg, TileNum))
2217 return;
2218
2219 SDValue SliceBase, Base, Offset;
2220 if (BaseReg == AArch64::ZA)
2221 SliceBase = N->getOperand(2);
2222 else
2223 SliceBase = N->getOperand(3);
2224
2225 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2226 return;
2227
2228 SDLoc DL(N);
2229 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
2230 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
2231 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2232
2233 EVT VT = N->getValueType(0);
2234 for (unsigned I = 0; I < NumVecs; ++I)
2235 ReplaceUses(SDValue(N, I),
2236 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2237 SDValue(Mov, 0)));
2238 // Copy chain
2239 unsigned ChainIdx = NumVecs;
2240 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2241 CurDAG->RemoveDeadNode(N);
2242}
2243
2244void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2245 unsigned Op, unsigned MaxIdx,
2246 unsigned Scale, unsigned BaseReg) {
2247 // Slice can be in different positions
2248 // The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2249 // The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2250 SDValue SliceBase = N->getOperand(2);
2251 if (BaseReg != AArch64::ZA)
2252 SliceBase = N->getOperand(3);
2253
2254   SDValue Base, Offset;
2255   if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2256 return;
2257   // The correct ZA tile number is computed when emitting the machine
2258   // instruction; see EmitZAInstr.
2259   // The DAG cannot select a ZA tile as an output register with ZReg.
2260 SDLoc DL(N);
2261   SmallVector<SDValue, 6> Ops;
2262   if (BaseReg != AArch64::ZA )
2263 Ops.push_back(N->getOperand(2));
2264 Ops.push_back(Base);
2265 Ops.push_back(Offset);
2266 Ops.push_back(N->getOperand(0)); //Chain
2267 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2268
2269 EVT VT = N->getValueType(0);
2270 for (unsigned I = 0; I < NumVecs; ++I)
2271 ReplaceUses(SDValue(N, I),
2272 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2273 SDValue(Mov, 0)));
2274
2275 // Copy chain
2276 unsigned ChainIdx = NumVecs;
2277 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2278 CurDAG->RemoveDeadNode(N);
2279}
2280
2281void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2282 unsigned NumOutVecs,
2283 bool IsTupleInput,
2284 unsigned Opc) {
2285 SDLoc DL(N);
2286 EVT VT = N->getValueType(0);
2287 unsigned NumInVecs = N->getNumOperands() - 1;
2288
2289   SmallVector<SDValue, 6> Ops;
2290   if (IsTupleInput) {
2291 assert((NumInVecs == 2 || NumInVecs == 4) &&
2292 "Don't know how to handle multi-register input!");
2293 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumInVecs));
2294 Ops.push_back(createZMulTuple(Regs));
2295 } else {
2296 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2297 for (unsigned I = 0; I < NumInVecs; I++)
2298 Ops.push_back(N->getOperand(1 + I));
2299 }
2300
2301 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2302 SDValue SuperReg = SDValue(Res, 0);
2303
2304 for (unsigned I = 0; I < NumOutVecs; I++)
2305 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2306 AArch64::zsub0 + I, DL, VT, SuperReg));
2307 CurDAG->RemoveDeadNode(N);
2308}
2309
2310void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2311 unsigned Opc) {
2312 SDLoc dl(N);
2313 EVT VT = N->getOperand(2)->getValueType(0);
2314
2315 // Form a REG_SEQUENCE to force register allocation.
2316 bool Is128Bit = VT.getSizeInBits() == 128;
2317 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2318 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2319
2320 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2321 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2322
2323 // Transfer memoperands.
2324 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2325 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2326
2327 ReplaceNode(N, St);
2328}
2329
2330void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2331 unsigned Scale, unsigned Opc_rr,
2332 unsigned Opc_ri) {
2333 SDLoc dl(N);
2334
2335 // Form a REG_SEQUENCE to force register allocation.
2336 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2337 SDValue RegSeq = createZTuple(Regs);
2338
2339 // Optimize addressing mode.
2340 unsigned Opc;
2341   SDValue Base, Offset;
2342   std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2343 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2344 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2345
2346 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2347 Base, // address
2348 Offset, // offset
2349 N->getOperand(0)}; // chain
2350 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2351
2352 ReplaceNode(N, St);
2353}
2354
2355bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2356 SDValue &OffImm) {
2357 SDLoc dl(N);
2358 const DataLayout &DL = CurDAG->getDataLayout();
2359 const TargetLowering *TLI = getTargetLowering();
2360
2361 // Try to match it for the frame address
2362 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2363 int FI = FINode->getIndex();
2364 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2365 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2366 return true;
2367 }
2368
2369 return false;
2370}
2371
2372void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2373 unsigned Opc) {
2374 SDLoc dl(N);
2375 EVT VT = N->getOperand(2)->getValueType(0);
2376 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2377 MVT::Other}; // Type for the Chain
2378
2379 // Form a REG_SEQUENCE to force register allocation.
2380 bool Is128Bit = VT.getSizeInBits() == 128;
2381 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2382 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2383
2384 SDValue Ops[] = {RegSeq,
2385 N->getOperand(NumVecs + 1), // base register
2386 N->getOperand(NumVecs + 2), // Incremental
2387 N->getOperand(0)}; // Chain
2388 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2389
2390 ReplaceNode(N, St);
2391}
2392
2393namespace {
2394/// WidenVector - Given a value in the V64 register class, produce the
2395/// equivalent value in the V128 register class.
2396class WidenVector {
2397 SelectionDAG &DAG;
2398
2399public:
2400 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2401
2402 SDValue operator()(SDValue V64Reg) {
2403 EVT VT = V64Reg.getValueType();
2404 unsigned NarrowSize = VT.getVectorNumElements();
2405 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2406 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2407 SDLoc DL(V64Reg);
2408
2409 SDValue Undef =
2410 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2411 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2412 }
2413};
2414} // namespace
2415
2416/// NarrowVector - Given a value in the V128 register class, produce the
2417/// equivalent value in the V64 register class.
2418 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2419   EVT VT = V128Reg.getValueType();
2420 unsigned WideSize = VT.getVectorNumElements();
2421 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2422 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2423
2424 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2425 V128Reg);
2426}
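// Illustrative example (types arbitrary): WidenVector turns a v2f32 value
// into v4f32 by inserting it into the dsub lane of an IMPLICIT_DEF, and
// NarrowVector is the inverse, extracting dsub to view the low 64 bits as
// v2f32 again.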
2427
2428void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2429 unsigned Opc) {
2430 SDLoc dl(N);
2431 EVT VT = N->getValueType(0);
2432 bool Narrow = VT.getSizeInBits() == 64;
2433
2434 // Form a REG_SEQUENCE to force register allocation.
2435 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2436
2437 if (Narrow)
2438 transform(Regs, Regs.begin(),
2439 WidenVector(*CurDAG));
2440
2441 SDValue RegSeq = createQTuple(Regs);
2442
2443 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2444
2445 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2446
2447 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2448 N->getOperand(NumVecs + 3), N->getOperand(0)};
2449 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2450 SDValue SuperReg = SDValue(Ld, 0);
2451
2452 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2453 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2454 AArch64::qsub2, AArch64::qsub3 };
2455 for (unsigned i = 0; i < NumVecs; ++i) {
2456 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2457 if (Narrow)
2458 NV = NarrowVector(NV, *CurDAG);
2459 ReplaceUses(SDValue(N, i), NV);
2460 }
2461
2462 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2463 CurDAG->RemoveDeadNode(N);
2464}
2465
2466void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2467 unsigned Opc) {
2468 SDLoc dl(N);
2469 EVT VT = N->getValueType(0);
2470 bool Narrow = VT.getSizeInBits() == 64;
2471
2472 // Form a REG_SEQUENCE to force register allocation.
2473 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2474
2475 if (Narrow)
2476 transform(Regs, Regs.begin(),
2477 WidenVector(*CurDAG));
2478
2479 SDValue RegSeq = createQTuple(Regs);
2480
2481 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2482 RegSeq->getValueType(0), MVT::Other};
2483
2484 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2485
2486 SDValue Ops[] = {RegSeq,
2487 CurDAG->getTargetConstant(LaneNo, dl,
2488 MVT::i64), // Lane Number
2489 N->getOperand(NumVecs + 2), // Base register
2490 N->getOperand(NumVecs + 3), // Incremental
2491 N->getOperand(0)};
2492 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2493
2494 // Update uses of the write back register
2495 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2496
2497 // Update uses of the vector list
2498 SDValue SuperReg = SDValue(Ld, 1);
2499 if (NumVecs == 1) {
2500 ReplaceUses(SDValue(N, 0),
2501 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2502 } else {
2503 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2504 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2505 AArch64::qsub2, AArch64::qsub3 };
2506 for (unsigned i = 0; i < NumVecs; ++i) {
2507 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2508 SuperReg);
2509 if (Narrow)
2510 NV = NarrowVector(NV, *CurDAG);
2511 ReplaceUses(SDValue(N, i), NV);
2512 }
2513 }
2514
2515 // Update the Chain
2516 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2517 CurDAG->RemoveDeadNode(N);
2518}
2519
2520void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2521 unsigned Opc) {
2522 SDLoc dl(N);
2523 EVT VT = N->getOperand(2)->getValueType(0);
2524 bool Narrow = VT.getSizeInBits() == 64;
2525
2526 // Form a REG_SEQUENCE to force register allocation.
2527 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2528
2529 if (Narrow)
2530 transform(Regs, Regs.begin(),
2531 WidenVector(*CurDAG));
2532
2533 SDValue RegSeq = createQTuple(Regs);
2534
2535 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2536
2537 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2538 N->getOperand(NumVecs + 3), N->getOperand(0)};
2539 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2540
2541 // Transfer memoperands.
2542 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2543 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2544
2545 ReplaceNode(N, St);
2546}
2547
2548void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2549 unsigned Opc) {
2550 SDLoc dl(N);
2551 EVT VT = N->getOperand(2)->getValueType(0);
2552 bool Narrow = VT.getSizeInBits() == 64;
2553
2554 // Form a REG_SEQUENCE to force register allocation.
2555 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2556
2557 if (Narrow)
2558 transform(Regs, Regs.begin(),
2559 WidenVector(*CurDAG));
2560
2561 SDValue RegSeq = createQTuple(Regs);
2562
2563 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2564 MVT::Other};
2565
2566 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2567
2568 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2569 N->getOperand(NumVecs + 2), // Base Register
2570 N->getOperand(NumVecs + 3), // Incremental
2571 N->getOperand(0)};
2572 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2573
2574 // Transfer memoperands.
2575 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2576 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2577
2578 ReplaceNode(N, St);
2579}
2580
2581 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2582                                        unsigned &Opc, SDValue &Opd0,
2583 unsigned &LSB, unsigned &MSB,
2584 unsigned NumberOfIgnoredLowBits,
2585 bool BiggerPattern) {
2586 assert(N->getOpcode() == ISD::AND &&
2587 "N must be a AND operation to call this function");
2588
2589 EVT VT = N->getValueType(0);
2590
2591 // Here we can test the type of VT and return false when the type does not
2592 // match, but since it is done prior to that call in the current context
2593 // we turned that into an assert to avoid redundant code.
2594 assert((VT == MVT::i32 || VT == MVT::i64) &&
2595 "Type checking must have been done before calling this function");
2596
2597 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2598 // changed the AND node to a 32-bit mask operation. We'll have to
2599 // undo that as part of the transform here if we want to catch all
2600 // the opportunities.
2601 // Currently the NumberOfIgnoredLowBits argument helps to recover
2602 // from these situations when matching bigger pattern (bitfield insert).
2603
2604 // For unsigned extracts, check for a shift right and mask
2605 uint64_t AndImm = 0;
2606 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2607 return false;
2608
2609 const SDNode *Op0 = N->getOperand(0).getNode();
2610
2611 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2612 // simplified. Try to undo that
2613 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2614
2615 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2616 if (AndImm & (AndImm + 1))
2617 return false;
2618
2619 bool ClampMSB = false;
2620 uint64_t SrlImm = 0;
2621 // Handle the SRL + ANY_EXTEND case.
2622 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2623 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2624 // Extend the incoming operand of the SRL to 64-bit.
2625 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2626 // Make sure to clamp the MSB so that we preserve the semantics of the
2627 // original operations.
2628 ClampMSB = true;
2629 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2630              isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2631                                    SrlImm)) {
2632 // If the shift result was truncated, we can still combine them.
2633 Opd0 = Op0->getOperand(0).getOperand(0);
2634
2635 // Use the type of SRL node.
2636 VT = Opd0->getValueType(0);
2637 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2638 Opd0 = Op0->getOperand(0);
2639 ClampMSB = (VT == MVT::i32);
2640 } else if (BiggerPattern) {
2641 // Let's pretend a 0 shift right has been performed.
2642 // The resulting code will be at least as good as the original one
2643 // plus it may expose more opportunities for bitfield insert pattern.
2644 // FIXME: Currently we limit this to the bigger pattern, because
2645 // some optimizations expect AND and not UBFM.
2646 Opd0 = N->getOperand(0);
2647 } else
2648 return false;
2649
2650 // Bail out on large immediates. This happens when no proper
2651 // combining/constant folding was performed.
2652 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2653 LLVM_DEBUG(
2654 (dbgs() << N
2655 << ": Found large shift immediate, this should not happen\n"));
2656 return false;
2657 }
2658
2659 LSB = SrlImm;
2660 MSB = SrlImm +
2661 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2662 : llvm::countr_one<uint64_t>(AndImm)) -
2663 1;
2664 if (ClampMSB)
2665 // Since we're moving the extend before the right shift operation, we need
2666 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2667 // the zeros which would get shifted in with the original right shift
2668 // operation.
2669 MSB = MSB > 31 ? 31 : MSB;
2670
2671 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2672 return true;
2673}
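// Illustrative example (i32, values arbitrary): for (x >> 3) & 0x1f the mask
// satisfies AndImm & (AndImm + 1) == 0, so LSB == 3 and
// MSB == 3 + countr_one(0x1f) - 1 == 7, selecting UBFMWri x, 3, 7, which is
// "ubfx w0, w1, #3, #5".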
2674
2675 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2676                                              SDValue &Opd0, unsigned &Immr,
2677 unsigned &Imms) {
2678 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2679
2680 EVT VT = N->getValueType(0);
2681 unsigned BitWidth = VT.getSizeInBits();
2682 assert((VT == MVT::i32 || VT == MVT::i64) &&
2683 "Type checking must have been done before calling this function");
2684
2685 SDValue Op = N->getOperand(0);
2686 if (Op->getOpcode() == ISD::TRUNCATE) {
2687 Op = Op->getOperand(0);
2688 VT = Op->getValueType(0);
2689 BitWidth = VT.getSizeInBits();
2690 }
2691
2692 uint64_t ShiftImm;
2693 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2694 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2695 return false;
2696
2697 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2698 if (ShiftImm + Width > BitWidth)
2699 return false;
2700
2701 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2702 Opd0 = Op.getOperand(0);
2703 Immr = ShiftImm;
2704 Imms = ShiftImm + Width - 1;
2705 return true;
2706}
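// Illustrative example (values arbitrary): sign_extend_inreg i8 of (x >> 4)
// has ShiftImm == 4 and Width == 8, giving Immr == 4 and Imms == 11, i.e.
// SBFMWri x, 4, 11, which is "sbfx w0, w1, #4, #8".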
2707
2708 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2709                                           SDValue &Opd0, unsigned &LSB,
2710 unsigned &MSB) {
2711 // We are looking for the following pattern which basically extracts several
2712   // contiguous bits from the source value and places them starting at the LSB
2713   // of the destination value; all other bits of the destination value are set to zero:
2714 //
2715 // Value2 = AND Value, MaskImm
2716 // SRL Value2, ShiftImm
2717 //
2718 // with MaskImm >> ShiftImm to search for the bit width.
2719 //
2720 // This gets selected into a single UBFM:
2721 //
2722 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2723 //
2724
2725 if (N->getOpcode() != ISD::SRL)
2726 return false;
2727
2728 uint64_t AndMask = 0;
2729 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2730 return false;
2731
2732 Opd0 = N->getOperand(0).getOperand(0);
2733
2734 uint64_t SrlImm = 0;
2735 if (!isIntImmediate(N->getOperand(1), SrlImm))
2736 return false;
2737
2738 // Check whether we really have several bits extract here.
2739 if (!isMask_64(AndMask >> SrlImm))
2740 return false;
2741
2742 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2743 LSB = SrlImm;
2744 MSB = llvm::Log2_64(AndMask);
2745 return true;
2746}
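// Illustrative example (values arbitrary): (x & 0xFF0) >> 4 gives
// AndMask >> SrlImm == 0xFF, which is a mask, so LSB == 4 and
// MSB == Log2_64(0xFF0) == 11, selecting UBFM x, 4, 11 ("ubfx #4, #8").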
2747
2748static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2749 unsigned &Immr, unsigned &Imms,
2750 bool BiggerPattern) {
2751 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2752 "N must be a SHR/SRA operation to call this function");
2753
2754 EVT VT = N->getValueType(0);
2755
2756 // Here we can test the type of VT and return false when the type does not
2757 // match, but since it is done prior to that call in the current context
2758 // we turned that into an assert to avoid redundant code.
2759 assert((VT == MVT::i32 || VT == MVT::i64) &&
2760 "Type checking must have been done before calling this function");
2761
2762 // Check for AND + SRL doing several bits extract.
2763 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2764 return true;
2765
2766 // We're looking for a shift of a shift.
2767 uint64_t ShlImm = 0;
2768 uint64_t TruncBits = 0;
2769 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2770 Opd0 = N->getOperand(0).getOperand(0);
2771 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2772 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2773     // We are looking for a shift of a truncate. A truncate from i64 to i32 can
2774     // be treated as setting the high 32 bits to zero. Our strategy here is to
2775     // always generate a 64-bit UBFM; this consistency helps the CSE pass find
2776     // more redundancy later.
2777 Opd0 = N->getOperand(0).getOperand(0);
2778 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2779 VT = Opd0.getValueType();
2780 assert(VT == MVT::i64 && "the promoted type should be i64");
2781 } else if (BiggerPattern) {
2782 // Let's pretend a 0 shift left has been performed.
2783 // FIXME: Currently we limit this to the bigger pattern case,
2784 // because some optimizations expect AND and not UBFM
2785 Opd0 = N->getOperand(0);
2786 } else
2787 return false;
2788
2789 // Missing combines/constant folding may have left us with strange
2790 // constants.
2791 if (ShlImm >= VT.getSizeInBits()) {
2792 LLVM_DEBUG(
2793 (dbgs() << N
2794 << ": Found large shift immediate, this should not happen\n"));
2795 return false;
2796 }
2797
2798 uint64_t SrlImm = 0;
2799 if (!isIntImmediate(N->getOperand(1), SrlImm))
2800 return false;
2801
2802 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2803 "bad amount in shift node!");
2804 int immr = SrlImm - ShlImm;
2805 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2806 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2807 // SRA requires a signed extraction
2808 if (VT == MVT::i32)
2809 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2810 else
2811 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2812 return true;
2813}
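// Illustrative example (i32, values arbitrary): srl (shl x, 8), 20 gives
// Immr == 20 - 8 == 12 and Imms == 32 - 8 - 1 == 23, i.e. UBFMWri x, 12, 23,
// which extracts the 12-bit field starting at bit 12 ("ubfx #12, #12").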
2814
2815bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2816 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2817
2818 EVT VT = N->getValueType(0);
2819 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2820 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2821 return false;
2822
2823 uint64_t ShiftImm;
2824 SDValue Op = N->getOperand(0);
2825 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2826 return false;
2827
2828 SDLoc dl(N);
2829 // Extend the incoming operand of the shift to 64-bits.
2830 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2831 unsigned Immr = ShiftImm;
2832 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2833 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2834 CurDAG->getTargetConstant(Imms, dl, VT)};
2835 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2836 return true;
2837}
2838
2839static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2840 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2841 unsigned NumberOfIgnoredLowBits = 0,
2842 bool BiggerPattern = false) {
2843 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2844 return false;
2845
2846 switch (N->getOpcode()) {
2847 default:
2848 if (!N->isMachineOpcode())
2849 return false;
2850 break;
2851 case ISD::AND:
2852 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2853 NumberOfIgnoredLowBits, BiggerPattern);
2854 case ISD::SRL:
2855 case ISD::SRA:
2856 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2857
2858   case ISD::SIGN_EXTEND_INREG:
2859     return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2860 }
2861
2862 unsigned NOpc = N->getMachineOpcode();
2863 switch (NOpc) {
2864 default:
2865 return false;
2866 case AArch64::SBFMWri:
2867 case AArch64::UBFMWri:
2868 case AArch64::SBFMXri:
2869 case AArch64::UBFMXri:
2870 Opc = NOpc;
2871 Opd0 = N->getOperand(0);
2872 Immr = N->getConstantOperandVal(1);
2873 Imms = N->getConstantOperandVal(2);
2874 return true;
2875 }
2876 // Unreachable
2877 return false;
2878}
2879
2880bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2881 unsigned Opc, Immr, Imms;
2882 SDValue Opd0;
2883 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2884 return false;
2885
2886 EVT VT = N->getValueType(0);
2887 SDLoc dl(N);
2888
2889 // If the bit extract operation is 64bit but the original type is 32bit, we
2890 // need to add one EXTRACT_SUBREG.
2891 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2892 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2893 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2894
2895 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2896 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2897 MVT::i32, SDValue(BFM, 0));
2898 ReplaceNode(N, Inner.getNode());
2899 return true;
2900 }
2901
2902 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2903 CurDAG->getTargetConstant(Imms, dl, VT)};
2904 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2905 return true;
2906}
2907
2908/// Does DstMask form a complementary pair with the mask provided by
2909/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2910/// this asks whether DstMask zeroes precisely those bits that will be set by
2911/// the other half.
2912static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2913 unsigned NumberOfIgnoredHighBits, EVT VT) {
2914 assert((VT == MVT::i32 || VT == MVT::i64) &&
2915 "i32 or i64 mask type expected!");
2916 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2917
2918 // Enable implicitTrunc as we're intentionally ignoring high bits.
2919 APInt SignificantDstMask =
2920 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
2921 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2922
2923 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2924 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2925}
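// Illustrative example (i32, masks arbitrary): DstMask == 0xFFFF00FF and
// inserted bits covering 0x0000FF00 are complementary (their AND is zero and
// their OR is all ones), so the pair qualifies for a BFI/BFXIL-style insert.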
2926
2927// Look for bits that will be useful for later uses.
2928 // A bit is considered useless as soon as it is dropped and is never used
2929 // before it has been dropped.
2930// E.g., looking for useful bit of x
2931// 1. y = x & 0x7
2932// 2. z = y >> 2
2933 // After #1, the useful bits of x are 0x7; these useful bits live through
2934 // y.
2935// After #2, the useful bits of x are 0x4.
2936// However, if x is used on an unpredictable instruction, then all its bits
2937// are useful.
2938// E.g.
2939// 1. y = x & 0x7
2940// 2. z = y >> 2
2941// 3. str x, [@x]
2942static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2943
2944 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2945                                               unsigned Depth) {
2946 uint64_t Imm =
2947 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2948 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2949 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2950 getUsefulBits(Op, UsefulBits, Depth + 1);
2951}
2952
2953 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2954                                              uint64_t Imm, uint64_t MSB,
2955 unsigned Depth) {
2956 // inherit the bitwidth value
2957 APInt OpUsefulBits(UsefulBits);
2958 OpUsefulBits = 1;
2959
2960 if (MSB >= Imm) {
2961 OpUsefulBits <<= MSB - Imm + 1;
2962 --OpUsefulBits;
2963 // The interesting part will be in the lower part of the result
2964 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2965 // The interesting part was starting at Imm in the argument
2966 OpUsefulBits <<= Imm;
2967 } else {
2968 OpUsefulBits <<= MSB + 1;
2969 --OpUsefulBits;
2970 // The interesting part will be shifted in the result
2971 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2972 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2973 // The interesting part was at zero in the argument
2974 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2975 }
2976
2977 UsefulBits &= OpUsefulBits;
2978}
2979
2980static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2981 unsigned Depth) {
2982 uint64_t Imm =
2983 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2984 uint64_t MSB =
2985 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2986
2987 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2988}
2989
2990 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2991                                               unsigned Depth) {
2992 uint64_t ShiftTypeAndValue =
2993 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2994 APInt Mask(UsefulBits);
2995 Mask.clearAllBits();
2996 Mask.flipAllBits();
2997
2998 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2999 // Shift Left
3000 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
3001 Mask <<= ShiftAmt;
3002 getUsefulBits(Op, Mask, Depth + 1);
3003 Mask.lshrInPlace(ShiftAmt);
3004 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
3005 // Shift Right
3006 // We do not handle AArch64_AM::ASR, because the sign will change the
3007 // number of useful bits
3008 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
3009 Mask.lshrInPlace(ShiftAmt);
3010 getUsefulBits(Op, Mask, Depth + 1);
3011 Mask <<= ShiftAmt;
3012 } else
3013 return;
3014
3015 UsefulBits &= Mask;
3016}
3017
3018static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
3019 unsigned Depth) {
3020 uint64_t Imm =
3021 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3022 uint64_t MSB =
3023 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
3024
3025 APInt OpUsefulBits(UsefulBits);
3026 OpUsefulBits = 1;
3027
3028 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
3029 ResultUsefulBits.flipAllBits();
3030 APInt Mask(UsefulBits.getBitWidth(), 0);
3031
3032 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
3033
3034 if (MSB >= Imm) {
3035 // The instruction is a BFXIL.
3036 uint64_t Width = MSB - Imm + 1;
3037 uint64_t LSB = Imm;
3038
3039 OpUsefulBits <<= Width;
3040 --OpUsefulBits;
3041
3042 if (Op.getOperand(1) == Orig) {
3043 // Copy the low bits from the result to bits starting from LSB.
3044 Mask = ResultUsefulBits & OpUsefulBits;
3045 Mask <<= LSB;
3046 }
3047
3048 if (Op.getOperand(0) == Orig)
3049 // Bits starting from LSB in the input contribute to the result.
3050 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3051 } else {
3052 // The instruction is a BFI.
3053 uint64_t Width = MSB + 1;
3054 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
3055
3056 OpUsefulBits <<= Width;
3057 --OpUsefulBits;
3058 OpUsefulBits <<= LSB;
3059
3060 if (Op.getOperand(1) == Orig) {
3061 // Copy the bits from the result to the zero bits.
3062 Mask = ResultUsefulBits & OpUsefulBits;
3063 Mask.lshrInPlace(LSB);
3064 }
3065
3066 if (Op.getOperand(0) == Orig)
3067 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3068 }
3069
3070 UsefulBits &= Mask;
3071}
3072
3073static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
3074 SDValue Orig, unsigned Depth) {
3075
3076 // Users of this node should have already been instruction selected
3077 // FIXME: Can we turn that into an assert?
3078 if (!UserNode->isMachineOpcode())
3079 return;
3080
3081 switch (UserNode->getMachineOpcode()) {
3082 default:
3083 return;
3084 case AArch64::ANDSWri:
3085 case AArch64::ANDSXri:
3086 case AArch64::ANDWri:
3087 case AArch64::ANDXri:
3088 // We increment Depth only when we call the getUsefulBits
3089 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
3090 Depth);
3091 case AArch64::UBFMWri:
3092 case AArch64::UBFMXri:
3093 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
3094
3095 case AArch64::ORRWrs:
3096 case AArch64::ORRXrs:
3097 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
3098 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
3099 Depth);
3100 return;
3101 case AArch64::BFMWri:
3102 case AArch64::BFMXri:
3103 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
3104
3105 case AArch64::STRBBui:
3106 case AArch64::STURBBi:
3107 if (UserNode->getOperand(0) != Orig)
3108 return;
3109 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
3110 return;
3111
3112 case AArch64::STRHHui:
3113 case AArch64::STURHHi:
3114 if (UserNode->getOperand(0) != Orig)
3115 return;
3116 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
3117 return;
3118 }
3119}
3120
3121static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
3122   if (Depth >= SelectionDAG::MaxRecursionDepth)
3123     return;
3124 // Initialize UsefulBits
3125 if (!Depth) {
3126 unsigned Bitwidth = Op.getScalarValueSizeInBits();
3127 // At the beginning, assume every produced bits is useful
3128 UsefulBits = APInt(Bitwidth, 0);
3129 UsefulBits.flipAllBits();
3130 }
3131 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
3132
3133 for (SDNode *Node : Op.getNode()->users()) {
3134 // A use cannot produce useful bits
3135 APInt UsefulBitsForUse = APInt(UsefulBits);
3136 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
3137 UsersUsefulBits |= UsefulBitsForUse;
3138 }
3139 // UsefulBits contains the produced bits that are meaningful for the
3140 // current definition, thus a user cannot make a bit meaningful at
3141 // this point
3142 UsefulBits &= UsersUsefulBits;
3143}
3144
3145/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3146/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3147/// 0, return Op unchanged.
3148static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3149 if (ShlAmount == 0)
3150 return Op;
3151
3152 EVT VT = Op.getValueType();
3153 SDLoc dl(Op);
3154 unsigned BitWidth = VT.getSizeInBits();
3155 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3156
3157 SDNode *ShiftNode;
3158 if (ShlAmount > 0) {
3159 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3160 ShiftNode = CurDAG->getMachineNode(
3161 UBFMOpc, dl, VT, Op,
3162 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3163 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3164 } else {
3165 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3166 assert(ShlAmount < 0 && "expected right shift");
3167 int ShrAmount = -ShlAmount;
3168 ShiftNode = CurDAG->getMachineNode(
3169 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3170 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3171 }
3172
3173 return SDValue(ShiftNode, 0);
3174}
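// Illustrative example (i32): a notional left shift by 4 becomes
// UBFMWri src, 28, 27 (an "lsl #4"), while ShlAmount == -4 becomes
// UBFMWri src, 4, 31 (an "lsr #4").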
3175
3176// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3177static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3178 bool BiggerPattern,
3179 const uint64_t NonZeroBits,
3180 SDValue &Src, int &DstLSB,
3181 int &Width);
3182
3183// For bit-field-positioning pattern "shl VAL, N)".
3184static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3185 bool BiggerPattern,
3186 const uint64_t NonZeroBits,
3187 SDValue &Src, int &DstLSB,
3188 int &Width);
3189
3190/// Does this tree qualify as an attempt to move a bitfield into position,
3191/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
3192 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
3193                                     bool BiggerPattern, SDValue &Src,
3194 int &DstLSB, int &Width) {
3195 EVT VT = Op.getValueType();
3196 unsigned BitWidth = VT.getSizeInBits();
3197 (void)BitWidth;
3198 assert(BitWidth == 32 || BitWidth == 64);
3199
3200 KnownBits Known = CurDAG->computeKnownBits(Op);
3201
3202 // Non-zero in the sense that they're not provably zero, which is the key
3203 // point if we want to use this value
3204 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3205 if (!isShiftedMask_64(NonZeroBits))
3206 return false;
3207
3208 switch (Op.getOpcode()) {
3209 default:
3210 break;
3211 case ISD::AND:
3212 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3213 NonZeroBits, Src, DstLSB, Width);
3214 case ISD::SHL:
3215 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3216 NonZeroBits, Src, DstLSB, Width);
3217 }
3218
3219 return false;
3220}
3221
3222 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3223                                            bool BiggerPattern,
3224 const uint64_t NonZeroBits,
3225 SDValue &Src, int &DstLSB,
3226 int &Width) {
3227 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3228
3229 EVT VT = Op.getValueType();
3230 assert((VT == MVT::i32 || VT == MVT::i64) &&
3231 "Caller guarantees VT is one of i32 or i64");
3232 (void)VT;
3233
3234 uint64_t AndImm;
3235 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3236 return false;
3237
3238 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3239 // 1) (AndImm & (1 << POS) == 0)
3240 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3241 //
3242 // 1) and 2) don't agree so something must be wrong (e.g., in
3243 // 'SelectionDAG::computeKnownBits')
3244 assert((~AndImm & NonZeroBits) == 0 &&
3245 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3246
3247 SDValue AndOp0 = Op.getOperand(0);
3248
3249 uint64_t ShlImm;
3250 SDValue ShlOp0;
3251 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3252 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3253 ShlOp0 = AndOp0.getOperand(0);
3254 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3255             isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
3256                                   ShlImm)) {
3257 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3258
3259 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3260 SDValue ShlVal = AndOp0.getOperand(0);
3261
3262    // Since this is after type legalization and ShlVal has been any-extended
3263    // to MVT::i64, ShlVal itself is expected to have type MVT::i32.
3264 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3265
3266 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3267 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3268 } else
3269 return false;
3270
3271 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3272 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3273 // AndOp0+AND.
3274 if (!BiggerPattern && !AndOp0.hasOneUse())
3275 return false;
3276
3277 DstLSB = llvm::countr_zero(NonZeroBits);
3278 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3279
3280 // Bail out on large Width. This happens when no proper combining / constant
3281 // folding was performed.
3282 if (Width >= (int)VT.getSizeInBits()) {
3283 // If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t, and
3284 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3285 // "val".
3286 // If VT is i32, what Width >= 32 means:
3287    //   - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
3288 // demands at least 'Width' bits (after dag-combiner). This together with
3289 // `any_extend` Op (undefined higher bits) indicates missed combination
3290    //     when lowering the 'and' IR instruction to a machine IR instruction.
3291 LLVM_DEBUG(
3292 dbgs()
3293 << "Found large Width in bit-field-positioning -- this indicates no "
3294 "proper combining / constant folding was performed\n");
3295 return false;
3296 }
3297
3298 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3299 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3300 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3301 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3302 // which case it is not profitable to insert an extra shift.
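  // For instance (illustrative), for "(and (shl val, 8), 0xff000)" we get
  // ShlImm == 8 but DstLSB == 12, so when BiggerPattern is true an extra
  // LSR #4 is emitted below to line the source up for the BFI.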
3303 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3304 return false;
3305
3306 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3307 return true;
3308}
3309
3310// For node (shl (and val, mask), N)), returns true if the node is equivalent to
3311// UBFIZ.
3312static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3313                                              SDValue &Src, int &DstLSB,
3314 int &Width) {
3315  // The caller should have verified that Op is a left shift with a constant
3316  // shift amount; the asserts below check that.
3317 assert(Op.getOpcode() == ISD::SHL &&
3318 "Op.getNode() should be a SHL node to call this function");
3319 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3320 "Op.getNode() should shift ShlImm to call this function");
3321
3322 uint64_t AndImm = 0;
3323 SDValue Op0 = Op.getOperand(0);
3324 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3325 return false;
3326
3327 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3328 if (isMask_64(ShiftedAndImm)) {
3329 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3330 // should end with Mask, and could be prefixed with random bits if those
3331 // bits are shifted out.
3332 //
3333    // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3334    // the bits of the AND result corresponding to {x,y,z} are shifted out, so
3335    // it's fine to not extract them.
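    // Concretely (illustrative), "(shl (and x, 0xff), 8)" gives ShiftedAndImm ==
    // 0xff, so Width == 8 and DstLSB == 8: a UBFIZ of 8 bits at bit 8 with x as
    // the source.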
3336 Width = llvm::countr_one(ShiftedAndImm);
3337 DstLSB = ShlImm;
3338 Src = Op0.getOperand(0);
3339 return true;
3340 }
3341 return false;
3342}
3343
3344static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3345                                           bool BiggerPattern,
3346 const uint64_t NonZeroBits,
3347 SDValue &Src, int &DstLSB,
3348 int &Width) {
3349 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3350
3351 EVT VT = Op.getValueType();
3352 assert((VT == MVT::i32 || VT == MVT::i64) &&
3353 "Caller guarantees that type is i32 or i64");
3354 (void)VT;
3355
3356 uint64_t ShlImm;
3357 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3358 return false;
3359
3360 if (!BiggerPattern && !Op.hasOneUse())
3361 return false;
3362
3363 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3364 return true;
3365
3366 DstLSB = llvm::countr_zero(NonZeroBits);
3367 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3368
3369 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3370 return false;
3371
3372 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3373 return true;
3374}
3375
3376static bool isShiftedMask(uint64_t Mask, EVT VT) {
3377 assert(VT == MVT::i32 || VT == MVT::i64);
3378 if (VT == MVT::i32)
3379 return isShiftedMask_32(Mask);
3380 return isShiftedMask_64(Mask);
3381}
3382
3383// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3384// inserted only sets known zero bits.
3385static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
3386  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3387
3388 EVT VT = N->getValueType(0);
3389 if (VT != MVT::i32 && VT != MVT::i64)
3390 return false;
3391
3392 unsigned BitWidth = VT.getSizeInBits();
3393
3394 uint64_t OrImm;
3395 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3396 return false;
3397
3398  // Skip this transformation if OrImm can already be encoded directly as an
3399  // ORR immediate. Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which
3400  // is most likely performance neutral.
3401  if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
3402    return false;
3403
3404 uint64_t MaskImm;
3405 SDValue And = N->getOperand(0);
3406 // Must be a single use AND with an immediate operand.
3407 if (!And.hasOneUse() ||
3408 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3409 return false;
3410
3411 // Compute the Known Zero for the AND as this allows us to catch more general
3412 // cases than just looking for AND with imm.
3413 KnownBits Known = CurDAG->computeKnownBits(And);
3414
3415 // Non-zero in the sense that they're not provably zero, which is the key
3416 // point if we want to use this value.
3417 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3418
3419 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3420 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3421 return false;
3422
3423 // The bits being inserted must only set those bits that are known to be zero.
3424 if ((OrImm & NotKnownZero) != 0) {
3425 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3426 // currently handle this case.
3427 return false;
3428 }
3429
3430 // BFI/BFXIL dst, src, #lsb, #width.
3431 int LSB = llvm::countr_one(NotKnownZero);
3432 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3433
3434 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3435 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3436 unsigned ImmS = Width - 1;
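  // For example (illustrative, i32): if Known.Zero == 0x00ffff00 and OrImm only
  // sets bits inside that mask, then LSB == 8 and Width == 16, so ImmR == 24
  // and ImmS == 15, i.e. "bfi dst, src, #8, #16".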
3437
3438 // If we're creating a BFI instruction avoid cases where we need more
3439 // instructions to materialize the BFI constant as compared to the original
3440 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3441 // should be no worse in this case.
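  // For example (illustrative, i64): OrImm == 0x12340000 with LSB == 16 gives
  // BFIImm == 0x1234; both values occupy a single 16-bit chunk, so the BFI
  // constant is no more expensive to materialize and the transform proceeds.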
3442 bool IsBFI = LSB != 0;
3443 uint64_t BFIImm = OrImm >> LSB;
3444 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3445 // We have a BFI instruction and we know the constant can't be materialized
3446 // with a ORR-immediate with the zero register.
3447 unsigned OrChunks = 0, BFIChunks = 0;
3448 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3449 if (((OrImm >> Shift) & 0xFFFF) != 0)
3450 ++OrChunks;
3451 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3452 ++BFIChunks;
3453 }
3454 if (BFIChunks > OrChunks)
3455 return false;
3456 }
3457
3458 // Materialize the constant to be inserted.
3459 SDLoc DL(N);
3460 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3461 SDNode *MOVI = CurDAG->getMachineNode(
3462 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3463
3464 // Create the BFI/BFXIL instruction.
3465 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3466 CurDAG->getTargetConstant(ImmR, DL, VT),
3467 CurDAG->getTargetConstant(ImmS, DL, VT)};
3468 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3469 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3470 return true;
3471}
3472
3473static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3474                                           SDValue &ShiftedOperand,
3475 uint64_t &EncodedShiftImm) {
3476 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3477 if (!Dst.hasOneUse())
3478 return false;
3479
3480 EVT VT = Dst.getValueType();
3481 assert((VT == MVT::i32 || VT == MVT::i64) &&
3482 "Caller should guarantee that VT is one of i32 or i64");
3483 const unsigned SizeInBits = VT.getSizeInBits();
3484
3485 SDLoc DL(Dst.getNode());
3486 uint64_t AndImm, ShlImm;
3487 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3488 isShiftedMask_64(AndImm)) {
3489 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3490 SDValue DstOp0 = Dst.getOperand(0);
3491 if (!DstOp0.hasOneUse())
3492 return false;
3493
3494 // An example to illustrate the transformation
3495 // From:
3496 // lsr x8, x1, #1
3497 // and x8, x8, #0x3f80
3498 // bfxil x8, x1, #0, #7
3499 // To:
3500    //   and   x8, x1, #0x7f
3501    //   ubfx  x9, x1, #8, #7
3502    //   orr   x8, x8, x9, lsl #7
3503 //
3504 // The number of instructions remains the same, but ORR is faster than BFXIL
3505 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3506 // the dependency chain is improved after the transformation.
3507 uint64_t SrlImm;
3508 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3509 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3510 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3511 unsigned MaskWidth =
3512 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3513 unsigned UBFMOpc =
3514 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3515 SDNode *UBFMNode = CurDAG->getMachineNode(
3516 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3517 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3518 VT),
3519 CurDAG->getTargetConstant(
3520 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3521 ShiftedOperand = SDValue(UBFMNode, 0);
3522 EncodedShiftImm = AArch64_AM::getShifterImm(
3523 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3524 return true;
3525 }
3526 }
3527 return false;
3528 }
3529
3530 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3531 ShiftedOperand = Dst.getOperand(0);
3532 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3533 return true;
3534 }
3535
3536 uint64_t SrlImm;
3537 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3538 ShiftedOperand = Dst.getOperand(0);
3539 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3540 return true;
3541 }
3542 return false;
3543}
3544
3545// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3546// the operands and select it to AArch64::ORR with shifted registers if
3547// that's more efficient. Returns true iff selection to AArch64::ORR happens.
3548static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3549 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3550 const bool BiggerPattern) {
3551 EVT VT = N->getValueType(0);
3552 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3553 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3554 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3555 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3556 assert((VT == MVT::i32 || VT == MVT::i64) &&
3557 "Expect result type to be i32 or i64 since N is combinable to BFM");
3558 SDLoc DL(N);
3559
3560 // Bail out if BFM simplifies away one node in BFM Dst.
3561 if (OrOpd1 != Dst)
3562 return false;
3563
3564 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3565 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3566 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3567 if (BiggerPattern) {
3568 uint64_t SrcAndImm;
3569 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3570 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3571 // OrOpd0 = AND Src, #Mask
3572 // So BFM simplifies away one AND node from Src and doesn't simplify away
3573 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3574 // one node (from Rd), ORR is better since it has higher throughput and
3575 // smaller latency than BFM on many AArch64 processors (and for the rest
3576 // ORR is at least as good as BFM).
3577 SDValue ShiftedOperand;
3578 uint64_t EncodedShiftImm;
3579 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3580 EncodedShiftImm)) {
3581 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3582 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3583 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3584 return true;
3585 }
3586 }
3587 return false;
3588 }
3589
3590 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3591
3592 uint64_t ShlImm;
3593 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3594 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3595 SDValue Ops[] = {
3596 Dst, Src,
3597 CurDAG->getTargetConstant(
3598              AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3599      CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3600 return true;
3601 }
3602
3603 // Select the following pattern to left-shifted operand rather than BFI.
3604 // %val1 = op ..
3605 // %val2 = shl %val1, #imm
3606 // %res = or %val1, %val2
3607 //
3608    // If N were selected to BFI, we know that 1) OrOpd0 would be the operand
3609    // from which bits are extracted (i.e., folded into the BFI), and 2) OrOpd1
3610    // would be the destination operand (i.e., preserved).
3611 //
3612 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
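    // For example (illustrative, i32): "or %val1, (shl %val1, #3)" selects to
    // "orr w0, w1, w1, lsl #3" instead of a BFI.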
3613 if (OrOpd0.getOperand(0) == OrOpd1) {
3614 SDValue Ops[] = {
3615 OrOpd1, OrOpd1,
3616 CurDAG->getTargetConstant(
3617              AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3618      CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3619 return true;
3620 }
3621 }
3622
3623 uint64_t SrlImm;
3624 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3625 // Select the following pattern to right-shifted operand rather than BFXIL.
3626 // %val1 = op ..
3627 // %val2 = lshr %val1, #imm
3628 // %res = or %val1, %val2
3629 //
3630    // If N were selected to BFXIL, we know that 1) OrOpd0 would be the operand
3631    // from which bits are extracted (i.e., folded into the BFXIL), and
3632    // 2) OrOpd1 would be the destination operand (i.e., preserved).
3633 //
3634 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
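    // For example (illustrative, i32): "or %val1, (lshr %val1, #5)" selects to
    // "orr w0, w1, w1, lsr #5" instead of a BFXIL.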
3635 if (OrOpd0.getOperand(0) == OrOpd1) {
3636 SDValue Ops[] = {
3637 OrOpd1, OrOpd1,
3638 CurDAG->getTargetConstant(
3639              AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3640      CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3641 return true;
3642 }
3643 }
3644
3645 return false;
3646}
3647
3648static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3649 SelectionDAG *CurDAG) {
3650 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3651
3652 EVT VT = N->getValueType(0);
3653 if (VT != MVT::i32 && VT != MVT::i64)
3654 return false;
3655
3656 unsigned BitWidth = VT.getSizeInBits();
3657
3658 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3659 // have the expected shape. Try to undo that.
3660
3661 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3662 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3663
3664 // Given a OR operation, check if we have the following pattern
3665 // ubfm c, b, imm, imm2 (or something that does the same jobs, see
3666 // isBitfieldExtractOp)
3667 // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and
3668 // countTrailingZeros(mask2) == imm2 - imm + 1
3669 // f = d | c
3670 // if yes, replace the OR instruction with:
3671 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
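  // For example (illustrative, i32): with imm == 8 and imm2 == 15, c holds bits
  // [8,15] of b and mask2 == 0xffffff00, so f selects to "bfxil e, b, #8, #8".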
3672
3673 // OR is commutative, check all combinations of operand order and values of
3674 // BiggerPattern, i.e.
3675 // Opd0, Opd1, BiggerPattern=false
3676 // Opd1, Opd0, BiggerPattern=false
3677 // Opd0, Opd1, BiggerPattern=true
3678 // Opd1, Opd0, BiggerPattern=true
3679 // Several of these combinations may match, so check with BiggerPattern=false
3680 // first since that will produce better results by matching more instructions
3681 // and/or inserting fewer extra instructions.
3682 for (int I = 0; I < 4; ++I) {
3683
3684 SDValue Dst, Src;
3685 unsigned ImmR, ImmS;
3686 bool BiggerPattern = I / 2;
3687 SDValue OrOpd0Val = N->getOperand(I % 2);
3688 SDNode *OrOpd0 = OrOpd0Val.getNode();
3689 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3690 SDNode *OrOpd1 = OrOpd1Val.getNode();
3691
3692 unsigned BFXOpc;
3693 int DstLSB, Width;
3694 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3695 NumberOfIgnoredLowBits, BiggerPattern)) {
3696 // Check that the returned opcode is compatible with the pattern,
3697 // i.e., same type and zero extended (U and not S)
3698 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3699 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3700 continue;
3701
3702 // Compute the width of the bitfield insertion
3703 DstLSB = 0;
3704 Width = ImmS - ImmR + 1;
3705      // FIXME: This constraint only catches bitfield insertion; we may want
3706      // to widen the pattern if we want to handle the general bitfield-move
3707      // case.
3708 if (Width <= 0)
3709 continue;
3710
3711 // If the mask on the insertee is correct, we have a BFXIL operation. We
3712 // can share the ImmR and ImmS values from the already-computed UBFM.
3713 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3714 BiggerPattern,
3715 Src, DstLSB, Width)) {
3716 ImmR = (BitWidth - DstLSB) % BitWidth;
3717 ImmS = Width - 1;
3718 } else
3719 continue;
3720
3721 // Check the second part of the pattern
3722 EVT VT = OrOpd1Val.getValueType();
3723 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3724
3725 // Compute the Known Zero for the candidate of the first operand.
3726 // This allows to catch more general case than just looking for
3727 // AND with imm. Indeed, simplify-demanded-bits may have removed
3728 // the AND instruction because it proves it was useless.
3729 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3730
3731 // Check if there is enough room for the second operand to appear
3732 // in the first one
3733 APInt BitsToBeInserted =
3734 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3735
3736 if ((BitsToBeInserted & ~Known.Zero) != 0)
3737 continue;
3738
3739 // Set the first operand
3740 uint64_t Imm;
3741 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3742 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3743 // In that case, we can eliminate the AND
3744 Dst = OrOpd1->getOperand(0);
3745 else
3746 // Maybe the AND has been removed by simplify-demanded-bits
3747 // or is useful because it discards more bits
3748 Dst = OrOpd1Val;
3749
3750 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3751 // with shifted operand is more efficient.
3752 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3753 BiggerPattern))
3754 return true;
3755
3756 // both parts match
3757 SDLoc DL(N);
3758 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3759 CurDAG->getTargetConstant(ImmS, DL, VT)};
3760 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3761 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3762 return true;
3763 }
3764
3765 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3766 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3767 // mask (e.g., 0x000ffff0).
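  // For example (illustrative, i32): "or (and X, 0xffff000f), (and Y, 0x0000fff0)"
  // has Mask1Imm == 0x0000fff0 (a shifted mask), so LSB == 4 and Width == 12 and
  // bits [4,15] of Y are inserted into X.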
3768 uint64_t Mask0Imm, Mask1Imm;
3769 SDValue And0 = N->getOperand(0);
3770 SDValue And1 = N->getOperand(1);
3771 if (And0.hasOneUse() && And1.hasOneUse() &&
3772 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3773 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3774 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3775 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3776
3777 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3778 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3779 // bits to be inserted.
3780 if (isShiftedMask(Mask0Imm, VT)) {
3781 std::swap(And0, And1);
3782 std::swap(Mask0Imm, Mask1Imm);
3783 }
3784
3785 SDValue Src = And1->getOperand(0);
3786 SDValue Dst = And0->getOperand(0);
3787 unsigned LSB = llvm::countr_zero(Mask1Imm);
3788 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3789
3790 // The BFXIL inserts the low-order bits from a source register, so right
3791 // shift the needed bits into place.
3792 SDLoc DL(N);
3793 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3794 uint64_t LsrImm = LSB;
3795 if (Src->hasOneUse() &&
3796 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3797 (LsrImm + LSB) < BitWidth) {
3798 Src = Src->getOperand(0);
3799 LsrImm += LSB;
3800 }
3801
3802 SDNode *LSR = CurDAG->getMachineNode(
3803 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3804 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3805
3806 // BFXIL is an alias of BFM, so translate to BFM operands.
3807 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3808 unsigned ImmS = Width - 1;
3809
3810 // Create the BFXIL instruction.
3811 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3812 CurDAG->getTargetConstant(ImmR, DL, VT),
3813 CurDAG->getTargetConstant(ImmS, DL, VT)};
3814 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3815 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3816 return true;
3817 }
3818
3819 return false;
3820}
3821
3822bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3823 if (N->getOpcode() != ISD::OR)
3824 return false;
3825
3826 APInt NUsefulBits;
3827 getUsefulBits(SDValue(N, 0), NUsefulBits);
3828
3829  // If none of the bits are useful, just return UNDEF.
3830 if (!NUsefulBits) {
3831 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3832 return true;
3833 }
3834
3835 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3836 return true;
3837
3838 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3839}
3840
3841/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3842/// equivalent of a left shift by a constant amount followed by an and masking
3843/// out a contiguous set of bits.
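/// For example (illustrative), "(and (shl x, 5), 0x3e0)" on i32 selects to
/// "ubfiz w0, w1, #5, #5" (encoded as UBFM with ImmR == 27, ImmS == 4).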
3844bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3845 if (N->getOpcode() != ISD::AND)
3846 return false;
3847
3848 EVT VT = N->getValueType(0);
3849 if (VT != MVT::i32 && VT != MVT::i64)
3850 return false;
3851
3852 SDValue Op0;
3853 int DstLSB, Width;
3854 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3855 Op0, DstLSB, Width))
3856 return false;
3857
3858 // ImmR is the rotate right amount.
3859 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3860 // ImmS is the most significant bit of the source to be moved.
3861 unsigned ImmS = Width - 1;
3862
3863 SDLoc DL(N);
3864 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3865 CurDAG->getTargetConstant(ImmS, DL, VT)};
3866 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3867 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3868 return true;
3869}
3870
3871/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3872/// variable shift/rotate instructions.
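/// For example (illustrative), "(srl x, (and amt, 63))" on i64 can drop the AND
/// and select to LSRVXr directly, since the hardware shift already uses only
/// the low 6 bits of the amount.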
3873bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3874 EVT VT = N->getValueType(0);
3875
3876 unsigned Opc;
3877 switch (N->getOpcode()) {
3878 case ISD::ROTR:
3879 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3880 break;
3881 case ISD::SHL:
3882 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3883 break;
3884 case ISD::SRL:
3885 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3886 break;
3887 case ISD::SRA:
3888 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3889 break;
3890 default:
3891 return false;
3892 }
3893
3894 uint64_t Size;
3895 uint64_t Bits;
3896 if (VT == MVT::i32) {
3897 Bits = 5;
3898 Size = 32;
3899 } else if (VT == MVT::i64) {
3900 Bits = 6;
3901 Size = 64;
3902 } else
3903 return false;
3904
3905 SDValue ShiftAmt = N->getOperand(1);
3906 SDLoc DL(N);
3907 SDValue NewShiftAmt;
3908
3909 // Skip over an extend of the shift amount.
3910 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3911 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3912 ShiftAmt = ShiftAmt->getOperand(0);
3913
3914 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3915 SDValue Add0 = ShiftAmt->getOperand(0);
3916 SDValue Add1 = ShiftAmt->getOperand(1);
3917 uint64_t Add0Imm;
3918 uint64_t Add1Imm;
3919 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3920 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3921 // to avoid the ADD/SUB.
3922 NewShiftAmt = Add0;
3923 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3924 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3925 (Add0Imm % Size == 0)) {
3926 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3927 // to generate a NEG instead of a SUB from a constant.
3928 unsigned NegOpc;
3929 unsigned ZeroReg;
3930 EVT SubVT = ShiftAmt->getValueType(0);
3931 if (SubVT == MVT::i32) {
3932 NegOpc = AArch64::SUBWrr;
3933 ZeroReg = AArch64::WZR;
3934 } else {
3935 assert(SubVT == MVT::i64);
3936 NegOpc = AArch64::SUBXrr;
3937 ZeroReg = AArch64::XZR;
3938 }
3939 SDValue Zero =
3940 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3941 MachineSDNode *Neg =
3942 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3943 NewShiftAmt = SDValue(Neg, 0);
3944 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3945 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3946 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3947 // to generate a NOT instead of a SUB from a constant.
3948 unsigned NotOpc;
3949 unsigned ZeroReg;
3950 EVT SubVT = ShiftAmt->getValueType(0);
3951 if (SubVT == MVT::i32) {
3952 NotOpc = AArch64::ORNWrr;
3953 ZeroReg = AArch64::WZR;
3954 } else {
3955 assert(SubVT == MVT::i64);
3956 NotOpc = AArch64::ORNXrr;
3957 ZeroReg = AArch64::XZR;
3958 }
3959 SDValue Zero =
3960 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3961 MachineSDNode *Not =
3962 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3963 NewShiftAmt = SDValue(Not, 0);
3964 } else
3965 return false;
3966 } else {
3967 // If the shift amount is masked with an AND, check that the mask covers the
3968 // bits that are implicitly ANDed off by the above opcodes and if so, skip
3969 // the AND.
3970 uint64_t MaskImm;
3971 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3972 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3973 return false;
3974
3975 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
3976 return false;
3977
3978 NewShiftAmt = ShiftAmt->getOperand(0);
3979 }
3980
3981 // Narrow/widen the shift amount to match the size of the shift operation.
3982 if (VT == MVT::i32)
3983 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3984 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3985 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3986 MachineSDNode *Ext = CurDAG->getMachineNode(
3987 AArch64::SUBREG_TO_REG, DL, VT,
3988 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3989 NewShiftAmt = SDValue(Ext, 0);
3990 }
3991
3992 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3993 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3994 return true;
3995}
3996
3997static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
3998                                               SDValue &FixedPos,
3999 unsigned RegWidth,
4000 bool isReciprocal) {
4001 APFloat FVal(0.0);
4002  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
4003    FVal = CN->getValueAPF();
4004 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
4005 // Some otherwise illegal constants are allowed in this case.
4006 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
4007 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
4008 return false;
4009
4010 ConstantPoolSDNode *CN =
4011 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
4012 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
4013 } else
4014 return false;
4015
4016 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
4017 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
4018 // x-register.
4019 //
4020 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
4021 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
4022 // integers.
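  // For example (illustrative): a multiplier of 16.0 converts to IntVal == 16,
  // so FBits == 4, matching the "convertToInt(Val * 2^4)" behaviour of
  // "fcvtzs w0, s0, #4".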
4023 bool IsExact;
4024
4025 if (isReciprocal)
4026 if (!FVal.getExactInverse(&FVal))
4027 return false;
4028
4029 // fbits is between 1 and 64 in the worst-case, which means the fmul
4030 // could have 2^64 as an actual operand. Need 65 bits of precision.
4031 APSInt IntVal(65, true);
4032 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
4033
4034 // N.b. isPowerOf2 also checks for > 0.
4035 if (!IsExact || !IntVal.isPowerOf2())
4036 return false;
4037 unsigned FBits = IntVal.logBase2();
4038
4039 // Checks above should have guaranteed that we haven't lost information in
4040 // finding FBits, but it must still be in range.
4041 if (FBits == 0 || FBits > RegWidth) return false;
4042
4043 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4044 return true;
4045}
4046
4047bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
4048 unsigned RegWidth) {
4049 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4050 false);
4051}
4052
4053bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
4054 SDValue &FixedPos,
4055 unsigned RegWidth) {
4056 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4057 true);
4058}
4059
4060// Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
4061// fields of the string, obtains the integer values from them, and combines
4062// these into a single value to be used in the MRS/MSR instruction.
4063static int getIntOperandFromRegisterString(StringRef RegString) {
4064  SmallVector<StringRef, 5> Fields;
4065  RegString.split(Fields, ':');
4066
4067 if (Fields.size() == 1)
4068 return -1;
4069
4070 assert(Fields.size() == 5
4071 && "Invalid number of fields in read register string");
4072
4073  SmallVector<int, 5> Ops;
4074  bool AllIntFields = true;
4075
4076 for (StringRef Field : Fields) {
4077 unsigned IntField;
4078 AllIntFields &= !Field.getAsInteger(10, IntField);
4079 Ops.push_back(IntField);
4080 }
4081
4082 assert(AllIntFields &&
4083 "Unexpected non-integer value in special register string.");
4084 (void)AllIntFields;
4085
4086 // Need to combine the integer fields of the string into a single value
4087 // based on the bit encoding of MRS/MSR instruction.
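  // For example (illustrative), the string "1:2:7:14:6" encodes to
  // (1 << 14) | (2 << 11) | (7 << 7) | (14 << 3) | 6 == 0x53F6.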
4088 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
4089 (Ops[3] << 3) | (Ops[4]);
4090}
4091
4092// Lower the read_register intrinsic to an MRS instruction node if the special
4093// register string argument is either of the form detailed in the ACLE (the
4094// form described in getIntOperandFromRegisterString) or is a named register
4095// known by the MRS SysReg mapper.
4096bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
4097 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4098 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4099 SDLoc DL(N);
4100
4101 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
4102
4103 unsigned Opcode64Bit = AArch64::MRS;
4104 int Imm = getIntOperandFromRegisterString(RegString->getString());
4105 if (Imm == -1) {
4106 // No match, Use the sysreg mapper to map the remaining possible strings to
4107 // the value for the register to be used for the instruction operand.
4108 const auto *TheReg =
4109 AArch64SysReg::lookupSysRegByName(RegString->getString());
4110 if (TheReg && TheReg->Readable &&
4111 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4112 Imm = TheReg->Encoding;
4113 else
4114 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4115
4116 if (Imm == -1) {
4117 // Still no match, see if this is "pc" or give up.
4118 if (!ReadIs128Bit && RegString->getString() == "pc") {
4119 Opcode64Bit = AArch64::ADR;
4120 Imm = 0;
4121 } else {
4122 return false;
4123 }
4124 }
4125 }
4126
4127 SDValue InChain = N->getOperand(0);
4128 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
4129 if (!ReadIs128Bit) {
4130 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
4131 {SysRegImm, InChain});
4132 } else {
4133 SDNode *MRRS = CurDAG->getMachineNode(
4134 AArch64::MRRS, DL,
4135 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
4136 {SysRegImm, InChain});
4137
4138 // Sysregs are not endian. The even register always contains the low half
4139 // of the register.
4140 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
4141 SDValue(MRRS, 0));
4142 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
4143 SDValue(MRRS, 0));
4144 SDValue OutChain = SDValue(MRRS, 1);
4145
4146 ReplaceUses(SDValue(N, 0), Lo);
4147 ReplaceUses(SDValue(N, 1), Hi);
4148 ReplaceUses(SDValue(N, 2), OutChain);
4149 };
4150 return true;
4151}
4152
4153// Lower the write_register intrinsic to an MSR instruction node if the special
4154// register string argument is either of the form detailed in the ACLE (the
4155// form described in getIntOperandFromRegisterString) or is a named register
4156// known by the MSR SysReg mapper.
4157bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
4158 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4159 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4160 SDLoc DL(N);
4161
4162 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4163
4164 if (!WriteIs128Bit) {
4165    // Check if the register was one of those allowed as the pstatefield value
4166    // in the MSR (immediate) instruction. To accept the values allowed in the
4167    // pstatefield for the MSR (immediate) instruction, we also require that an
4168    // immediate value has been provided as an argument; we know this is the
4169    // case because it has been ensured by semantic checking.
4170 auto trySelectPState = [&](auto PMapper, unsigned State) {
4171 if (PMapper) {
4172 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
4173 "Expected a constant integer expression.");
4174 unsigned Reg = PMapper->Encoding;
4175 uint64_t Immed = N->getConstantOperandVal(2);
4176 CurDAG->SelectNodeTo(
4177 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
4178 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
4179 return true;
4180 }
4181 return false;
4182 };
4183
4184 if (trySelectPState(
4185 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
4186 AArch64::MSRpstateImm4))
4187 return true;
4188 if (trySelectPState(
4189 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
4190 AArch64::MSRpstateImm1))
4191 return true;
4192 }
4193
4194 int Imm = getIntOperandFromRegisterString(RegString->getString());
4195 if (Imm == -1) {
4196 // Use the sysreg mapper to attempt to map the remaining possible strings
4197 // to the value for the register to be used for the MSR (register)
4198 // instruction operand.
4199 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
4200 if (TheReg && TheReg->Writeable &&
4201 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4202 Imm = TheReg->Encoding;
4203 else
4204 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4205
4206 if (Imm == -1)
4207 return false;
4208 }
4209
4210 SDValue InChain = N->getOperand(0);
4211 if (!WriteIs128Bit) {
4212 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
4213 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4214 N->getOperand(2), InChain);
4215 } else {
4216 // No endian swap. The lower half always goes into the even subreg, and the
4217    // higher half always into the odd subreg.
4218 SDNode *Pair = CurDAG->getMachineNode(
4219 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
4220 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
4221 MVT::i32),
4222 N->getOperand(2),
4223 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
4224 N->getOperand(3),
4225 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
4226
4227 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
4228 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4229 SDValue(Pair, 0), InChain);
4230 }
4231
4232 return true;
4233}
4234
4235/// We've got special pseudo-instructions for these
4236bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4237 unsigned Opcode;
4238 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4239
4240 // Leave IR for LSE if subtarget supports it.
4241 if (Subtarget->hasLSE()) return false;
4242
4243 if (MemTy == MVT::i8)
4244 Opcode = AArch64::CMP_SWAP_8;
4245 else if (MemTy == MVT::i16)
4246 Opcode = AArch64::CMP_SWAP_16;
4247 else if (MemTy == MVT::i32)
4248 Opcode = AArch64::CMP_SWAP_32;
4249 else if (MemTy == MVT::i64)
4250 Opcode = AArch64::CMP_SWAP_64;
4251 else
4252 llvm_unreachable("Unknown AtomicCmpSwap type");
4253
4254 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4255 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4256 N->getOperand(0)};
4257 SDNode *CmpSwap = CurDAG->getMachineNode(
4258 Opcode, SDLoc(N),
4259 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4260
4261 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4262 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4263
4264 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4265 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4266 CurDAG->RemoveDeadNode(N);
4267
4268 return true;
4269}
4270
4271bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4272 SDValue &Shift, bool Negate) {
4273 if (!isa<ConstantSDNode>(N))
4274 return false;
4275
4276 SDLoc DL(N);
4277 APInt Val =
4278 cast<ConstantSDNode>(N)->getAPIntValue().trunc(VT.getFixedSizeInBits());
4279
4280 if (Negate)
4281 Val = -Val;
4282
4283 switch (VT.SimpleTy) {
4284 case MVT::i8:
4285 // All immediates are supported.
4286 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4287 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4288 return true;
4289 case MVT::i16:
4290 case MVT::i32:
4291 case MVT::i64:
4292 // Support 8bit unsigned immediates.
4293 if ((Val & ~0xff) == 0) {
4294 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4295 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4296 return true;
4297 }
4298 // Support 16bit unsigned immediates that are a multiple of 256.
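    // For example (illustrative), a value of 0x1200 is encoded as Imm == 0x12
    // with Shift == 8.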
4299 if ((Val & ~0xff00) == 0) {
4300 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4301 Imm = CurDAG->getTargetConstant(Val.lshr(8).getZExtValue(), DL, MVT::i32);
4302 return true;
4303 }
4304 break;
4305 default:
4306 break;
4307 }
4308
4309 return false;
4310}
4311
4312bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4313 SDValue &Imm, SDValue &Shift,
4314 bool Negate) {
4315 if (!isa<ConstantSDNode>(N))
4316 return false;
4317
4318 SDLoc DL(N);
4319 int64_t Val = cast<ConstantSDNode>(N)
4320 ->getAPIntValue()
4321                    .trunc(VT.getFixedSizeInBits())
4322                    .getSExtValue();
4323
4324 if (Negate)
4325 Val = -Val;
4326
4327 // Signed saturating instructions treat their immediate operand as unsigned,
4328 // whereas the related intrinsics define their operands to be signed. This
4329 // means we can only use the immediate form when the operand is non-negative.
4330 if (Val < 0)
4331 return false;
4332
4333 switch (VT.SimpleTy) {
4334 case MVT::i8:
4335 // All positive immediates are supported.
4336 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4337 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4338 return true;
4339 case MVT::i16:
4340 case MVT::i32:
4341 case MVT::i64:
4342 // Support 8bit positive immediates.
4343 if (Val <= 255) {
4344 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4345 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4346 return true;
4347 }
4348 // Support 16bit positive immediates that are a multiple of 256.
4349 if (Val <= 65280 && Val % 256 == 0) {
4350 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4351 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4352 return true;
4353 }
4354 break;
4355 default:
4356 break;
4357 }
4358
4359 return false;
4360}
4361
4362bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4363 SDValue &Shift) {
4364 if (!isa<ConstantSDNode>(N))
4365 return false;
4366
4367 SDLoc DL(N);
4368 int64_t Val = cast<ConstantSDNode>(N)
4369 ->getAPIntValue()
4370 .trunc(VT.getFixedSizeInBits())
4371 .getSExtValue();
4372 int32_t ImmVal, ShiftVal;
4373 if (!AArch64_AM::isSVECpyDupImm(VT.getScalarSizeInBits(), Val, ImmVal,
4374 ShiftVal))
4375 return false;
4376
4377 Shift = CurDAG->getTargetConstant(ShiftVal, DL, MVT::i32);
4378 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4379 return true;
4380}
4381
4382bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4383 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4384 int64_t ImmVal = CNode->getSExtValue();
4385 SDLoc DL(N);
4386 if (ImmVal >= -128 && ImmVal < 128) {
4387 Imm = CurDAG->getSignedTargetConstant(ImmVal, DL, MVT::i32);
4388 return true;
4389 }
4390 }
4391 return false;
4392}
4393
4394bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4395 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4396 uint64_t ImmVal = CNode->getZExtValue();
4397
4398 switch (VT.SimpleTy) {
4399 case MVT::i8:
4400 ImmVal &= 0xFF;
4401 break;
4402 case MVT::i16:
4403 ImmVal &= 0xFFFF;
4404 break;
4405 case MVT::i32:
4406 ImmVal &= 0xFFFFFFFF;
4407 break;
4408 case MVT::i64:
4409 break;
4410 default:
4411 llvm_unreachable("Unexpected type");
4412 }
4413
4414 if (ImmVal < 256) {
4415 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4416 return true;
4417 }
4418 }
4419 return false;
4420}
4421
4422bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4423 bool Invert) {
4424 uint64_t ImmVal;
4425 if (auto CI = dyn_cast<ConstantSDNode>(N))
4426 ImmVal = CI->getZExtValue();
4427 else if (auto CFP = dyn_cast<ConstantFPSDNode>(N))
4428 ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
4429 else
4430 return false;
4431
4432 if (Invert)
4433 ImmVal = ~ImmVal;
4434
4435 uint64_t encoding;
4436 if (!AArch64_AM::isSVELogicalImm(VT.getScalarSizeInBits(), ImmVal, encoding))
4437 return false;
4438
4439 Imm = CurDAG->getTargetConstant(encoding, SDLoc(N), MVT::i64);
4440 return true;
4441}
4442
4443// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4444// Rather than attempt to normalise everything we can sometimes saturate the
4445// shift amount during selection. This function also allows for consistent
4446// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4447// required by the instructions.
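// For example (illustrative), with High == 7 and AllowSaturation == true, a
// constant shift amount of 200 is clamped to 7; without saturation the match
// simply fails.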
4448bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4449 uint64_t High, bool AllowSaturation,
4450 SDValue &Imm) {
4451 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4452 uint64_t ImmVal = CN->getZExtValue();
4453
4454 // Reject shift amounts that are too small.
4455 if (ImmVal < Low)
4456 return false;
4457
4458 // Reject or saturate shift amounts that are too big.
4459 if (ImmVal > High) {
4460 if (!AllowSaturation)
4461 return false;
4462 ImmVal = High;
4463 }
4464
4465 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4466 return true;
4467 }
4468
4469 return false;
4470}
4471
4472bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4473 // tagp(FrameIndex, IRGstack, tag_offset):
4474 // since the offset between FrameIndex and IRGstack is a compile-time
4475 // constant, this can be lowered to a single ADDG instruction.
4476 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4477 return false;
4478 }
4479
4480 SDValue IRG_SP = N->getOperand(2);
4481 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4482 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4483 return false;
4484 }
4485
4486 const TargetLowering *TLI = getTargetLowering();
4487 SDLoc DL(N);
4488 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4489 SDValue FiOp = CurDAG->getTargetFrameIndex(
4490 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4491 int TagOffset = N->getConstantOperandVal(3);
4492
4493 SDNode *Out = CurDAG->getMachineNode(
4494 AArch64::TAGPstack, DL, MVT::i64,
4495 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4496 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4497 ReplaceNode(N, Out);
4498 return true;
4499}
4500
4501void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4502 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4503 "llvm.aarch64.tagp third argument must be an immediate");
4504 if (trySelectStackSlotTagP(N))
4505 return;
4506 // FIXME: above applies in any case when offset between Op1 and Op2 is a
4507 // compile-time constant, not just for stack allocations.
4508
4509 // General case for unrelated pointers in Op1 and Op2.
4510 SDLoc DL(N);
4511 int TagOffset = N->getConstantOperandVal(3);
4512 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4513 {N->getOperand(1), N->getOperand(2)});
4514 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4515 {SDValue(N1, 0), N->getOperand(2)});
4516 SDNode *N3 = CurDAG->getMachineNode(
4517 AArch64::ADDG, DL, MVT::i64,
4518 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4519 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4520 ReplaceNode(N, N3);
4521}
4522
4523bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4524 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4525
4526 // Bail when not a "cast" like insert_subvector.
4527 if (N->getConstantOperandVal(2) != 0)
4528 return false;
4529 if (!N->getOperand(0).isUndef())
4530 return false;
4531
4532 // Bail when normal isel should do the job.
4533 EVT VT = N->getValueType(0);
4534 EVT InVT = N->getOperand(1).getValueType();
4535 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4536 return false;
4537 if (InVT.getSizeInBits() <= 128)
4538 return false;
4539
4540 // NOTE: We can only get here when doing fixed length SVE code generation.
4541 // We do manual selection because the types involved are not linked to real
4542 // registers (despite being legal) and must be coerced into SVE registers.
4543
4544  assert(VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4545         "Expected to insert into a packed scalable vector!");
4546
4547 SDLoc DL(N);
4548 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4549 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4550 N->getOperand(1), RC));
4551 return true;
4552}
4553
4554bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4555 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4556
4557 // Bail when not a "cast" like extract_subvector.
4558 if (N->getConstantOperandVal(1) != 0)
4559 return false;
4560
4561 // Bail when normal isel can do the job.
4562 EVT VT = N->getValueType(0);
4563 EVT InVT = N->getOperand(0).getValueType();
4564 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4565 return false;
4566 if (VT.getSizeInBits() <= 128)
4567 return false;
4568
4569 // NOTE: We can only get here when doing fixed length SVE code generation.
4570 // We do manual selection because the types involved are not linked to real
4571 // registers (despite being legal) and must be coerced into SVE registers.
4572
4573  assert(InVT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4574         "Expected to extract from a packed scalable vector!");
4575
4576 SDLoc DL(N);
4577 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4578 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4579 N->getOperand(0), RC));
4580 return true;
4581}
4582
4583bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4584 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4585
4586 SDValue N0 = N->getOperand(0);
4587 SDValue N1 = N->getOperand(1);
4588
4589 EVT VT = N->getValueType(0);
4590 SDLoc DL(N);
4591
4592 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4593  // Rotate by a constant is a funnel shift in IR, which is expanded to
4594  // an OR with shifted operands.
4595 // We do the following transform:
4596 // OR N0, N1 -> xar (x, y, imm)
4597 // Where:
4598 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4599 // N0 = SHL_PRED true, V, splat(bits-imm)
4600 // V = (xor x, y)
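  // For example (illustrative, nxv4i32): rotr(xor(x, y), 8) appears here as
  // or(SHL_PRED(pg, V, splat(24)), SRL_PRED(pg, V, splat(8))) with V = xor(x, y)
  // and may select to XAR_ZZZI_S with immediate 8.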
4601 if (VT.isScalableVector() &&
4602 (Subtarget->hasSVE2() ||
4603 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4604 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4605 N1.getOpcode() != AArch64ISD::SRL_PRED)
4606 std::swap(N0, N1);
4607 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4608 N1.getOpcode() != AArch64ISD::SRL_PRED)
4609 return false;
4610
4611 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4612 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4613 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4614 return false;
4615
4616 if (N0.getOperand(1) != N1.getOperand(1))
4617 return false;
4618
4619 SDValue R1, R2;
4620 bool IsXOROperand = true;
4621 if (N0.getOperand(1).getOpcode() != ISD::XOR) {
4622 IsXOROperand = false;
4623 } else {
4624 R1 = N0.getOperand(1).getOperand(0);
4625 R2 = N1.getOperand(1).getOperand(1);
4626 }
4627
4628 APInt ShlAmt, ShrAmt;
4629 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4630        !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
4631      return false;
4632
4633 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4634 return false;
4635
4636 if (!IsXOROperand) {
4637 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4638 SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
4639 SDValue MOVIV = SDValue(MOV, 0);
4640
4641 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4642 SDNode *SubRegToReg = CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL,
4643 VT, Zero, MOVIV, ZSub);
4644
4645 R1 = N1->getOperand(1);
4646 R2 = SDValue(SubRegToReg, 0);
4647 }
4648
4649 SDValue Imm =
4650 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4651
4652 SDValue Ops[] = {R1, R2, Imm};
4653    if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4654            VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4655 AArch64::XAR_ZZZI_D})) {
4656 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4657 return true;
4658 }
4659 return false;
4660 }
4661
4662 // We have Neon SHA3 XAR operation for v2i64 but for types
4663 // v4i32, v8i16, v16i8 we can use SVE operations when SVE2-SHA3
4664 // is available.
4665 EVT SVT;
4666 switch (VT.getSimpleVT().SimpleTy) {
4667 case MVT::v4i32:
4668 case MVT::v2i32:
4669 SVT = MVT::nxv4i32;
4670 break;
4671 case MVT::v8i16:
4672 case MVT::v4i16:
4673 SVT = MVT::nxv8i16;
4674 break;
4675 case MVT::v16i8:
4676 case MVT::v8i8:
4677 SVT = MVT::nxv16i8;
4678 break;
4679 case MVT::v2i64:
4680 case MVT::v1i64:
4681 SVT = Subtarget->hasSHA3() ? MVT::v2i64 : MVT::nxv2i64;
4682 break;
4683 default:
4684 return false;
4685 }
4686
4687 if ((!SVT.isScalableVector() && !Subtarget->hasSHA3()) ||
4688 (SVT.isScalableVector() && !Subtarget->hasSVE2()))
4689 return false;
4690
4691 if (N0->getOpcode() != AArch64ISD::VSHL ||
4692 N1->getOpcode() != AArch64ISD::VLSHR)
4693 return false;
4694
4695 if (N0->getOperand(0) != N1->getOperand(0))
4696 return false;
4697
4698 SDValue R1, R2;
4699 bool IsXOROperand = true;
4700 if (N1->getOperand(0)->getOpcode() != ISD::XOR) {
4701 IsXOROperand = false;
4702 } else {
4703 SDValue XOR = N0.getOperand(0);
4704 R1 = XOR.getOperand(0);
4705 R2 = XOR.getOperand(1);
4706 }
4707
4708 unsigned HsAmt = N0.getConstantOperandVal(1);
4709 unsigned ShAmt = N1.getConstantOperandVal(1);
4710
4711 SDValue Imm = CurDAG->getTargetConstant(
4712 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4713
4714 unsigned VTSizeInBits = VT.getScalarSizeInBits();
4715 if (ShAmt + HsAmt != VTSizeInBits)
4716 return false;
4717
4718 if (!IsXOROperand) {
4719 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4720 SDNode *MOV =
4721 CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, MVT::v2i64, Zero);
4722 SDValue MOVIV = SDValue(MOV, 0);
4723
4724 R1 = N1->getOperand(0);
4725 R2 = MOVIV;
4726 }
4727
4728 if (SVT != VT) {
4729 SDValue Undef =
4730 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, SVT), 0);
4731
4732 if (SVT.isScalableVector() && VT.is64BitVector()) {
4733 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4734
4735 SDValue UndefQ = SDValue(
4736 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, QVT), 0);
4737 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4738
4739 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4740 UndefQ, R1, DSub),
4741 0);
4742 if (R2.getValueType() == VT)
4743 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4744 UndefQ, R2, DSub),
4745 0);
4746 }
4747
4748 SDValue SubReg = CurDAG->getTargetConstant(
4749 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL, MVT::i32);
4750
4751 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef,
4752 R1, SubReg),
4753 0);
4754
4755 if (SVT.isScalableVector() || R2.getValueType() != SVT)
4756 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT,
4757 Undef, R2, SubReg),
4758 0);
4759 }
4760
4761 SDValue Ops[] = {R1, R2, Imm};
4762 SDNode *XAR = nullptr;
4763
4764 if (SVT.isScalableVector()) {
4765    if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4766            SVT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4767 AArch64::XAR_ZZZI_D}))
4768 XAR = CurDAG->getMachineNode(Opc, DL, SVT, Ops);
4769 } else {
4770 XAR = CurDAG->getMachineNode(AArch64::XAR, DL, SVT, Ops);
4771 }
4772
4773 assert(XAR && "Unexpected NULL value for XAR instruction in DAG");
4774
4775 if (SVT != VT) {
4776 if (VT.is64BitVector() && SVT.isScalableVector()) {
4777 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4778
4779 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4780 SDNode *Q = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, QVT,
4781 SDValue(XAR, 0), ZSub);
4782
4783 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4784 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4785 SDValue(Q, 0), DSub);
4786 } else {
4787 SDValue SubReg = CurDAG->getTargetConstant(
4788 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL,
4789 MVT::i32);
4790 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
4791 SDValue(XAR, 0), SubReg);
4792 }
4793 }
4794 ReplaceNode(N, XAR);
4795 return true;
4796}
4797
4798void AArch64DAGToDAGISel::Select(SDNode *Node) {
4799 // If we have a custom node, we already have selected!
4800 if (Node->isMachineOpcode()) {
4801 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4802 Node->setNodeId(-1);
4803 return;
4804 }
4805
4806 // Few custom selection stuff.
4807 EVT VT = Node->getValueType(0);
4808
4809 switch (Node->getOpcode()) {
4810 default:
4811 break;
4812
4813  case ISD::ATOMIC_CMP_SWAP:
4814    if (SelectCMP_SWAP(Node))
4815 return;
4816 break;
4817
4818 case ISD::READ_REGISTER:
4819 case AArch64ISD::MRRS:
4820 if (tryReadRegister(Node))
4821 return;
4822 break;
4823
4824  case ISD::WRITE_REGISTER:
4825  case AArch64ISD::MSRR:
4826 if (tryWriteRegister(Node))
4827 return;
4828 break;
4829
4830 case ISD::LOAD: {
4831 // Try to select as an indexed load. Fall through to normal processing
4832 // if we can't.
4833 if (tryIndexedLoad(Node))
4834 return;
4835 break;
4836 }
4837
4838 case ISD::SRL:
4839 case ISD::AND:
4840 case ISD::SRA:
4841  case ISD::SIGN_EXTEND_INREG:
4842    if (tryBitfieldExtractOp(Node))
4843 return;
4844 if (tryBitfieldInsertInZeroOp(Node))
4845 return;
4846 [[fallthrough]];
4847 case ISD::ROTR:
4848 case ISD::SHL:
4849 if (tryShiftAmountMod(Node))
4850 return;
4851 break;
4852
4853 case ISD::SIGN_EXTEND:
4854 if (tryBitfieldExtractOpFromSExt(Node))
4855 return;
4856 break;
4857
4858 case ISD::OR:
4859 if (tryBitfieldInsertOp(Node))
4860 return;
4861 if (trySelectXAR(Node))
4862 return;
4863 break;
4864
4865  case ISD::EXTRACT_SUBVECTOR: {
4866    if (trySelectCastScalableToFixedLengthVector(Node))
4867 return;
4868 break;
4869 }
4870
4871 case ISD::INSERT_SUBVECTOR: {
4872 if (trySelectCastFixedLengthToScalableVector(Node))
4873 return;
4874 break;
4875 }
4876
4877 case ISD::Constant: {
4878 // Materialize zero constants as copies from WZR/XZR. This allows
4879 // the coalescer to propagate these into other instructions.
4880 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4881 if (ConstNode->isZero()) {
4882 if (VT == MVT::i32) {
4883 SDValue New = CurDAG->getCopyFromReg(
4884 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4885 ReplaceNode(Node, New.getNode());
4886 return;
4887 } else if (VT == MVT::i64) {
4888 SDValue New = CurDAG->getCopyFromReg(
4889 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4890 ReplaceNode(Node, New.getNode());
4891 return;
4892 }
4893 }
4894 break;
4895 }
4896
4897 case ISD::FrameIndex: {
4898 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4899 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4900 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4901 const TargetLowering *TLI = getTargetLowering();
4902 SDValue TFI = CurDAG->getTargetFrameIndex(
4903 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4904 SDLoc DL(Node);
4905 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4906 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4907 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4908 return;
4909 }
4910 case ISD::INTRINSIC_W_CHAIN: {
4911 unsigned IntNo = Node->getConstantOperandVal(1);
4912 switch (IntNo) {
4913 default:
4914 break;
4915 case Intrinsic::aarch64_gcsss: {
4916 SDLoc DL(Node);
4917 SDValue Chain = Node->getOperand(0);
4918 SDValue Val = Node->getOperand(2);
4919 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
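// Expand the GCS stack-switch intrinsic into a GCSSS1/GCSSS2 pair; XZR supplies the incoming value for GCSSS2.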
4920 SDNode *SS1 =
4921 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
4922 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
4923 MVT::Other, Zero, SDValue(SS1, 0));
4924 ReplaceNode(Node, SS2);
4925 return;
4926 }
4927 case Intrinsic::aarch64_ldaxp:
4928 case Intrinsic::aarch64_ldxp: {
4929 unsigned Op =
4930 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4931 SDValue MemAddr = Node->getOperand(2);
4932 SDLoc DL(Node);
4933 SDValue Chain = Node->getOperand(0);
4934
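// LDAXP/LDXP produce two 64-bit results plus a chain.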
4935 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4936 MVT::Other, MemAddr, Chain);
4937
4938 // Transfer memoperands.
4939 MachineMemOperand *MemOp =
4940 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4941 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4942 ReplaceNode(Node, Ld);
4943 return;
4944 }
4945 case Intrinsic::aarch64_stlxp:
4946 case Intrinsic::aarch64_stxp: {
4947 unsigned Op =
4948 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4949 SDLoc DL(Node);
4950 SDValue Chain = Node->getOperand(0);
4951 SDValue ValLo = Node->getOperand(2);
4952 SDValue ValHi = Node->getOperand(3);
4953 SDValue MemAddr = Node->getOperand(4);
4954
4955 // Place arguments in the right order.
4956 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4957
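// STLXP/STXP produce a 32-bit success/failure status in addition to the chain.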
4958 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4959 // Transfer memoperands.
4960 MachineMemOperand *MemOp =
4961 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4962 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4963
4964 ReplaceNode(Node, St);
4965 return;
4966 }
4967 case Intrinsic::aarch64_neon_ld1x2:
4968 if (VT == MVT::v8i8) {
4969 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4970 return;
4971 } else if (VT == MVT::v16i8) {
4972 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4973 return;
4974 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4975 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4976 return;
4977 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4978 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4979 return;
4980 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4981 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4982 return;
4983 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4984 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4985 return;
4986 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4987 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4988 return;
4989 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4990 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
4991 return;
4992 }
4993 break;
4994 case Intrinsic::aarch64_neon_ld1x3:
4995 if (VT == MVT::v8i8) {
4996 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
4997 return;
4998 } else if (VT == MVT::v16i8) {
4999 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
5000 return;
5001 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5002 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
5003 return;
5004 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5005 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
5006 return;
5007 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5008 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
5009 return;
5010 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5011 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
5012 return;
5013 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5014 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5015 return;
5016 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5017 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
5018 return;
5019 }
5020 break;
5021 case Intrinsic::aarch64_neon_ld1x4:
5022 if (VT == MVT::v8i8) {
5023 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
5024 return;
5025 } else if (VT == MVT::v16i8) {
5026 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
5027 return;
5028 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5029 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
5030 return;
5031 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5032 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
5033 return;
5034 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5035 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
5036 return;
5037 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5038 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
5039 return;
5040 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5041 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5042 return;
5043 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5044 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
5045 return;
5046 }
5047 break;
5048 case Intrinsic::aarch64_neon_ld2:
5049 if (VT == MVT::v8i8) {
5050 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
5051 return;
5052 } else if (VT == MVT::v16i8) {
5053 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
5054 return;
5055 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5056 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
5057 return;
5058 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5059 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
5060 return;
5061 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5062 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
5063 return;
5064 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5065 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
5066 return;
5067 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5068 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5069 return;
5070 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5071 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
5072 return;
5073 }
5074 break;
5075 case Intrinsic::aarch64_neon_ld3:
5076 if (VT == MVT::v8i8) {
5077 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
5078 return;
5079 } else if (VT == MVT::v16i8) {
5080 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
5081 return;
5082 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5083 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
5084 return;
5085 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5086 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
5087 return;
5088 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5089 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
5090 return;
5091 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5092 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
5093 return;
5094 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5095 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5096 return;
5097 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5098 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
5099 return;
5100 }
5101 break;
5102 case Intrinsic::aarch64_neon_ld4:
5103 if (VT == MVT::v8i8) {
5104 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
5105 return;
5106 } else if (VT == MVT::v16i8) {
5107 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
5108 return;
5109 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5110 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
5111 return;
5112 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5113 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
5114 return;
5115 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5116 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
5117 return;
5118 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5119 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
5120 return;
5121 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5122 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5123 return;
5124 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5125 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
5126 return;
5127 }
5128 break;
5129 case Intrinsic::aarch64_neon_ld2r:
5130 if (VT == MVT::v8i8) {
5131 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
5132 return;
5133 } else if (VT == MVT::v16i8) {
5134 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
5135 return;
5136 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5137 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
5138 return;
5139 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5140 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
5141 return;
5142 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5143 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
5144 return;
5145 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5146 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
5147 return;
5148 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5149 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
5150 return;
5151 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5152 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
5153 return;
5154 }
5155 break;
5156 case Intrinsic::aarch64_neon_ld3r:
5157 if (VT == MVT::v8i8) {
5158 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
5159 return;
5160 } else if (VT == MVT::v16i8) {
5161 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
5162 return;
5163 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5164 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
5165 return;
5166 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5167 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
5168 return;
5169 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5170 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
5171 return;
5172 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5173 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
5174 return;
5175 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5176 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
5177 return;
5178 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5179 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
5180 return;
5181 }
5182 break;
5183 case Intrinsic::aarch64_neon_ld4r:
5184 if (VT == MVT::v8i8) {
5185 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
5186 return;
5187 } else if (VT == MVT::v16i8) {
5188 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
5189 return;
5190 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5191 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
5192 return;
5193 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5194 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
5195 return;
5196 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5197 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
5198 return;
5199 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5200 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
5201 return;
5202 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5203 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
5204 return;
5205 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5206 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
5207 return;
5208 }
5209 break;
5210 case Intrinsic::aarch64_neon_ld2lane:
5211 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5212 SelectLoadLane(Node, 2, AArch64::LD2i8);
5213 return;
5214 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5215 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5216 SelectLoadLane(Node, 2, AArch64::LD2i16);
5217 return;
5218 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5219 VT == MVT::v2f32) {
5220 SelectLoadLane(Node, 2, AArch64::LD2i32);
5221 return;
5222 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5223 VT == MVT::v1f64) {
5224 SelectLoadLane(Node, 2, AArch64::LD2i64);
5225 return;
5226 }
5227 break;
5228 case Intrinsic::aarch64_neon_ld3lane:
5229 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5230 SelectLoadLane(Node, 3, AArch64::LD3i8);
5231 return;
5232 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5233 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5234 SelectLoadLane(Node, 3, AArch64::LD3i16);
5235 return;
5236 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5237 VT == MVT::v2f32) {
5238 SelectLoadLane(Node, 3, AArch64::LD3i32);
5239 return;
5240 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5241 VT == MVT::v1f64) {
5242 SelectLoadLane(Node, 3, AArch64::LD3i64);
5243 return;
5244 }
5245 break;
5246 case Intrinsic::aarch64_neon_ld4lane:
5247 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5248 SelectLoadLane(Node, 4, AArch64::LD4i8);
5249 return;
5250 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5251 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5252 SelectLoadLane(Node, 4, AArch64::LD4i16);
5253 return;
5254 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5255 VT == MVT::v2f32) {
5256 SelectLoadLane(Node, 4, AArch64::LD4i32);
5257 return;
5258 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5259 VT == MVT::v1f64) {
5260 SelectLoadLane(Node, 4, AArch64::LD4i64);
5261 return;
5262 }
5263 break;
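// LD64B (FEAT_LS64) loads 64 bytes into eight consecutive X registers.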
5264 case Intrinsic::aarch64_ld64b:
5265 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5266 return;
5267 case Intrinsic::aarch64_sve_ld2q_sret: {
5268 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5269 return;
5270 }
5271 case Intrinsic::aarch64_sve_ld3q_sret: {
5272 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5273 return;
5274 }
5275 case Intrinsic::aarch64_sve_ld4q_sret: {
5276 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5277 return;
5278 }
5279 case Intrinsic::aarch64_sve_ld2_sret: {
5280 if (VT == MVT::nxv16i8) {
5281 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5282 true);
5283 return;
5284 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5285 VT == MVT::nxv8bf16) {
5286 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5287 true);
5288 return;
5289 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5290 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5291 true);
5292 return;
5293 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5294 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5295 true);
5296 return;
5297 }
5298 break;
5299 }
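// Predicate-as-counter multi-vector loads: streaming SME2 selects the pseudo forms, otherwise SVE2p1 is required.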
5300 case Intrinsic::aarch64_sve_ld1_pn_x2: {
5301 if (VT == MVT::nxv16i8) {
5302 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5303 SelectContiguousMultiVectorLoad(
5304 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5305 else if (Subtarget->hasSVE2p1())
5306 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5307 AArch64::LD1B_2Z);
5308 else
5309 break;
5310 return;
5311 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5312 VT == MVT::nxv8bf16) {
5313 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5314 SelectContiguousMultiVectorLoad(
5315 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5316 else if (Subtarget->hasSVE2p1())
5317 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5318 AArch64::LD1H_2Z);
5319 else
5320 break;
5321 return;
5322 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5323 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5324 SelectContiguousMultiVectorLoad(
5325 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5326 else if (Subtarget->hasSVE2p1())
5327 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5328 AArch64::LD1W_2Z);
5329 else
5330 break;
5331 return;
5332 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5333 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5334 SelectContiguousMultiVectorLoad(
5335 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5336 else if (Subtarget->hasSVE2p1())
5337 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5338 AArch64::LD1D_2Z);
5339 else
5340 break;
5341 return;
5342 }
5343 break;
5344 }
5345 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5346 if (VT == MVT::nxv16i8) {
5347 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5348 SelectContiguousMultiVectorLoad(
5349 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5350 else if (Subtarget->hasSVE2p1())
5351 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5352 AArch64::LD1B_4Z);
5353 else
5354 break;
5355 return;
5356 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5357 VT == MVT::nxv8bf16) {
5358 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5359 SelectContiguousMultiVectorLoad(
5360 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5361 else if (Subtarget->hasSVE2p1())
5362 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5363 AArch64::LD1H_4Z);
5364 else
5365 break;
5366 return;
5367 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5368 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5369 SelectContiguousMultiVectorLoad(
5370 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5371 else if (Subtarget->hasSVE2p1())
5372 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5373 AArch64::LD1W_4Z);
5374 else
5375 break;
5376 return;
5377 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5378 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5379 SelectContiguousMultiVectorLoad(
5380 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5381 else if (Subtarget->hasSVE2p1())
5382 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5383 AArch64::LD1D_4Z);
5384 else
5385 break;
5386 return;
5387 }
5388 break;
5389 }
5390 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5391 if (VT == MVT::nxv16i8) {
5392 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5393 SelectContiguousMultiVectorLoad(Node, 2, 0,
5394 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5395 AArch64::LDNT1B_2Z_PSEUDO);
5396 else if (Subtarget->hasSVE2p1())
5397 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5398 AArch64::LDNT1B_2Z);
5399 else
5400 break;
5401 return;
5402 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5403 VT == MVT::nxv8bf16) {
5404 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5405 SelectContiguousMultiVectorLoad(Node, 2, 1,
5406 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5407 AArch64::LDNT1H_2Z_PSEUDO);
5408 else if (Subtarget->hasSVE2p1())
5409 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5410 AArch64::LDNT1H_2Z);
5411 else
5412 break;
5413 return;
5414 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5415 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5416 SelectContiguousMultiVectorLoad(Node, 2, 2,
5417 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5418 AArch64::LDNT1W_2Z_PSEUDO);
5419 else if (Subtarget->hasSVE2p1())
5420 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5421 AArch64::LDNT1W_2Z);
5422 else
5423 break;
5424 return;
5425 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5426 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5427 SelectContiguousMultiVectorLoad(Node, 2, 3,
5428 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5429 AArch64::LDNT1D_2Z_PSEUDO);
5430 else if (Subtarget->hasSVE2p1())
5431 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5432 AArch64::LDNT1D_2Z);
5433 else
5434 break;
5435 return;
5436 }
5437 break;
5438 }
5439 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5440 if (VT == MVT::nxv16i8) {
5441 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5442 SelectContiguousMultiVectorLoad(Node, 4, 0,
5443 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5444 AArch64::LDNT1B_4Z_PSEUDO);
5445 else if (Subtarget->hasSVE2p1())
5446 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5447 AArch64::LDNT1B_4Z);
5448 else
5449 break;
5450 return;
5451 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5452 VT == MVT::nxv8bf16) {
5453 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5454 SelectContiguousMultiVectorLoad(Node, 4, 1,
5455 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5456 AArch64::LDNT1H_4Z_PSEUDO);
5457 else if (Subtarget->hasSVE2p1())
5458 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5459 AArch64::LDNT1H_4Z);
5460 else
5461 break;
5462 return;
5463 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5464 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5465 SelectContiguousMultiVectorLoad(Node, 4, 2,
5466 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5467 AArch64::LDNT1W_4Z_PSEUDO);
5468 else if (Subtarget->hasSVE2p1())
5469 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5470 AArch64::LDNT1W_4Z);
5471 else
5472 break;
5473 return;
5474 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5475 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5476 SelectContiguousMultiVectorLoad(Node, 4, 3,
5477 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5478 AArch64::LDNT1D_4Z_PSEUDO);
5479 else if (Subtarget->hasSVE2p1())
5480 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5481 AArch64::LDNT1D_4Z);
5482 else
5483 break;
5484 return;
5485 }
5486 break;
5487 }
5488 case Intrinsic::aarch64_sve_ld3_sret: {
5489 if (VT == MVT::nxv16i8) {
5490 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5491 true);
5492 return;
5493 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5494 VT == MVT::nxv8bf16) {
5495 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5496 true);
5497 return;
5498 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5499 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5500 true);
5501 return;
5502 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5503 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5504 true);
5505 return;
5506 }
5507 break;
5508 }
5509 case Intrinsic::aarch64_sve_ld4_sret: {
5510 if (VT == MVT::nxv16i8) {
5511 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5512 true);
5513 return;
5514 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5515 VT == MVT::nxv8bf16) {
5516 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5517 true);
5518 return;
5519 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5520 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5521 true);
5522 return;
5523 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5524 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5525 true);
5526 return;
5527 }
5528 break;
5529 }
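// SME tile-to-vector moves (MOVA): the SelectMultiVectorMove template parameters bound and scale the tile-slice index immediate for each element size.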
5530 case Intrinsic::aarch64_sme_read_hor_vg2: {
5531 if (VT == MVT::nxv16i8) {
5532 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5533 AArch64::MOVA_2ZMXI_H_B);
5534 return;
5535 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5536 VT == MVT::nxv8bf16) {
5537 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5538 AArch64::MOVA_2ZMXI_H_H);
5539 return;
5540 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5541 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5542 AArch64::MOVA_2ZMXI_H_S);
5543 return;
5544 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5545 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5546 AArch64::MOVA_2ZMXI_H_D);
5547 return;
5548 }
5549 break;
5550 }
5551 case Intrinsic::aarch64_sme_read_ver_vg2: {
5552 if (VT == MVT::nxv16i8) {
5553 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5554 AArch64::MOVA_2ZMXI_V_B);
5555 return;
5556 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5557 VT == MVT::nxv8bf16) {
5558 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5559 AArch64::MOVA_2ZMXI_V_H);
5560 return;
5561 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5562 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5563 AArch64::MOVA_2ZMXI_V_S);
5564 return;
5565 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5566 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5567 AArch64::MOVA_2ZMXI_V_D);
5568 return;
5569 }
5570 break;
5571 }
5572 case Intrinsic::aarch64_sme_read_hor_vg4: {
5573 if (VT == MVT::nxv16i8) {
5574 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5575 AArch64::MOVA_4ZMXI_H_B);
5576 return;
5577 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5578 VT == MVT::nxv8bf16) {
5579 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5580 AArch64::MOVA_4ZMXI_H_H);
5581 return;
5582 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5583 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5584 AArch64::MOVA_4ZMXI_H_S);
5585 return;
5586 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5587 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5588 AArch64::MOVA_4ZMXI_H_D);
5589 return;
5590 }
5591 break;
5592 }
5593 case Intrinsic::aarch64_sme_read_ver_vg4: {
5594 if (VT == MVT::nxv16i8) {
5595 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5596 AArch64::MOVA_4ZMXI_V_B);
5597 return;
5598 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5599 VT == MVT::nxv8bf16) {
5600 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5601 AArch64::MOVA_4ZMXI_V_H);
5602 return;
5603 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5604 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5605 AArch64::MOVA_4ZMXI_V_S);
5606 return;
5607 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5608 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5609 AArch64::MOVA_4ZMXI_V_D);
5610 return;
5611 }
5612 break;
5613 }
5614 case Intrinsic::aarch64_sme_read_vg1x2: {
5615 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5616 AArch64::MOVA_VG2_2ZMXI);
5617 return;
5618 }
5619 case Intrinsic::aarch64_sme_read_vg1x4: {
5620 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5621 AArch64::MOVA_VG4_4ZMXI);
5622 return;
5623 }
5624 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5625 if (VT == MVT::nxv16i8) {
5626 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5627 return;
5628 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5629 VT == MVT::nxv8bf16) {
5630 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5631 return;
5632 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5633 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5634 return;
5635 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5636 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5637 return;
5638 }
5639 break;
5640 }
5641 case Intrinsic::aarch64_sme_readz_vert_x2: {
5642 if (VT == MVT::nxv16i8) {
5643 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5644 return;
5645 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5646 VT == MVT::nxv8bf16) {
5647 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5648 return;
5649 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5650 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5651 return;
5652 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5653 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5654 return;
5655 }
5656 break;
5657 }
5658 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5659 if (VT == MVT::nxv16i8) {
5660 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5661 return;
5662 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5663 VT == MVT::nxv8bf16) {
5664 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5665 return;
5666 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5667 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5668 return;
5669 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5670 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5671 return;
5672 }
5673 break;
5674 }
5675 case Intrinsic::aarch64_sme_readz_vert_x4: {
5676 if (VT == MVT::nxv16i8) {
5677 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5678 return;
5679 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5680 VT == MVT::nxv8bf16) {
5681 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5682 return;
5683 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5684 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5685 return;
5686 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5687 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5688 return;
5689 }
5690 break;
5691 }
5692 case Intrinsic::aarch64_sme_readz_x2: {
5693 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5694 AArch64::ZA);
5695 return;
5696 }
5697 case Intrinsic::aarch64_sme_readz_x4: {
5698 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5699 AArch64::ZA);
5700 return;
5701 }
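// The Swift async context is addressed at FP - 8; the frame address is marked as taken below.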
5702 case Intrinsic::swift_async_context_addr: {
5703 SDLoc DL(Node);
5704 SDValue Chain = Node->getOperand(0);
5705 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5706 SDValue Res = SDValue(
5707 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5708 CurDAG->getTargetConstant(8, DL, MVT::i32),
5709 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5710 0);
5711 ReplaceUses(SDValue(Node, 0), Res);
5712 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5713 CurDAG->RemoveDeadNode(Node);
5714
5715 auto &MF = CurDAG->getMachineFunction();
5716 MF.getFrameInfo().setFrameAddressIsTaken(true);
5717 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5718 return;
5719 }
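// ZT0 lookup-table expansions: the final SelectMultiVectorLutiLane argument is the upper bound allowed for the lane immediate.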
5720 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5722 Node->getValueType(0),
5723 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5724 AArch64::LUTI2_4ZTZI_S}))
5725 // Second Immediate must be <= 3:
5726 SelectMultiVectorLutiLane(Node, 4, Opc, 3);
5727 return;
5728 }
5729 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5731 Node->getValueType(0),
5732 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5733 // Second Immediate must be <= 1:
5734 SelectMultiVectorLutiLane(Node, 4, Opc, 1);
5735 return;
5736 }
5737 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5739 Node->getValueType(0),
5740 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5741 AArch64::LUTI2_2ZTZI_S}))
5742 // Second Immediate must be <= 7:
5743 SelectMultiVectorLutiLane(Node, 2, Opc, 7);
5744 return;
5745 }
5746 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5748 Node->getValueType(0),
5749 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5750 AArch64::LUTI4_2ZTZI_S}))
5751 // Second Immediate must be <= 3:
5752 SelectMultiVectorLutiLane(Node, 2, Opc, 3);
5753 return;
5754 }
5755 case Intrinsic::aarch64_sme_luti4_zt_x4: {
5756 SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z);
5757 return;
5758 }
5759 case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
5761 Node->getValueType(0),
5762 {AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
5763 SelectCVTIntrinsicFP8(Node, 2, Opc);
5764 return;
5765 case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
5767 Node->getValueType(0),
5768 {AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
5769 SelectCVTIntrinsicFP8(Node, 2, Opc);
5770 return;
5771 case Intrinsic::aarch64_sve_fp8_cvt1_x2:
5773 Node->getValueType(0),
5774 {AArch64::BF1CVT_2ZZ_BtoH, AArch64::F1CVT_2ZZ_BtoH}))
5775 SelectCVTIntrinsicFP8(Node, 2, Opc);
5776 return;
5777 case Intrinsic::aarch64_sve_fp8_cvt2_x2:
5779 Node->getValueType(0),
5780 {AArch64::BF2CVT_2ZZ_BtoH, AArch64::F2CVT_2ZZ_BtoH}))
5781 SelectCVTIntrinsicFP8(Node, 2, Opc);
5782 return;
5783 }
5784 } break;
5785 case ISD::INTRINSIC_WO_CHAIN: {
5786 unsigned IntNo = Node->getConstantOperandVal(0);
5787 switch (IntNo) {
5788 default:
5789 break;
5790 case Intrinsic::aarch64_tagp:
5791 SelectTagP(Node);
5792 return;
5793
5794 case Intrinsic::ptrauth_auth:
5795 SelectPtrauthAuth(Node);
5796 return;
5797
5798 case Intrinsic::ptrauth_resign:
5799 SelectPtrauthResign(Node);
5800 return;
5801
5802 case Intrinsic::aarch64_neon_tbl2:
5803 SelectTable(Node, 2,
5804 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5805 false);
5806 return;
5807 case Intrinsic::aarch64_neon_tbl3:
5808 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5809 : AArch64::TBLv16i8Three,
5810 false);
5811 return;
5812 case Intrinsic::aarch64_neon_tbl4:
5813 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5814 : AArch64::TBLv16i8Four,
5815 false);
5816 return;
5817 case Intrinsic::aarch64_neon_tbx2:
5818 SelectTable(Node, 2,
5819 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5820 true);
5821 return;
5822 case Intrinsic::aarch64_neon_tbx3:
5823 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5824 : AArch64::TBXv16i8Three,
5825 true);
5826 return;
5827 case Intrinsic::aarch64_neon_tbx4:
5828 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5829 : AArch64::TBXv16i8Four,
5830 true);
5831 return;
5832 case Intrinsic::aarch64_sve_srshl_single_x2:
5834 Node->getValueType(0),
5835 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5836 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5837 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5838 return;
5839 case Intrinsic::aarch64_sve_srshl_single_x4:
5841 Node->getValueType(0),
5842 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5843 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5844 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5845 return;
5846 case Intrinsic::aarch64_sve_urshl_single_x2:
5848 Node->getValueType(0),
5849 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
5850 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
5851 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5852 return;
5853 case Intrinsic::aarch64_sve_urshl_single_x4:
5855 Node->getValueType(0),
5856 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
5857 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
5858 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5859 return;
5860 case Intrinsic::aarch64_sve_srshl_x2:
5862 Node->getValueType(0),
5863 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
5864 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
5865 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5866 return;
5867 case Intrinsic::aarch64_sve_srshl_x4:
5869 Node->getValueType(0),
5870 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
5871 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
5872 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5873 return;
5874 case Intrinsic::aarch64_sve_urshl_x2:
5876 Node->getValueType(0),
5877 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
5878 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
5879 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5880 return;
5881 case Intrinsic::aarch64_sve_urshl_x4:
5883 Node->getValueType(0),
5884 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
5885 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
5886 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5887 return;
5888 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
5890 Node->getValueType(0),
5891 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
5892 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
5893 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5894 return;
5895 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
5897 Node->getValueType(0),
5898 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
5899 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
5900 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5901 return;
5902 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
5904 Node->getValueType(0),
5905 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
5906 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
5907 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5908 return;
5909 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5911 Node->getValueType(0),
5912 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5913 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5914 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5915 return;
5916 case Intrinsic::aarch64_sme_fp8_scale_single_x2:
5918 Node->getValueType(0),
5919 {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
5920 AArch64::FSCALE_2ZZ_D}))
5921 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5922 return;
5923 case Intrinsic::aarch64_sme_fp8_scale_single_x4:
5925 Node->getValueType(0),
5926 {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
5927 AArch64::FSCALE_4ZZ_D}))
5928 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5929 return;
5930 case Intrinsic::aarch64_sme_fp8_scale_x2:
5932 Node->getValueType(0),
5933 {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
5934 AArch64::FSCALE_2Z2Z_D}))
5935 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5936 return;
5937 case Intrinsic::aarch64_sme_fp8_scale_x4:
5939 Node->getValueType(0),
5940 {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
5941 AArch64::FSCALE_4Z4Z_D}))
5942 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5943 return;
5944 case Intrinsic::aarch64_sve_whilege_x2:
5946 Node->getValueType(0),
5947 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
5948 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
5949 SelectWhilePair(Node, Op);
5950 return;
5951 case Intrinsic::aarch64_sve_whilegt_x2:
5953 Node->getValueType(0),
5954 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
5955 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
5956 SelectWhilePair(Node, Op);
5957 return;
5958 case Intrinsic::aarch64_sve_whilehi_x2:
5960 Node->getValueType(0),
5961 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
5962 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
5963 SelectWhilePair(Node, Op);
5964 return;
5965 case Intrinsic::aarch64_sve_whilehs_x2:
5967 Node->getValueType(0),
5968 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
5969 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
5970 SelectWhilePair(Node, Op);
5971 return;
5972 case Intrinsic::aarch64_sve_whilele_x2:
5974 Node->getValueType(0),
5975 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
5976 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
5977 SelectWhilePair(Node, Op);
5978 return;
5979 case Intrinsic::aarch64_sve_whilelo_x2:
5981 Node->getValueType(0),
5982 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
5983 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
5984 SelectWhilePair(Node, Op);
5985 return;
5986 case Intrinsic::aarch64_sve_whilels_x2:
5988 Node->getValueType(0),
5989 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
5990 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
5991 SelectWhilePair(Node, Op);
5992 return;
5993 case Intrinsic::aarch64_sve_whilelt_x2:
5995 Node->getValueType(0),
5996 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
5997 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
5998 SelectWhilePair(Node, Op);
5999 return;
6000 case Intrinsic::aarch64_sve_smax_single_x2:
6002 Node->getValueType(0),
6003 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
6004 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
6005 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6006 return;
6007 case Intrinsic::aarch64_sve_umax_single_x2:
6009 Node->getValueType(0),
6010 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
6011 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
6012 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6013 return;
6014 case Intrinsic::aarch64_sve_fmax_single_x2:
6016 Node->getValueType(0),
6017 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
6018 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
6019 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6020 return;
6021 case Intrinsic::aarch64_sve_smax_single_x4:
6023 Node->getValueType(0),
6024 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
6025 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
6026 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6027 return;
6028 case Intrinsic::aarch64_sve_umax_single_x4:
6030 Node->getValueType(0),
6031 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
6032 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
6033 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6034 return;
6035 case Intrinsic::aarch64_sve_fmax_single_x4:
6037 Node->getValueType(0),
6038 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
6039 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
6040 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6041 return;
6042 case Intrinsic::aarch64_sve_smin_single_x2:
6044 Node->getValueType(0),
6045 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
6046 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
6047 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6048 return;
6049 case Intrinsic::aarch64_sve_umin_single_x2:
6051 Node->getValueType(0),
6052 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
6053 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
6054 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6055 return;
6056 case Intrinsic::aarch64_sve_fmin_single_x2:
6058 Node->getValueType(0),
6059 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
6060 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
6061 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6062 return;
6063 case Intrinsic::aarch64_sve_smin_single_x4:
6065 Node->getValueType(0),
6066 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
6067 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
6068 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6069 return;
6070 case Intrinsic::aarch64_sve_umin_single_x4:
6072 Node->getValueType(0),
6073 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
6074 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
6075 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6076 return;
6077 case Intrinsic::aarch64_sve_fmin_single_x4:
6079 Node->getValueType(0),
6080 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
6081 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
6082 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6083 return;
6084 case Intrinsic::aarch64_sve_smax_x2:
6086 Node->getValueType(0),
6087 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
6088 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
6089 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6090 return;
6091 case Intrinsic::aarch64_sve_umax_x2:
6093 Node->getValueType(0),
6094 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
6095 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
6096 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6097 return;
6098 case Intrinsic::aarch64_sve_fmax_x2:
6100 Node->getValueType(0),
6101 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
6102 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
6103 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6104 return;
6105 case Intrinsic::aarch64_sve_smax_x4:
6107 Node->getValueType(0),
6108 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
6109 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
6110 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6111 return;
6112 case Intrinsic::aarch64_sve_umax_x4:
6114 Node->getValueType(0),
6115 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
6116 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
6117 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6118 return;
6119 case Intrinsic::aarch64_sve_fmax_x4:
6121 Node->getValueType(0),
6122 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
6123 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
6124 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6125 return;
6126 case Intrinsic::aarch64_sme_famax_x2:
6128 Node->getValueType(0),
6129 {0, AArch64::FAMAX_2Z2Z_H, AArch64::FAMAX_2Z2Z_S,
6130 AArch64::FAMAX_2Z2Z_D}))
6131 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6132 return;
6133 case Intrinsic::aarch64_sme_famax_x4:
6135 Node->getValueType(0),
6136 {0, AArch64::FAMAX_4Z4Z_H, AArch64::FAMAX_4Z4Z_S,
6137 AArch64::FAMAX_4Z4Z_D}))
6138 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6139 return;
6140 case Intrinsic::aarch64_sme_famin_x2:
6142 Node->getValueType(0),
6143 {0, AArch64::FAMIN_2Z2Z_H, AArch64::FAMIN_2Z2Z_S,
6144 AArch64::FAMIN_2Z2Z_D}))
6145 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6146 return;
6147 case Intrinsic::aarch64_sme_famin_x4:
6149 Node->getValueType(0),
6150 {0, AArch64::FAMIN_4Z4Z_H, AArch64::FAMIN_4Z4Z_S,
6151 AArch64::FAMIN_4Z4Z_D}))
6152 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6153 return;
6154 case Intrinsic::aarch64_sve_smin_x2:
6156 Node->getValueType(0),
6157 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
6158 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
6159 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6160 return;
6161 case Intrinsic::aarch64_sve_umin_x2:
6163 Node->getValueType(0),
6164 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
6165 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
6166 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6167 return;
6168 case Intrinsic::aarch64_sve_fmin_x2:
6170 Node->getValueType(0),
6171 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
6172 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
6173 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6174 return;
6175 case Intrinsic::aarch64_sve_smin_x4:
6177 Node->getValueType(0),
6178 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
6179 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
6180 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6181 return;
6182 case Intrinsic::aarch64_sve_umin_x4:
6184 Node->getValueType(0),
6185 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
6186 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
6187 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6188 return;
6189 case Intrinsic::aarch64_sve_fmin_x4:
6191 Node->getValueType(0),
6192 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
6193 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
6194 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6195 return;
6196 case Intrinsic::aarch64_sve_fmaxnm_single_x2 :
6198 Node->getValueType(0),
6199 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
6200 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
6201 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6202 return;
6203 case Intrinsic::aarch64_sve_fmaxnm_single_x4 :
6205 Node->getValueType(0),
6206 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
6207 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
6208 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6209 return;
6210 case Intrinsic::aarch64_sve_fminnm_single_x2:
6212 Node->getValueType(0),
6213 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
6214 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
6215 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6216 return;
6217 case Intrinsic::aarch64_sve_fminnm_single_x4:
6219 Node->getValueType(0),
6220 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
6221 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
6222 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6223 return;
6224 case Intrinsic::aarch64_sve_fscale_single_x4:
6225 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::BFSCALE_4ZZ);
6226 return;
6227 case Intrinsic::aarch64_sve_fscale_single_x2:
6228 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::BFSCALE_2ZZ);
6229 return;
6230 case Intrinsic::aarch64_sve_fmul_single_x4:
6232 Node->getValueType(0),
6233 {AArch64::BFMUL_4ZZ, AArch64::FMUL_4ZZ_H, AArch64::FMUL_4ZZ_S,
6234 AArch64::FMUL_4ZZ_D}))
6235 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6236 return;
6237 case Intrinsic::aarch64_sve_fmul_single_x2:
6239 Node->getValueType(0),
6240 {AArch64::BFMUL_2ZZ, AArch64::FMUL_2ZZ_H, AArch64::FMUL_2ZZ_S,
6241 AArch64::FMUL_2ZZ_D}))
6242 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6243 return;
6244 case Intrinsic::aarch64_sve_fmaxnm_x2:
6246 Node->getValueType(0),
6247 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
6248 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
6249 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6250 return;
6251 case Intrinsic::aarch64_sve_fmaxnm_x4:
6253 Node->getValueType(0),
6254 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
6255 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
6256 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6257 return;
6258 case Intrinsic::aarch64_sve_fminnm_x2:
6260 Node->getValueType(0),
6261 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
6262 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
6263 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6264 return;
6265 case Intrinsic::aarch64_sve_fminnm_x4:
6267 Node->getValueType(0),
6268 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
6269 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
6270 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6271 return;
6272 case Intrinsic::aarch64_sve_fscale_x4:
6273 SelectDestructiveMultiIntrinsic(Node, 4, true, AArch64::BFSCALE_4Z4Z);
6274 return;
6275 case Intrinsic::aarch64_sve_fscale_x2:
6276 SelectDestructiveMultiIntrinsic(Node, 2, true, AArch64::BFSCALE_2Z2Z);
6277 return;
6278 case Intrinsic::aarch64_sve_fmul_x4:
6280 Node->getValueType(0),
6281 {AArch64::BFMUL_4Z4Z, AArch64::FMUL_4Z4Z_H, AArch64::FMUL_4Z4Z_S,
6282 AArch64::FMUL_4Z4Z_D}))
6283 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6284 return;
6285 case Intrinsic::aarch64_sve_fmul_x2:
6287 Node->getValueType(0),
6288 {AArch64::BFMUL_2Z2Z, AArch64::FMUL_2Z2Z_H, AArch64::FMUL_2Z2Z_S,
6289 AArch64::FMUL_2Z2Z_D}))
6290 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6291 return;
6292 case Intrinsic::aarch64_sve_fcvtzs_x2:
6293 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
6294 return;
6295 case Intrinsic::aarch64_sve_scvtf_x2:
6296 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
6297 return;
6298 case Intrinsic::aarch64_sve_fcvtzu_x2:
6299 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
6300 return;
6301 case Intrinsic::aarch64_sve_ucvtf_x2:
6302 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
6303 return;
6304 case Intrinsic::aarch64_sve_fcvtzs_x4:
6305 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
6306 return;
6307 case Intrinsic::aarch64_sve_scvtf_x4:
6308 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
6309 return;
6310 case Intrinsic::aarch64_sve_fcvtzu_x4:
6311 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
6312 return;
6313 case Intrinsic::aarch64_sve_ucvtf_x4:
6314 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
6315 return;
6316 case Intrinsic::aarch64_sve_fcvt_widen_x2:
6317 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
6318 return;
6319 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
6320 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
6321 return;
6322 case Intrinsic::aarch64_sve_sclamp_single_x2:
6324 Node->getValueType(0),
6325 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
6326 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
6327 SelectClamp(Node, 2, Op);
6328 return;
6329 case Intrinsic::aarch64_sve_uclamp_single_x2:
6331 Node->getValueType(0),
6332 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
6333 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
6334 SelectClamp(Node, 2, Op);
6335 return;
6336 case Intrinsic::aarch64_sve_fclamp_single_x2:
6338 Node->getValueType(0),
6339 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
6340 AArch64::FCLAMP_VG2_2Z2Z_D}))
6341 SelectClamp(Node, 2, Op);
6342 return;
6343 case Intrinsic::aarch64_sve_bfclamp_single_x2:
6344 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
6345 return;
6346 case Intrinsic::aarch64_sve_sclamp_single_x4:
6348 Node->getValueType(0),
6349 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
6350 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
6351 SelectClamp(Node, 4, Op);
6352 return;
6353 case Intrinsic::aarch64_sve_uclamp_single_x4:
6355 Node->getValueType(0),
6356 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6357 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6358 SelectClamp(Node, 4, Op);
6359 return;
6360 case Intrinsic::aarch64_sve_fclamp_single_x4:
6362 Node->getValueType(0),
6363 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6364 AArch64::FCLAMP_VG4_4Z4Z_D}))
6365 SelectClamp(Node, 4, Op);
6366 return;
6367 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6368 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
6369 return;
6370 case Intrinsic::aarch64_sve_add_single_x2:
6372 Node->getValueType(0),
6373 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6374 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6375 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6376 return;
6377 case Intrinsic::aarch64_sve_add_single_x4:
6379 Node->getValueType(0),
6380 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6381 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6382 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6383 return;
6384 case Intrinsic::aarch64_sve_zip_x2:
6386 Node->getValueType(0),
6387 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6388 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6389 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6390 return;
6391 case Intrinsic::aarch64_sve_zipq_x2:
6392 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6393 AArch64::ZIP_VG2_2ZZZ_Q);
6394 return;
6395 case Intrinsic::aarch64_sve_zip_x4:
6397 Node->getValueType(0),
6398 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6399 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6400 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6401 return;
6402 case Intrinsic::aarch64_sve_zipq_x4:
6403 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6404 AArch64::ZIP_VG4_4Z4Z_Q);
6405 return;
6406 case Intrinsic::aarch64_sve_uzp_x2:
6408 Node->getValueType(0),
6409 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6410 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6411 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6412 return;
6413 case Intrinsic::aarch64_sve_uzpq_x2:
6414 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6415 AArch64::UZP_VG2_2ZZZ_Q);
6416 return;
6417 case Intrinsic::aarch64_sve_uzp_x4:
6419 Node->getValueType(0),
6420 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6421 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6422 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6423 return;
6424 case Intrinsic::aarch64_sve_uzpq_x4:
6425 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6426 AArch64::UZP_VG4_4Z4Z_Q);
6427 return;
6428 case Intrinsic::aarch64_sve_sel_x2:
6430 Node->getValueType(0),
6431 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6432 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6433 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6434 return;
6435 case Intrinsic::aarch64_sve_sel_x4:
6437 Node->getValueType(0),
6438 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6439 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6440 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6441 return;
6442 case Intrinsic::aarch64_sve_frinta_x2:
6443 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6444 return;
6445 case Intrinsic::aarch64_sve_frinta_x4:
6446 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6447 return;
6448 case Intrinsic::aarch64_sve_frintm_x2:
6449 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6450 return;
6451 case Intrinsic::aarch64_sve_frintm_x4:
6452 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6453 return;
6454 case Intrinsic::aarch64_sve_frintn_x2:
6455 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6456 return;
6457 case Intrinsic::aarch64_sve_frintn_x4:
6458 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6459 return;
6460 case Intrinsic::aarch64_sve_frintp_x2:
6461 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6462 return;
6463 case Intrinsic::aarch64_sve_frintp_x4:
6464 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6465 return;
6466 case Intrinsic::aarch64_sve_sunpk_x2:
6467 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6468 Node->getValueType(0),
6469 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6470 AArch64::SUNPK_VG2_2ZZ_D}))
6471 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6472 return;
6473 case Intrinsic::aarch64_sve_uunpk_x2:
6474 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6475 Node->getValueType(0),
6476 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6477 AArch64::UUNPK_VG2_2ZZ_D}))
6478 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6479 return;
6480 case Intrinsic::aarch64_sve_sunpk_x4:
6481 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6482 Node->getValueType(0),
6483 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6484 AArch64::SUNPK_VG4_4Z2Z_D}))
6485 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6486 return;
6487 case Intrinsic::aarch64_sve_uunpk_x4:
6488 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6489 Node->getValueType(0),
6490 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6491 AArch64::UUNPK_VG4_4Z2Z_D}))
6492 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6493 return;
6494 case Intrinsic::aarch64_sve_pext_x2: {
6495 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6496 Node->getValueType(0),
6497 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6498 AArch64::PEXT_2PCI_D}))
6499 SelectPExtPair(Node, Op);
6500 return;
6501 }
6502 }
6503 break;
6504 }
6505 case ISD::INTRINSIC_VOID: {
6506 unsigned IntNo = Node->getConstantOperandVal(1);
6507 if (Node->getNumOperands() >= 3)
6508 VT = Node->getOperand(2)->getValueType(0);
6509 switch (IntNo) {
6510 default:
6511 break;
6512 case Intrinsic::aarch64_neon_st1x2: {
6513 if (VT == MVT::v8i8) {
6514 SelectStore(Node, 2, AArch64::ST1Twov8b);
6515 return;
6516 } else if (VT == MVT::v16i8) {
6517 SelectStore(Node, 2, AArch64::ST1Twov16b);
6518 return;
6519 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6520 VT == MVT::v4bf16) {
6521 SelectStore(Node, 2, AArch64::ST1Twov4h);
6522 return;
6523 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6524 VT == MVT::v8bf16) {
6525 SelectStore(Node, 2, AArch64::ST1Twov8h);
6526 return;
6527 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6528 SelectStore(Node, 2, AArch64::ST1Twov2s);
6529 return;
6530 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6531 SelectStore(Node, 2, AArch64::ST1Twov4s);
6532 return;
6533 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6534 SelectStore(Node, 2, AArch64::ST1Twov2d);
6535 return;
6536 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6537 SelectStore(Node, 2, AArch64::ST1Twov1d);
6538 return;
6539 }
6540 break;
6541 }
6542 case Intrinsic::aarch64_neon_st1x3: {
6543 if (VT == MVT::v8i8) {
6544 SelectStore(Node, 3, AArch64::ST1Threev8b);
6545 return;
6546 } else if (VT == MVT::v16i8) {
6547 SelectStore(Node, 3, AArch64::ST1Threev16b);
6548 return;
6549 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6550 VT == MVT::v4bf16) {
6551 SelectStore(Node, 3, AArch64::ST1Threev4h);
6552 return;
6553 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6554 VT == MVT::v8bf16) {
6555 SelectStore(Node, 3, AArch64::ST1Threev8h);
6556 return;
6557 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6558 SelectStore(Node, 3, AArch64::ST1Threev2s);
6559 return;
6560 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6561 SelectStore(Node, 3, AArch64::ST1Threev4s);
6562 return;
6563 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6564 SelectStore(Node, 3, AArch64::ST1Threev2d);
6565 return;
6566 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6567 SelectStore(Node, 3, AArch64::ST1Threev1d);
6568 return;
6569 }
6570 break;
6571 }
6572 case Intrinsic::aarch64_neon_st1x4: {
6573 if (VT == MVT::v8i8) {
6574 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6575 return;
6576 } else if (VT == MVT::v16i8) {
6577 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6578 return;
6579 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6580 VT == MVT::v4bf16) {
6581 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6582 return;
6583 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6584 VT == MVT::v8bf16) {
6585 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6586 return;
6587 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6588 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6589 return;
6590 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6591 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6592 return;
6593 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6594 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6595 return;
6596 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6597 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6598 return;
6599 }
6600 break;
6601 }
6602 case Intrinsic::aarch64_neon_st2: {
6603 if (VT == MVT::v8i8) {
6604 SelectStore(Node, 2, AArch64::ST2Twov8b);
6605 return;
6606 } else if (VT == MVT::v16i8) {
6607 SelectStore(Node, 2, AArch64::ST2Twov16b);
6608 return;
6609 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6610 VT == MVT::v4bf16) {
6611 SelectStore(Node, 2, AArch64::ST2Twov4h);
6612 return;
6613 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6614 VT == MVT::v8bf16) {
6615 SelectStore(Node, 2, AArch64::ST2Twov8h);
6616 return;
6617 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6618 SelectStore(Node, 2, AArch64::ST2Twov2s);
6619 return;
6620 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6621 SelectStore(Node, 2, AArch64::ST2Twov4s);
6622 return;
6623 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6624 SelectStore(Node, 2, AArch64::ST2Twov2d);
6625 return;
6626 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6627 SelectStore(Node, 2, AArch64::ST1Twov1d);
6628 return;
6629 }
6630 break;
6631 }
6632 case Intrinsic::aarch64_neon_st3: {
6633 if (VT == MVT::v8i8) {
6634 SelectStore(Node, 3, AArch64::ST3Threev8b);
6635 return;
6636 } else if (VT == MVT::v16i8) {
6637 SelectStore(Node, 3, AArch64::ST3Threev16b);
6638 return;
6639 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6640 VT == MVT::v4bf16) {
6641 SelectStore(Node, 3, AArch64::ST3Threev4h);
6642 return;
6643 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6644 VT == MVT::v8bf16) {
6645 SelectStore(Node, 3, AArch64::ST3Threev8h);
6646 return;
6647 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6648 SelectStore(Node, 3, AArch64::ST3Threev2s);
6649 return;
6650 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6651 SelectStore(Node, 3, AArch64::ST3Threev4s);
6652 return;
6653 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6654 SelectStore(Node, 3, AArch64::ST3Threev2d);
6655 return;
6656 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6657 SelectStore(Node, 3, AArch64::ST1Threev1d);
6658 return;
6659 }
6660 break;
6661 }
6662 case Intrinsic::aarch64_neon_st4: {
6663 if (VT == MVT::v8i8) {
6664 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6665 return;
6666 } else if (VT == MVT::v16i8) {
6667 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6668 return;
6669 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6670 VT == MVT::v4bf16) {
6671 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6672 return;
6673 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6674 VT == MVT::v8bf16) {
6675 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6676 return;
6677 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6678 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6679 return;
6680 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6681 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6682 return;
6683 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6684 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6685 return;
6686 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6687 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6688 return;
6689 }
6690 break;
6691 }
6692 case Intrinsic::aarch64_neon_st2lane: {
6693 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6694 SelectStoreLane(Node, 2, AArch64::ST2i8);
6695 return;
6696 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6697 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6698 SelectStoreLane(Node, 2, AArch64::ST2i16);
6699 return;
6700 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6701 VT == MVT::v2f32) {
6702 SelectStoreLane(Node, 2, AArch64::ST2i32);
6703 return;
6704 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6705 VT == MVT::v1f64) {
6706 SelectStoreLane(Node, 2, AArch64::ST2i64);
6707 return;
6708 }
6709 break;
6710 }
6711 case Intrinsic::aarch64_neon_st3lane: {
6712 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6713 SelectStoreLane(Node, 3, AArch64::ST3i8);
6714 return;
6715 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6716 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6717 SelectStoreLane(Node, 3, AArch64::ST3i16);
6718 return;
6719 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6720 VT == MVT::v2f32) {
6721 SelectStoreLane(Node, 3, AArch64::ST3i32);
6722 return;
6723 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6724 VT == MVT::v1f64) {
6725 SelectStoreLane(Node, 3, AArch64::ST3i64);
6726 return;
6727 }
6728 break;
6729 }
6730 case Intrinsic::aarch64_neon_st4lane: {
6731 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6732 SelectStoreLane(Node, 4, AArch64::ST4i8);
6733 return;
6734 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6735 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6736 SelectStoreLane(Node, 4, AArch64::ST4i16);
6737 return;
6738 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6739 VT == MVT::v2f32) {
6740 SelectStoreLane(Node, 4, AArch64::ST4i32);
6741 return;
6742 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6743 VT == MVT::v1f64) {
6744 SelectStoreLane(Node, 4, AArch64::ST4i64);
6745 return;
6746 }
6747 break;
6748 }
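// For the SVE structured-store cases below, the third argument to
// SelectPredicatedStore is the log2 of the element size in bytes
// (0 = B, 1 = H, 2 = W, 3 = D, 4 = Q); it is used to scale the immediate
// (_IMM) and reg+reg addressing forms when the address is selected.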
6749 case Intrinsic::aarch64_sve_st2q: {
6750 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6751 return;
6752 }
6753 case Intrinsic::aarch64_sve_st3q: {
6754 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6755 return;
6756 }
6757 case Intrinsic::aarch64_sve_st4q: {
6758 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6759 return;
6760 }
6761 case Intrinsic::aarch64_sve_st2: {
6762 if (VT == MVT::nxv16i8) {
6763 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6764 return;
6765 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6766 VT == MVT::nxv8bf16) {
6767 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6768 return;
6769 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6770 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6771 return;
6772 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6773 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
6774 return;
6775 }
6776 break;
6777 }
6778 case Intrinsic::aarch64_sve_st3: {
6779 if (VT == MVT::nxv16i8) {
6780 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
6781 return;
6782 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6783 VT == MVT::nxv8bf16) {
6784 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
6785 return;
6786 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6787 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
6788 return;
6789 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6790 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
6791 return;
6792 }
6793 break;
6794 }
6795 case Intrinsic::aarch64_sve_st4: {
6796 if (VT == MVT::nxv16i8) {
6797 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
6798 return;
6799 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6800 VT == MVT::nxv8bf16) {
6801 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
6802 return;
6803 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6804 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
6805 return;
6806 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6807 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
6808 return;
6809 }
6810 break;
6811 }
6812 }
6813 break;
6814 }
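// The post-incremented load cases below pick the *_POST instruction from the
// vector type and pass the first sub-register of the result tuple (dsub0 for
// 64-bit, qsub0 for 128-bit vectors) so the individual vectors can be
// extracted afterwards. v1i64/v1f64 fall back to the LD1 multi-register
// forms because LD2/LD3/LD4 have no .1d arrangement.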
6815 case AArch64ISD::LD2post: {
6816 if (VT == MVT::v8i8) {
6817 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
6818 return;
6819 } else if (VT == MVT::v16i8) {
6820 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
6821 return;
6822 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6823 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
6824 return;
6825 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6826 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
6827 return;
6828 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6829 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
6830 return;
6831 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6832 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
6833 return;
6834 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6835 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6836 return;
6837 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6838 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
6839 return;
6840 }
6841 break;
6842 }
6843 case AArch64ISD::LD3post: {
6844 if (VT == MVT::v8i8) {
6845 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
6846 return;
6847 } else if (VT == MVT::v16i8) {
6848 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
6849 return;
6850 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6851 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
6852 return;
6853 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6854 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
6855 return;
6856 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6857 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
6858 return;
6859 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6860 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
6861 return;
6862 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6863 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6864 return;
6865 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6866 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
6867 return;
6868 }
6869 break;
6870 }
6871 case AArch64ISD::LD4post: {
6872 if (VT == MVT::v8i8) {
6873 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
6874 return;
6875 } else if (VT == MVT::v16i8) {
6876 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
6877 return;
6878 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6879 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
6880 return;
6881 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6882 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
6883 return;
6884 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6885 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
6886 return;
6887 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6888 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
6889 return;
6890 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6891 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6892 return;
6893 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6894 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
6895 return;
6896 }
6897 break;
6898 }
6899 case AArch64ISD::LD1x2post: {
6900 if (VT == MVT::v8i8) {
6901 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
6902 return;
6903 } else if (VT == MVT::v16i8) {
6904 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
6905 return;
6906 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6907 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
6908 return;
6909 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6910 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
6911 return;
6912 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6913 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
6914 return;
6915 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6916 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
6917 return;
6918 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6919 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6920 return;
6921 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6922 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
6923 return;
6924 }
6925 break;
6926 }
6927 case AArch64ISD::LD1x3post: {
6928 if (VT == MVT::v8i8) {
6929 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
6930 return;
6931 } else if (VT == MVT::v16i8) {
6932 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
6933 return;
6934 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6935 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
6936 return;
6937 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6938 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
6939 return;
6940 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6941 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
6942 return;
6943 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6944 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
6945 return;
6946 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6947 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6948 return;
6949 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6950 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
6951 return;
6952 }
6953 break;
6954 }
6955 case AArch64ISD::LD1x4post: {
6956 if (VT == MVT::v8i8) {
6957 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
6958 return;
6959 } else if (VT == MVT::v16i8) {
6960 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
6961 return;
6962 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6963 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
6964 return;
6965 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6966 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
6967 return;
6968 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6969 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
6970 return;
6971 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6972 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
6973 return;
6974 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6975 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6976 return;
6977 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6978 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
6979 return;
6980 }
6981 break;
6982 }
6983 case AArch64ISD::LD1DUPpost: {
6984 if (VT == MVT::v8i8) {
6985 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
6986 return;
6987 } else if (VT == MVT::v16i8) {
6988 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
6989 return;
6990 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6991 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
6992 return;
6993 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6994 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
6995 return;
6996 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6997 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
6998 return;
6999 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7000 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
7001 return;
7002 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7003 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
7004 return;
7005 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7006 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
7007 return;
7008 }
7009 break;
7010 }
7011 case AArch64ISD::LD2DUPpost: {
7012 if (VT == MVT::v8i8) {
7013 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
7014 return;
7015 } else if (VT == MVT::v16i8) {
7016 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
7017 return;
7018 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7019 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
7020 return;
7021 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7022 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
7023 return;
7024 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7025 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
7026 return;
7027 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7028 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
7029 return;
7030 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7031 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
7032 return;
7033 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7034 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
7035 return;
7036 }
7037 break;
7038 }
7039 case AArch64ISD::LD3DUPpost: {
7040 if (VT == MVT::v8i8) {
7041 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
7042 return;
7043 } else if (VT == MVT::v16i8) {
7044 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
7045 return;
7046 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7047 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
7048 return;
7049 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7050 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
7051 return;
7052 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7053 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
7054 return;
7055 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7056 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
7057 return;
7058 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7059 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
7060 return;
7061 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7062 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
7063 return;
7064 }
7065 break;
7066 }
7067 case AArch64ISD::LD4DUPpost: {
7068 if (VT == MVT::v8i8) {
7069 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
7070 return;
7071 } else if (VT == MVT::v16i8) {
7072 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
7073 return;
7074 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7075 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
7076 return;
7077 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7078 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
7079 return;
7080 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7081 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
7082 return;
7083 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7084 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
7085 return;
7086 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7087 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
7088 return;
7089 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7090 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
7091 return;
7092 }
7093 break;
7094 }
7095 case AArch64ISD::LD1LANEpost: {
7096 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7097 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
7098 return;
7099 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7100 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7101 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
7102 return;
7103 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7104 VT == MVT::v2f32) {
7105 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
7106 return;
7107 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7108 VT == MVT::v1f64) {
7109 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
7110 return;
7111 }
7112 break;
7113 }
7114 case AArch64ISD::LD2LANEpost: {
7115 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7116 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
7117 return;
7118 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7119 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7120 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
7121 return;
7122 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7123 VT == MVT::v2f32) {
7124 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
7125 return;
7126 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7127 VT == MVT::v1f64) {
7128 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
7129 return;
7130 }
7131 break;
7132 }
7133 case AArch64ISD::LD3LANEpost: {
7134 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7135 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
7136 return;
7137 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7138 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7139 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
7140 return;
7141 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7142 VT == MVT::v2f32) {
7143 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
7144 return;
7145 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7146 VT == MVT::v1f64) {
7147 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
7148 return;
7149 }
7150 break;
7151 }
7152 case AArch64ISD::LD4LANEpost: {
7153 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7154 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
7155 return;
7156 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7157 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7158 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
7159 return;
7160 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7161 VT == MVT::v2f32) {
7162 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
7163 return;
7164 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7165 VT == MVT::v1f64) {
7166 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
7167 return;
7168 }
7169 break;
7170 }
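// The post-incremented store cases below re-read VT from operand 1: the
// stored data type is not available from the node's results (stores only
// produce the updated address and a chain), so the type of the first stored
// vector determines which instruction form to use.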
7171 case AArch64ISD::ST2post: {
7172 VT = Node->getOperand(1).getValueType();
7173 if (VT == MVT::v8i8) {
7174 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
7175 return;
7176 } else if (VT == MVT::v16i8) {
7177 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
7178 return;
7179 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7180 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
7181 return;
7182 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7183 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
7184 return;
7185 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7186 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
7187 return;
7188 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7189 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
7190 return;
7191 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7192 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
7193 return;
7194 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7195 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7196 return;
7197 }
7198 break;
7199 }
7200 case AArch64ISD::ST3post: {
7201 VT = Node->getOperand(1).getValueType();
7202 if (VT == MVT::v8i8) {
7203 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
7204 return;
7205 } else if (VT == MVT::v16i8) {
7206 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
7207 return;
7208 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7209 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
7210 return;
7211 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7212 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
7213 return;
7214 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7215 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
7216 return;
7217 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7218 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
7219 return;
7220 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7221 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
7222 return;
7223 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7224 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7225 return;
7226 }
7227 break;
7228 }
7229 case AArch64ISD::ST4post: {
7230 VT = Node->getOperand(1).getValueType();
7231 if (VT == MVT::v8i8) {
7232 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
7233 return;
7234 } else if (VT == MVT::v16i8) {
7235 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
7236 return;
7237 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7238 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
7239 return;
7240 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7241 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
7242 return;
7243 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7244 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
7245 return;
7246 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7247 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
7248 return;
7249 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7250 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
7251 return;
7252 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7253 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7254 return;
7255 }
7256 break;
7257 }
7258 case AArch64ISD::ST1x2post: {
7259 VT = Node->getOperand(1).getValueType();
7260 if (VT == MVT::v8i8) {
7261 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
7262 return;
7263 } else if (VT == MVT::v16i8) {
7264 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
7265 return;
7266 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7267 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
7268 return;
7269 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7270 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
7271 return;
7272 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7273 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
7274 return;
7275 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7276 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
7277 return;
7278 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7279 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7280 return;
7281 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7282 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
7283 return;
7284 }
7285 break;
7286 }
7287 case AArch64ISD::ST1x3post: {
7288 VT = Node->getOperand(1).getValueType();
7289 if (VT == MVT::v8i8) {
7290 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
7291 return;
7292 } else if (VT == MVT::v16i8) {
7293 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
7294 return;
7295 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7296 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
7297 return;
7298 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7299 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
7300 return;
7301 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7302 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
7303 return;
7304 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7305 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
7306 return;
7307 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7308 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7309 return;
7310 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7311 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
7312 return;
7313 }
7314 break;
7315 }
7316 case AArch64ISD::ST1x4post: {
7317 VT = Node->getOperand(1).getValueType();
7318 if (VT == MVT::v8i8) {
7319 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
7320 return;
7321 } else if (VT == MVT::v16i8) {
7322 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
7323 return;
7324 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7325 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
7326 return;
7327 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7328 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
7329 return;
7330 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7331 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
7332 return;
7333 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7334 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
7335 return;
7336 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7337 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7338 return;
7339 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7340 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
7341 return;
7342 }
7343 break;
7344 }
7345 case AArch64ISD::ST2LANEpost: {
7346 VT = Node->getOperand(1).getValueType();
7347 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7348 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
7349 return;
7350 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7351 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7352 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
7353 return;
7354 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7355 VT == MVT::v2f32) {
7356 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
7357 return;
7358 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7359 VT == MVT::v1f64) {
7360 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
7361 return;
7362 }
7363 break;
7364 }
7365 case AArch64ISD::ST3LANEpost: {
7366 VT = Node->getOperand(1).getValueType();
7367 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7368 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
7369 return;
7370 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7371 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7372 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7373 return;
7374 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7375 VT == MVT::v2f32) {
7376 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7377 return;
7378 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7379 VT == MVT::v1f64) {
7380 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7381 return;
7382 }
7383 break;
7384 }
7385 case AArch64ISD::ST4LANEpost: {
7386 VT = Node->getOperand(1).getValueType();
7387 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7388 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7389 return;
7390 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7391 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7392 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7393 return;
7394 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7395 VT == MVT::v2f32) {
7396 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7397 return;
7398 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7399 VT == MVT::v1f64) {
7400 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7401 return;
7402 }
7403 break;
7404 }
7405 }
7406
7407 // Select the default instruction
7408 SelectCode(Node);
7409}
7410
7411/// createAArch64ISelDag - This pass converts a legalized DAG into an
7412/// AArch64-specific DAG, ready for instruction scheduling.
7413 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
7414 CodeGenOptLevel OptLevel) {
7415 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7416}
7417
7418/// When \p PredVT is a scalable vector predicate in the form
7419/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
7420/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7421/// structured vectors (NumVec > 1), the output data type is
7422/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7423/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7424/// EVT.
7425 static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
7426 unsigned NumVec) {
7427 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7428 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
7429 return EVT();
7430
7431 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7432 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7433 return EVT();
7434
7435 ElementCount EC = PredVT.getVectorElementCount();
7436 EVT ScalarVT =
7437 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7438 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7439
7440 return MemVT;
7441}
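// Worked example: for PredVT = nxv4i1 and NumVec = 2 each element is
// 128 / 4 = 32 bits wide, so this returns nxv8i32; for PredVT = nxv16i1 and
// NumVec = 1 it returns nxv16i8.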
7442
7443/// Return the EVT of the data associated to a memory operation in \p
7444/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
7445 static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
7446 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Root))
7447 return MemIntr->getMemoryVT();
7448
7449 if (isa<MemSDNode>(Root)) {
7450 EVT MemVT = cast<MemSDNode>(Root)->getMemoryVT();
7451
7452 EVT DataVT;
7453 if (auto *Load = dyn_cast<LoadSDNode>(Root))
7454 DataVT = Load->getValueType(0);
7455 else if (auto *Load = dyn_cast<MaskedLoadSDNode>(Root))
7456 DataVT = Load->getValueType(0);
7457 else if (auto *Store = dyn_cast<StoreSDNode>(Root))
7458 DataVT = Store->getValue().getValueType();
7459 else if (auto *Store = dyn_cast<MaskedStoreSDNode>(Root))
7460 DataVT = Store->getValue().getValueType();
7461 else
7462 llvm_unreachable("Unexpected MemSDNode!");
7463
7464 return DataVT.changeVectorElementType(MemVT.getVectorElementType());
7465 }
7466
7467 const unsigned Opcode = Root->getOpcode();
7468 // For custom ISD nodes, we have to look at them individually to extract the
7469 // type of the data moved to/from memory.
7470 switch (Opcode) {
7471 case AArch64ISD::LD1_MERGE_ZERO:
7472 case AArch64ISD::LD1S_MERGE_ZERO:
7473 case AArch64ISD::LDNF1_MERGE_ZERO:
7474 case AArch64ISD::LDNF1S_MERGE_ZERO:
7475 return cast<VTSDNode>(Root->getOperand(3))->getVT();
7476 case AArch64ISD::ST1_PRED:
7477 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7478 default:
7479 break;
7480 }
7481
7482 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7483 return EVT();
7484
7485 switch (Root->getConstantOperandVal(1)) {
7486 default:
7487 return EVT();
7488 case Intrinsic::aarch64_sme_ldr:
7489 case Intrinsic::aarch64_sme_str:
7490 return MVT::nxv16i8;
7491 case Intrinsic::aarch64_sve_prf:
7492 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7493 // width of the predicate.
7494 return getPackedVectorTypeFromPredicateType(
7495 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7496 case Intrinsic::aarch64_sve_ld2_sret:
7497 case Intrinsic::aarch64_sve_ld2q_sret:
7498 return getPackedVectorTypeFromPredicateType(
7499 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7500 case Intrinsic::aarch64_sve_st2q:
7501 return getPackedVectorTypeFromPredicateType(
7502 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7503 case Intrinsic::aarch64_sve_ld3_sret:
7504 case Intrinsic::aarch64_sve_ld3q_sret:
7505 return getPackedVectorTypeFromPredicateType(
7506 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7507 case Intrinsic::aarch64_sve_st3q:
7508 return getPackedVectorTypeFromPredicateType(
7509 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7510 case Intrinsic::aarch64_sve_ld4_sret:
7511 case Intrinsic::aarch64_sve_ld4q_sret:
7512 return getPackedVectorTypeFromPredicateType(
7513 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7514 case Intrinsic::aarch64_sve_st4q:
7515 return getPackedVectorTypeFromPredicateType(
7516 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7517 case Intrinsic::aarch64_sve_ld1udq:
7518 case Intrinsic::aarch64_sve_st1dq:
7519 return EVT(MVT::nxv1i64);
7520 case Intrinsic::aarch64_sve_ld1uwq:
7521 case Intrinsic::aarch64_sve_st1wq:
7522 return EVT(MVT::nxv1i32);
7523 }
7524}
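// Example: for a masked load whose result type is nxv4i32 but whose memory
// type is nxv4i16 (an extending load), the DataVT/MemVT combination above
// yields nxv4i16, i.e. the in-memory element width with the in-register
// element count.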
7525
7526/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7527/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
7528/// where Root is the memory access using N for its address.
7529template <int64_t Min, int64_t Max>
7530bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7531 SDValue &Base,
7532 SDValue &OffImm) {
7533 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7534 const DataLayout &DL = CurDAG->getDataLayout();
7535 const MachineFrameInfo &MFI = MF->getFrameInfo();
7536
7537 if (N.getOpcode() == ISD::FrameIndex) {
7538 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7539 // We can only encode VL scaled offsets, so only fold in frame indexes
7540 // referencing SVE objects.
7541 if (MFI.hasScalableStackID(FI)) {
7542 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7543 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7544 return true;
7545 }
7546
7547 return false;
7548 }
7549
7550 if (MemVT == EVT())
7551 return false;
7552
7553 if (N.getOpcode() != ISD::ADD)
7554 return false;
7555
7556 SDValue VScale = N.getOperand(1);
7557 int64_t MulImm = std::numeric_limits<int64_t>::max();
7558 if (VScale.getOpcode() == ISD::VSCALE) {
7559 MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7560 } else if (auto C = dyn_cast<ConstantSDNode>(VScale)) {
7561 int64_t ByteOffset = C->getSExtValue();
7562 const auto KnownVScale =
7563 Subtarget->getSVEVectorSizeInBits() / AArch64::SVEBitsPerBlock;
7564
7565 if (!KnownVScale || ByteOffset % KnownVScale != 0)
7566 return false;
7567
7568 MulImm = ByteOffset / KnownVScale;
7569 } else
7570 return false;
7571
7572 TypeSize TS = MemVT.getSizeInBits();
7573 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7574
7575 if ((MulImm % MemWidthBytes) != 0)
7576 return false;
7577
7578 int64_t Offset = MulImm / MemWidthBytes;
7579 if ((Offset < Min) || (Offset > Max))
7580 return false;
7581
7582 Base = N.getOperand(0);
7583 if (Base.getOpcode() == ISD::FrameIndex) {
7584 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7585 // We can only encode VL scaled offsets, so only fold in frame indexes
7586 // referencing SVE objects.
7587 if (MFI.hasScalableStackID(FI))
7588 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7589 }
7590
7591 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7592 return true;
7593}
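// Worked example (illustrative): with MemVT = nxv4i32 the memory width is
// 16 bytes per vscale, so an address of the form (add x0, (vscale * 32))
// gives MulImm = 32 and Offset = 2; if 2 lies in [Min, Max] (e.g. [-8, 7]
// for the LD1/ST1 forms) this selects Base = x0, OffImm = 2, i.e. the
// [x0, #2, mul vl] addressing mode.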
7594
7595/// Select register plus register addressing mode for SVE, with scaled
7596/// offset.
7597bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7598 SDValue &Base,
7599 SDValue &Offset) {
7600 if (N.getOpcode() != ISD::ADD)
7601 return false;
7602
7603 // Process an ADD node.
7604 const SDValue LHS = N.getOperand(0);
7605 const SDValue RHS = N.getOperand(1);
7606
7607 // 8 bit data does not come with the SHL node, so it is treated
7608 // separately.
7609 if (Scale == 0) {
7610 Base = LHS;
7611 Offset = RHS;
7612 return true;
7613 }
7614
7615 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7616 int64_t ImmOff = C->getSExtValue();
7617 unsigned Size = 1 << Scale;
7618
7619 // To use the reg+reg addressing mode, the immediate must be a multiple of
7620 // the vector element's byte size.
7621 if (ImmOff % Size)
7622 return false;
7623
7624 SDLoc DL(N);
7625 Base = LHS;
7626 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7627 SDValue Ops[] = {Offset};
7628 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7629 Offset = SDValue(MI, 0);
7630 return true;
7631 }
7632
7633 // Check if the RHS is a shift node with a constant.
7634 if (RHS.getOpcode() != ISD::SHL)
7635 return false;
7636
7637 const SDValue ShiftRHS = RHS.getOperand(1);
7638 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7639 if (C->getZExtValue() == Scale) {
7640 Base = LHS;
7641 Offset = RHS.getOperand(0);
7642 return true;
7643 }
7644
7645 return false;
7646}
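// Example: with Scale = 2 (32-bit elements), (add x0, (shl x1, 2)) selects
// Base = x0, Offset = x1, matching e.g. ld1w { z0.s }, p0/z, [x0, x1, lsl #2].
// A constant RHS that is a multiple of the element size is instead
// materialised into a register via MOVi64imm so the same reg+reg form applies.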
7647
7648bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7649 const AArch64TargetLowering *TLI =
7650 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7651
7652 return TLI->isAllActivePredicate(*CurDAG, N);
7653}
7654
7655bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7656 EVT VT = N.getValueType();
7657 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7658}
7659
7660 bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7661 SDValue &Base, SDValue &Offset,
7662 unsigned Scale) {
7663 auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
7664 if (auto *C = dyn_cast<ConstantSDNode>(CN)) {
7665 int64_t ImmOff = C->getSExtValue();
7666 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)))
7667 return CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7668 }
7669 return SDValue();
7670 };
7671
7672 if (SDValue C = MatchConstantOffset(N)) {
7673 Base = CurDAG->getConstant(0, SDLoc(N), MVT::i32);
7674 Offset = C;
7675 return true;
7676 }
7677
7678 // Try to untangle an ADD node into a 'reg + offset'
7679 if (CurDAG->isBaseWithConstantOffset(N)) {
7680 if (SDValue C = MatchConstantOffset(N.getOperand(1))) {
7681 Base = N.getOperand(0);
7682 Offset = C;
7683 return true;
7684 }
7685 }
7686
7687 // By default, just match reg + 0.
7688 Base = N;
7689 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7690 return true;
7691}
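// Illustrative example (assuming MaxSize = 14 and Scale = 2): a slice index
// of (add x8, #6) selects Base = x8, Offset = 3; a bare constant #6 selects
// Base = constant 0, Offset = 3; anything else falls through to Base = N,
// Offset = 0.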
7692
7693bool AArch64DAGToDAGISel::SelectCmpBranchUImm6Operand(SDNode *P, SDValue N,
7694 SDValue &Imm) {
7695 AArch64CC::CondCode CC =
7696 static_cast<AArch64CC::CondCode>(P->getConstantOperandVal(1));
7697 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
7698 // Check conservatively if the immediate fits the valid range [0, 64).
7699 // Immediate variants for GE and HS definitely need to be decremented
7700 // when lowering the pseudos later, so an immediate of 1 would become 0.
7701 // For the inverse conditions LT and LO we don't know for sure if they
7702 // will need a decrement but should the decision be made to reverse the
7703 // branch condition, we again end up with the need to decrement.
7704 // The same argument holds for LE, LS, GT and HI and possibly
7705 // incremented immediates. This can lead to slightly less optimal
7706 // codegen, e.g. we never codegen the legal case
7707 // cblt w0, #63, A
7708 // because we could end up with the illegal case
7709 // cbge w0, #64, B
7710 // should the decision to reverse the branch direction be made. For the
7711 // lower bound cases this is no problem since we can express comparisons
7712 // against 0 with either tbz/tbnz or using wzr/xzr.
7713 uint64_t LowerBound = 0, UpperBound = 64;
7714 switch (CC) {
7715 case AArch64CC::GE:
7716 case AArch64CC::HS:
7717 case AArch64CC::LT:
7718 case AArch64CC::LO:
7719 LowerBound = 1;
7720 break;
7721 case AArch64CC::LE:
7722 case AArch64CC::LS:
7723 case AArch64CC::GT:
7724 case AArch64CC::HI:
7725 UpperBound = 63;
7726 break;
7727 default:
7728 break;
7729 }
7730
7731 if (CN->getAPIntValue().uge(LowerBound) &&
7732 CN->getAPIntValue().ult(UpperBound)) {
7733 SDLoc DL(N);
7734 Imm = CurDAG->getTargetConstant(CN->getZExtValue(), DL, N.getValueType());
7735 return true;
7736 }
7737 }
7738
7739 return false;
7740}
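// In practice this means: for GE/HS/LT/LO only immediates 1..63 are accepted
// (the lowering may need to subtract one), for LE/LS/GT/HI only 0..62 are
// accepted (the lowering may need to add one), and the remaining condition
// codes take the full 0..63 range.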
7741
7742template <bool MatchCBB>
7743bool AArch64DAGToDAGISel::SelectCmpBranchExtOperand(SDValue N, SDValue &Reg,
7744 SDValue &ExtType) {
7745
7746 // Use an invalid shift-extend value to indicate we don't need to extend later
7747 if (N.getOpcode() == ISD::AssertZext || N.getOpcode() == ISD::AssertSext) {
7748 EVT Ty = cast<VTSDNode>(N.getOperand(1))->getVT();
7749 if (Ty != (MatchCBB ? MVT::i8 : MVT::i16))
7750 return false;
7751 Reg = N.getOperand(0);
7752 ExtType = CurDAG->getSignedTargetConstant(AArch64_AM::InvalidShiftExtend,
7753 SDLoc(N), MVT::i32);
7754 return true;
7755 }
7756
7758
7759 if ((MatchCBB && (ET == AArch64_AM::UXTB || ET == AArch64_AM::SXTB)) ||
7760 (!MatchCBB && (ET == AArch64_AM::UXTH || ET == AArch64_AM::SXTH))) {
7761 Reg = N.getOperand(0);
7762 ExtType =
7763 CurDAG->getTargetConstant(getExtendEncoding(ET), SDLoc(N), MVT::i32);
7764 return true;
7765 }
7766
7767 return false;
7768}
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:712
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
unsigned getID() const
Return the register class ID number.
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:963
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint32_t parseGenericRegister(StringRef Name)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
unsigned getExtendEncoding(AArch64_AM::ShiftExtendType ET)
Mapping from extend bits to required operation: shifter: 000 ==> uxtb 001 ==> uxth 010 ==> uxtw 011 =...
static bool isSVELogicalImm(unsigned SizeInBits, uint64_t ImmVal, uint64_t &Encoding)
static bool isSVECpyDupImm(int SizeInBits, int64_t Val, int32_t &Imm, int32_t &Shift)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static constexpr unsigned SVEBitsPerBlock
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:595
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:843
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:983
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:834
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:671
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:225
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:764
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:609
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:134
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:840
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:878
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:738
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:846
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
@ Undef
Value of the register doesn't matter.
Not(const Pred &P) -> Not< Pred >
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:532
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition MathExtras.h:267
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:2016
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOptLevel OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into a AArch64-specific DAG,...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
Extended Value Type.
Definition ValueTypes.h:35
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:463
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isFixedLengthVector() const
Definition ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition ValueTypes.h:202
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
Matching combinators.