//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the RISC-V target.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelDAGToDAG.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCVISelLowering.h"
#include "RISCVInstrInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-isel"
#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"

static cl::opt<bool> UsePseudoMovImm(
    "riscv-use-rematerializable-movimm", cl::Hidden,
    cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
             "constant materialization"),
    cl::init(false));

#define GET_DAGISEL_BODY RISCVDAGToDAGISel
#include "RISCVGenDAGISel.inc"

void RISCVDAGToDAGISel::PreprocessISelDAG() {
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty())
      continue;

    SDValue Result;
    switch (N->getOpcode()) {
    case ISD::SPLAT_VECTOR: {
      if (Subtarget->hasStdExtP())
        break;
      // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
      // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
      MVT VT = N->getSimpleValueType(0);
      unsigned Opc =
          VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
      SDLoc DL(N);
      SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
      SDValue Src = N->getOperand(0);
      if (VT.isInteger())
        Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
                              N->getOperand(0));
      Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
      break;
    }
    case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
      // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
      // load. Done after lowering and combining so that we have a chance to
      // optimize this to VMV_V_X_VL when the upper bits aren't needed.
      assert(N->getNumOperands() == 4 && "Unexpected number of operands");
      MVT VT = N->getSimpleValueType(0);
      SDValue Passthru = N->getOperand(0);
      SDValue Lo = N->getOperand(1);
      SDValue Hi = N->getOperand(2);
      SDValue VL = N->getOperand(3);
      assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
             Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
             "Unexpected VTs!");
      MachineFunction &MF = CurDAG->getMachineFunction();
      SDLoc DL(N);

      // Create temporary stack for each expanding node.
      SDValue StackSlot =
          CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
      int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
      MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);

      SDValue Chain = CurDAG->getEntryNode();
      Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));

      SDValue OffsetSlot =
          CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
      Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
                            Align(8));

      Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);

      SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
      SDValue IntID =
          CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
      SDValue Ops[] = {Chain,
                       IntID,
                       Passthru,
                       StackSlot,
                       CurDAG->getRegister(RISCV::X0, MVT::i64),
                       VL};

      Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                                           MVT::i64, MPI, Align(8),
                                           MachineMemOperand::MOLoad);
      break;
    }
    case ISD::FP_EXTEND: {
      // We only have vector patterns for riscv_fpextend_vl in isel.
      SDLoc DL(N);
      MVT VT = N->getSimpleValueType(0);
      if (!VT.isVector())
        break;
      SDValue VLMAX = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
      SDValue TrueMask = CurDAG->getNode(
          RISCVISD::VMSET_VL, DL, VT.changeVectorElementType(MVT::i1), VLMAX);
      Result = CurDAG->getNode(RISCVISD::FP_EXTEND_VL, DL, VT, N->getOperand(0),
                               TrueMask, VLMAX);
      break;
    }
    }

    if (Result) {
      LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
      LLVM_DEBUG(N->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\nNew: ");
      LLVM_DEBUG(Result->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\n");

      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
      MadeChange = true;
    }
  }

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}

void RISCVDAGToDAGISel::PostprocessISelDAG() {
  HandleSDNode Dummy(CurDAG->getRoot());
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    MadeChange |= doPeepholeSExtW(N);

    // FIXME: This is here only because the VMerge transform doesn't
    // know how to handle masked true inputs. Once that has been moved
    // to post-ISEL, this can be deleted as well.
    MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
  }

  CurDAG->setRoot(Dummy.getValue());

  // After we're done with everything else, convert IMPLICIT_DEF
  // passthru operands to NoRegister. This is required to work around
  // an optimization deficiency in MachineCSE. This really should
  // be merged back into each of the patterns (i.e. there's no good
  // reason not to go directly to NoReg), but is being done this way
  // to allow easy backporting.
  MadeChange |= doPeepholeNoRegPassThru();

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}

static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
                            RISCVMatInt::InstSeq &Seq) {
  SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
  for (const RISCVMatInt::Inst &Inst : Seq) {
    SDValue SDImm = CurDAG->getSignedTargetConstant(Inst.getImm(), DL, VT);
    SDNode *Result = nullptr;
    switch (Inst.getOpndKind()) {
    case RISCVMatInt::Imm:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
      break;
    case RISCVMatInt::RegX0:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
                                      CurDAG->getRegister(RISCV::X0, VT));
      break;
    case RISCVMatInt::RegReg:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
      break;
    case RISCVMatInt::RegImm:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
      break;
    }

    // Only the first instruction has X0 as its source.
    SrcReg = SDValue(Result, 0);
  }

  return SrcReg;
}

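// Materialize an immediate into a GPR. For example, on RV64 the constant
// 0x12345678 is typically built as LUI (loading 0x12345000) followed by
// ADDIW adding 0x678; RISCVMatInt::generateInstSeq computes the exact
// sequence.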
static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
                         int64_t Imm, const RISCVSubtarget &Subtarget) {
  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);

  // Use a rematerializable pseudo instruction for short sequences if enabled.
  if (Seq.size() == 2 && UsePseudoMovImm)
    return SDValue(
        CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
                               CurDAG->getSignedTargetConstant(Imm, DL, VT)),
        0);

  // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
  // worst an LUI+ADDIW. This will require an extra register, but avoids a
  // constant pool.
  // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
  // the low and high 32 bits are the same and bits 31 and 63 are set.
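  // For example, 0x0000000500000005 can be built as X = (ADDI x0, 5) followed
  // by (ADD (SLLI X, 32), X), rather than materializing the full 64-bit
  // constant one chunk at a time.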
  if (Seq.size() > 3) {
    unsigned ShiftAmt, AddOpc;
    RISCVMatInt::InstSeq SeqLo =
        RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
    if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
      SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);

      SDValue SLLI = SDValue(
          CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
                                 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
          0);
      return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
    }
  }

  // Otherwise, use the original sequence.
  return selectImmSeq(CurDAG, DL, VT, Seq);
}

void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
    SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
    bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
    bool IsLoad, MVT *IndexVT) {
  SDValue Chain = Node->getOperand(0);

  Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.

  if (IsStridedOrIndexed) {
    Operands.push_back(Node->getOperand(CurOp++)); // Index.
    if (IndexVT)
      *IndexVT = Operands.back()->getSimpleValueType(0);
  }

  if (IsMasked) {
    SDValue Mask = Node->getOperand(CurOp++);
    Operands.push_back(Mask);
  }
  SDValue VL;
  selectVLOp(Node->getOperand(CurOp++), VL);
  Operands.push_back(VL);

  MVT XLenVT = Subtarget->getXLenVT();
  SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
  Operands.push_back(SEWOp);

  // At the IR layer, all the masked load intrinsics have policy operands,
  // none of the others do. All have passthru operands. For our pseudos,
  // all loads have policy operands.
  if (IsLoad) {
    uint64_t Policy = RISCVVType::MASK_AGNOSTIC;
    if (IsMasked)
      Policy = Node->getConstantOperandVal(CurOp++);
    SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
    Operands.push_back(PolicyOp);
  }

  Operands.push_back(Chain); // Chain.
}

void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  Operands.push_back(Node->getOperand(CurOp++));

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                             Operands, /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
                            static_cast<unsigned>(LMUL));
  MachineSDNode *Load =
      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);

  CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});

  ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
  ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(Node);
}

void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, unsigned NF,
                                      bool IsMasked) {
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);
  MVT XLenVT = Subtarget->getXLenVT();
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  Operands.push_back(Node->getOperand(CurOp++));

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ false, Operands,
                             /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
                            Log2SEW, static_cast<unsigned>(LMUL));
  MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
                                               XLenVT, MVT::Other, Operands);

  CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});

  ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); // Result
  ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); // VL
  ReplaceUses(SDValue(Node, 2), SDValue(Load, 2)); // Chain
  CurDAG->RemoveDeadNode(Node);
}

void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  Operands.push_back(Node->getOperand(CurOp++));

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/true, &IndexVT);

#ifndef NDEBUG
  // Number of elements = RVVBitsPerBlock * LMUL / SEW
  unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
  auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
  if (DecodedLMUL.second)
    ContainedTyNumElts /= DecodedLMUL.first;
  else
    ContainedTyNumElts *= DecodedLMUL.first;
  assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
         "Element count mismatch");
#endif

  RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    reportFatalUsageError("The V extension does not support EEW=64 for index "
                          "values when XLEN=32");
  }
  const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
      static_cast<unsigned>(IndexLMUL));
  MachineSDNode *Load =
      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);

  CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});

  ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
  ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(Node);
}

void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  Operands.push_back(Node->getOperand(CurOp++));

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                             Operands);

  const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
      NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
  MachineSDNode *Store =
      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);

  CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});

  ReplaceNode(Node, Store);
}

void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  Operands.push_back(Node->getOperand(CurOp++));

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/false, &IndexVT);

#ifndef NDEBUG
  // Number of elements = RVVBitsPerBlock * LMUL / SEW
  unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
  auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
  if (DecodedLMUL.second)
    ContainedTyNumElts /= DecodedLMUL.first;
  else
    ContainedTyNumElts *= DecodedLMUL.first;
  assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
         "Element count mismatch");
#endif

  RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    reportFatalUsageError("The V extension does not support EEW=64 for index "
                          "values when XLEN=32");
  }
  const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
      static_cast<unsigned>(IndexLMUL));
  MachineSDNode *Store =
      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);

  CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});

  ReplaceNode(Node, Store);
}

void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
  if (!Subtarget->hasVInstructions())
    return;

  assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");

  SDLoc DL(Node);
  MVT XLenVT = Subtarget->getXLenVT();

  unsigned IntNo = Node->getConstantOperandVal(0);

  assert((IntNo == Intrinsic::riscv_vsetvli ||
          IntNo == Intrinsic::riscv_vsetvlimax) &&
         "Unexpected vsetvli intrinsic");

  bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
  unsigned Offset = (VLMax ? 1 : 2);

  assert(Node->getNumOperands() == Offset + 2 &&
         "Unexpected number of operands");

  unsigned SEW =
      RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
  RISCVVType::VLMUL VLMul = static_cast<RISCVVType::VLMUL>(
      Node->getConstantOperandVal(Offset + 1) & 0x7);

  unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
                                            /*MaskAgnostic*/ true);
  SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);

  SDValue VLOperand;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
    if (auto VLEN = Subtarget->getRealVLen())
      if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
        VLMax = true;
  }
  if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
    VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
    Opcode = RISCV::PseudoVSETVLIX0;
  } else {
    VLOperand = Node->getOperand(1);

    if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
      uint64_t AVL = C->getZExtValue();
      if (isUInt<5>(AVL)) {
        SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
        ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
                                                 XLenVT, VLImm, VTypeIOp));
        return;
      }
    }
  }

  ReplaceNode(Node,
              CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
}

void RISCVDAGToDAGISel::selectXSfmmVSET(SDNode *Node) {
  if (!Subtarget->hasVendorXSfmmbase())
    return;

  assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");

  SDLoc DL(Node);
  MVT XLenVT = Subtarget->getXLenVT();

  unsigned IntNo = Node->getConstantOperandVal(0);

  assert((IntNo == Intrinsic::riscv_sf_vsettnt ||
          IntNo == Intrinsic::riscv_sf_vsettm ||
          IntNo == Intrinsic::riscv_sf_vsettk) &&
         "Unexpected XSfmm vset intrinsic");

  unsigned SEW = RISCVVType::decodeVSEW(Node->getConstantOperandVal(2));
  unsigned Widen = RISCVVType::decodeTWiden(Node->getConstantOperandVal(3));
  unsigned PseudoOpCode =
      IntNo == Intrinsic::riscv_sf_vsettnt  ? RISCV::PseudoSF_VSETTNT
      : IntNo == Intrinsic::riscv_sf_vsettm ? RISCV::PseudoSF_VSETTM
                                            : RISCV::PseudoSF_VSETTK;

  if (IntNo == Intrinsic::riscv_sf_vsettnt) {
    unsigned VTypeI = RISCVVType::encodeXSfmmVType(SEW, Widen, 0);
    SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);

    ReplaceNode(Node, CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT,
                                             Node->getOperand(1), VTypeIOp));
  } else {
    SDValue Log2SEW = CurDAG->getTargetConstant(Log2_32(SEW), DL, XLenVT);
    SDValue TWiden = CurDAG->getTargetConstant(Widen, DL, XLenVT);
    ReplaceNode(Node,
                CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT,
                                       Node->getOperand(1), Log2SEW, TWiden));
  }
}

bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
  MVT VT = Node->getSimpleValueType(0);
  unsigned Opcode = Node->getOpcode();
  assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
         "Unexpected opcode");
  SDLoc DL(Node);

  // For operations of the form (x << C1) op C2, check if we can use
  // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
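  // For example, ((x << 8) | 0x1100) cannot use ORI directly since 0x1100
  // does not fit in a simm12, but the equivalent ((x | 0x11) << 8) can,
  // since 0x11 fits.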
  SDValue N0 = Node->getOperand(0);
  SDValue N1 = Node->getOperand(1);

  auto *Cst = dyn_cast<ConstantSDNode>(N1);
  if (!Cst)
    return false;

  int64_t Val = Cst->getSExtValue();

  // Check if the immediate can already use ANDI/ORI/XORI.
  if (isInt<12>(Val))
    return false;

  SDValue Shift = N0;

  // If Val is simm32 and we have a sext_inreg from i32, then the binop
  // produces at least 33 sign bits. We can peek through the sext_inreg and use
  // a SLLIW at the end.
  bool SignExt = false;
  if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
    SignExt = true;
    Shift = N0.getOperand(0);
  }

  if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
    return false;

  auto *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
  if (!ShlCst)
    return false;

  uint64_t ShAmt = ShlCst->getZExtValue();

  // Make sure that we don't change the operation by removing bits.
  // This only matters for OR and XOR; AND is unaffected.
  uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
    return false;

  int64_t ShiftedVal = Val >> ShAmt;
  if (!isInt<12>(ShiftedVal))
    return false;

  // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
  if (SignExt && ShAmt >= 32)
    return false;

  // Ok, we can reorder to get a smaller immediate.
  unsigned BinOpc;
  switch (Opcode) {
  default: llvm_unreachable("Unexpected opcode");
  case ISD::AND: BinOpc = RISCV::ANDI; break;
  case ISD::OR:  BinOpc = RISCV::ORI;  break;
  case ISD::XOR: BinOpc = RISCV::XORI; break;
  }

  unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;

  SDNode *BinOp = CurDAG->getMachineNode(
      BinOpc, DL, VT, Shift.getOperand(0),
      CurDAG->getSignedTargetConstant(ShiftedVal, DL, VT));
  SDNode *SLLI =
      CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
                             CurDAG->getTargetConstant(ShAmt, DL, VT));
  ReplaceNode(Node, SLLI);
  return true;
}

bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
  unsigned Opc;

  if (Subtarget->hasVendorXTHeadBb())
    Opc = RISCV::TH_EXT;
  else if (Subtarget->hasVendorXAndesPerf())
    Opc = RISCV::NDS_BFOS;
  else if (Subtarget->hasVendorXqcibm())
    Opc = RISCV::QC_EXT;
  else
    // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
    return false;

  auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
  if (!N1C)
    return false;

  SDValue N0 = Node->getOperand(0);
  if (!N0.hasOneUse())
    return false;

  auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb,
                             const SDLoc &DL, MVT VT) {
    if (Opc == RISCV::QC_EXT) {
      // QC.EXT X, width, shamt
      // shamt is the same as Lsb.
      // width is the number of bits to extract starting at Lsb.
      Msb = Msb - Lsb + 1;
    }
    return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
                                  CurDAG->getTargetConstant(Msb, DL, VT),
                                  CurDAG->getTargetConstant(Lsb, DL, VT));
  };

  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);
  const unsigned RightShAmt = N1C->getZExtValue();

  // Transform (sra (shl X, C1), C2) with C1 < C2
  //   -> (SignedBitfieldExtract X, msb, lsb)
  if (N0.getOpcode() == ISD::SHL) {
    auto *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01C)
      return false;

    const unsigned LeftShAmt = N01C->getZExtValue();
    // Make sure that this is a bitfield extraction (i.e., the shift-right
    // amount cannot be less than the left-shift).
    if (LeftShAmt > RightShAmt)
      return false;

    const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
    const unsigned Msb = MsbPlusOne - 1;
    const unsigned Lsb = RightShAmt - LeftShAmt;

    SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
    ReplaceNode(Node, Sbe);
    return true;
  }

  // Transform (sra (sext_inreg X, _), C) ->
  //   (SignedBitfieldExtract X, msb, lsb)
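  // For example, (sra (sext_inreg X, i8), 3) extracts the signed bitfield
  // X[7:3]: Msb = 7 and Lsb = 3.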
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned ExtSize =
        cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();

    // ExtSize of 32 should use sraiw via tablegen pattern.
    if (ExtSize == 32)
      return false;

    const unsigned Msb = ExtSize - 1;
    // If the shift-right amount is greater than Msb, the shift extracts the
    // X[Msb] bit and sign-extends it.
    const unsigned Lsb = RightShAmt > Msb ? Msb : RightShAmt;

    SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
    ReplaceNode(Node, Sbe);
    return true;
  }

  return false;
}

bool RISCVDAGToDAGISel::trySignedBitfieldInsertInSign(SDNode *Node) {
  // Only supported with XAndesPerf at the moment.
  if (!Subtarget->hasVendorXAndesPerf())
    return false;

  auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
  if (!N1C)
    return false;

  SDValue N0 = Node->getOperand(0);
  if (!N0.hasOneUse())
    return false;

  auto BitfieldInsert = [&](SDValue N0, unsigned Msb, unsigned Lsb,
                            const SDLoc &DL, MVT VT) {
    unsigned Opc = RISCV::NDS_BFOS;
    // If the Lsb is equal to the Msb, then the Lsb should be 0.
    if (Lsb == Msb)
      Lsb = 0;
    return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
                                  CurDAG->getTargetConstant(Lsb, DL, VT),
                                  CurDAG->getTargetConstant(Msb, DL, VT));
  };

  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);
  const unsigned RightShAmt = N1C->getZExtValue();

  // Transform (sra (shl X, C1), C2) with C1 > C2
  //   -> (NDS.BFOS X, lsb, msb)
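  // For example, with XLen = 32, (sra (shl X, 20), 8) gives
  // Msb = 32 - 8 - 1 = 23 and Lsb = 20 - 8 = 12.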
  if (N0.getOpcode() == ISD::SHL) {
    auto *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01C)
      return false;

    const unsigned LeftShAmt = N01C->getZExtValue();
    // Make sure that this is a bitfield insertion (i.e., the shift-right
    // amount should be less than the left-shift).
    if (LeftShAmt <= RightShAmt)
      return false;

    const unsigned MsbPlusOne = VT.getSizeInBits() - RightShAmt;
    const unsigned Msb = MsbPlusOne - 1;
    const unsigned Lsb = LeftShAmt - RightShAmt;

    SDNode *Sbi = BitfieldInsert(N0, Msb, Lsb, DL, VT);
    ReplaceNode(Node, Sbi);
    return true;
  }

  return false;
}

bool RISCVDAGToDAGISel::tryUnsignedBitfieldExtract(SDNode *Node,
                                                   const SDLoc &DL, MVT VT,
                                                   SDValue X, unsigned Msb,
                                                   unsigned Lsb) {
  unsigned Opc;

  if (Subtarget->hasVendorXTHeadBb()) {
    Opc = RISCV::TH_EXTU;
  } else if (Subtarget->hasVendorXAndesPerf()) {
    Opc = RISCV::NDS_BFOZ;
  } else if (Subtarget->hasVendorXqcibm()) {
    Opc = RISCV::QC_EXTU;
    // QC.EXTU X, width, shamt
    // shamt is the same as Lsb.
    // width is the number of bits to extract starting at Lsb.
    Msb = Msb - Lsb + 1;
  } else {
    // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
    return false;
  }

  SDNode *Ube = CurDAG->getMachineNode(Opc, DL, VT, X,
                                       CurDAG->getTargetConstant(Msb, DL, VT),
                                       CurDAG->getTargetConstant(Lsb, DL, VT));
  ReplaceNode(Node, Ube);
  return true;
}

bool RISCVDAGToDAGISel::tryUnsignedBitfieldInsertInZero(SDNode *Node,
                                                        const SDLoc &DL, MVT VT,
                                                        SDValue X, unsigned Msb,
                                                        unsigned Lsb) {
  // Only supported with XAndesPerf at the moment.
  if (!Subtarget->hasVendorXAndesPerf())
    return false;

  unsigned Opc = RISCV::NDS_BFOZ;

  // If the Lsb is equal to the Msb, then the Lsb should be 0.
  if (Lsb == Msb)
    Lsb = 0;
  SDNode *Ubi = CurDAG->getMachineNode(Opc, DL, VT, X,
                                       CurDAG->getTargetConstant(Lsb, DL, VT),
                                       CurDAG->getTargetConstant(Msb, DL, VT));
  ReplaceNode(Node, Ubi);
  return true;
}

bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
  // Target does not support indexed loads.
  if (!Subtarget->hasVendorXTHeadMemIdx())
    return false;

  LoadSDNode *Ld = cast<LoadSDNode>(Node);
  ISD::MemIndexedMode AM = Ld->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  auto *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
  if (!C)
    return false;

  EVT LoadVT = Ld->getMemoryVT();
  assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
         "Unexpected addressing mode");
  bool IsPre = AM == ISD::PRE_INC;
  bool IsPost = AM == ISD::POST_INC;
  int64_t Offset = C->getSExtValue();

  // The constants that can be encoded in the THeadMemIdx instructions
  // are of the form (sign_extend(imm5) << imm2).
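  // For example, Offset = 48 is encodable as imm5 = 12 with imm2 = 2
  // (12 << 2 == 48), while Offset = 17 cannot be encoded by any shift.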
  unsigned Shift;
  for (Shift = 0; Shift < 4; Shift++)
    if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
      break;

  // Constant cannot be encoded.
  if (Shift == 4)
    return false;

  bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
  unsigned Opcode;
  if (LoadVT == MVT::i8 && IsPre)
    Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
  else if (LoadVT == MVT::i8 && IsPost)
    Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
  else if (LoadVT == MVT::i16 && IsPre)
    Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
  else if (LoadVT == MVT::i16 && IsPost)
    Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
  else if (LoadVT == MVT::i32 && IsPre)
    Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
  else if (LoadVT == MVT::i32 && IsPost)
    Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
  else if (LoadVT == MVT::i64 && IsPre)
    Opcode = RISCV::TH_LDIB;
  else if (LoadVT == MVT::i64 && IsPost)
    Opcode = RISCV::TH_LDIA;
  else
    return false;

  EVT Ty = Ld->getOffset().getValueType();
  SDValue Ops[] = {
      Ld->getBasePtr(),
      CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
      CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty), Ld->getChain()};
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
                                       Ld->getValueType(1), MVT::Other, Ops);

  MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});

  ReplaceNode(Node, New);

  return true;
}

static SDValue buildGPRPair(SelectionDAG *CurDAG, const SDLoc &DL, MVT VT,
                            SDValue Lo, SDValue Hi) {
  SDValue Ops[] = {
      CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), Lo,
      CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), Hi,
      CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};

  return SDValue(
      CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops), 0);
}

// Helper to extract Lo and Hi values from a GPR pair.
static std::pair<SDValue, SDValue>
extractGPRPair(SelectionDAG *CurDAG, const SDLoc &DL, SDValue Pair) {
  SDValue Lo =
      CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL, MVT::i32, Pair);
  SDValue Hi =
      CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL, MVT::i32, Pair);
  return {Lo, Hi};
}

// Try to match WMACC pattern: ADDD where one operand pair comes from a
// widening multiply (both results of UMUL_LOHI, SMUL_LOHI, or WMULSU).
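// For example, (addd alo, ahi, (umul_lohi x, y):0, (umul_lohi x, y):1)
// becomes WMACCU acc, x, y, where acc is the GPR pair built from (ahi:alo).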
bool RISCVDAGToDAGISel::tryWMACC(SDNode *Node, const SDLoc &DL) {
  assert(Node->getOpcode() == RISCVISD::ADDD && "Expected ADDD");
915
916 SDValue Op0Lo = Node->getOperand(0);
917 SDValue Op0Hi = Node->getOperand(1);
918 SDValue Op1Lo = Node->getOperand(2);
919 SDValue Op1Hi = Node->getOperand(3);
920
921 auto IsSupportedMulWithOneUse = [](SDValue Lo, SDValue Hi) {
922 unsigned Opc = Lo.getOpcode();
923 if (Opc != ISD::UMUL_LOHI && Opc != ISD::SMUL_LOHI &&
924 Opc != RISCVISD::WMULSU)
925 return false;
926 return Lo.getNode() == Hi.getNode() && Lo.getResNo() == 0 &&
927 Hi.getResNo() == 1 && Lo.hasOneUse() && Hi.hasOneUse();
928 };
929
930 SDNode *MulNode = nullptr;
931 SDValue AddLo, AddHi;
932
933 // Check if first operand pair is a supported multiply with single use.
934 if (IsSupportedMulWithOneUse(Op0Lo, Op0Hi)) {
935 MulNode = Op0Lo.getNode();
936 AddLo = Op1Lo;
937 AddHi = Op1Hi;
938 }
939 // ADDD is commutative. Check if second operand pair is a supported multiply
940 // with single use.
941 else if (IsSupportedMulWithOneUse(Op1Lo, Op1Hi)) {
942 MulNode = Op1Lo.getNode();
943 AddLo = Op0Lo;
944 AddHi = Op0Hi;
945 } else {
946 return false;
947 }
948
949 unsigned Opc;
950 switch (MulNode->getOpcode()) {
951 default:
952 llvm_unreachable("Unexpected multiply opcode");
953 case ISD::UMUL_LOHI:
954 Opc = RISCV::WMACCU;
955 break;
956 case ISD::SMUL_LOHI:
957 Opc = RISCV::WMACC;
958 break;
959 case RISCVISD::WMULSU:
960 Opc = RISCV::WMACCSU;
961 break;
962 }
963
964 SDValue Acc = buildGPRPair(CurDAG, DL, MVT::Untyped, AddLo, AddHi);
965
966 // WMACC instruction format: rd, rs1, rs2 (rd is accumulator).
967 SDValue M0 = MulNode->getOperand(0);
968 SDValue M1 = MulNode->getOperand(1);
969 MachineSDNode *New =
970 CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Acc, M0, M1);
971
972 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(New, 0));
975 CurDAG->RemoveDeadNode(Node);
976 return true;
977}

static Register getTileReg(uint64_t TileNum) {
  assert(TileNum <= 15 && "Invalid tile number");
  return RISCV::T0 + TileNum;
}

void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
  if (!Subtarget->hasVInstructions())
    return;

  assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");

  SDLoc DL(Node);
  unsigned IntNo = Node->getConstantOperandVal(1);

  assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
          IntNo == Intrinsic::riscv_sf_vc_i_se) &&
         "Unexpected SiFive VCIX intrinsic");

  // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
  unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
  SDValue SEWOp =
      CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
  SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
                                      Node->getOperand(4), Node->getOperand(5),
                                      Node->getOperand(8), SEWOp,
                                      Node->getOperand(0)};

  unsigned Opcode;
  auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
  switch (LMulSDNode->getSExtValue()) {
  case 5:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF8
                                                  : RISCV::PseudoSF_VC_I_SE_MF8;
    break;
  case 6:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF4
                                                  : RISCV::PseudoSF_VC_I_SE_MF4;
    break;
  case 7:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF2
                                                  : RISCV::PseudoSF_VC_I_SE_MF2;
    break;
  case 0:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M1
                                                  : RISCV::PseudoSF_VC_I_SE_M1;
    break;
  case 1:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M2
                                                  : RISCV::PseudoSF_VC_I_SE_M2;
    break;
  case 2:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M4
                                                  : RISCV::PseudoSF_VC_I_SE_M4;
    break;
  case 3:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M8
                                                  : RISCV::PseudoSF_VC_I_SE_M8;
    break;
  }

  ReplaceNode(Node, CurDAG->getMachineNode(
                        Opcode, DL, Node->getSimpleValueType(0), Operands));
}

static unsigned getSegInstNF(unsigned Intrinsic) {
#define INST_NF_CASE(NAME, NF)                                                 \
  case Intrinsic::riscv_##NAME##NF:                                            \
    return NF;
#define INST_NF_CASE_MASK(NAME, NF)                                            \
  case Intrinsic::riscv_##NAME##NF##_mask:                                     \
    return NF;
#define INST_NF_CASE_FF(NAME, NF)                                              \
  case Intrinsic::riscv_##NAME##NF##ff:                                        \
    return NF;
#define INST_NF_CASE_FF_MASK(NAME, NF)                                         \
  case Intrinsic::riscv_##NAME##NF##ff_mask:                                   \
    return NF;
#define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME)                                \
  MACRO_NAME(NAME, 2)                                                          \
  MACRO_NAME(NAME, 3)                                                          \
  MACRO_NAME(NAME, 4)                                                          \
  MACRO_NAME(NAME, 5)                                                          \
  MACRO_NAME(NAME, 6)                                                          \
  MACRO_NAME(NAME, 7)                                                          \
  MACRO_NAME(NAME, 8)
#define INST_ALL_NF_CASE(NAME)                                                 \
  INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME)                                    \
  INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME)
#define INST_ALL_NF_CASE_WITH_FF(NAME)                                         \
  INST_ALL_NF_CASE(NAME)                                                       \
  INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME)                                 \
  INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME)
  switch (Intrinsic) {
  default:
    llvm_unreachable("Unexpected segment load/store intrinsic");
    INST_ALL_NF_CASE_WITH_FF(vlseg)
    INST_ALL_NF_CASE(vlsseg)
    INST_ALL_NF_CASE(vloxseg)
    INST_ALL_NF_CASE(vluxseg)
    INST_ALL_NF_CASE(vsseg)
    INST_ALL_NF_CASE(vssseg)
    INST_ALL_NF_CASE(vsoxseg)
    INST_ALL_NF_CASE(vsuxseg)
  }
}

static bool isApplicableToPLIOrPLUI(int Val) {
  // Check if the immediate is packed i8 or i10.
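  // For example, 0x01010101 (four equal bytes) and 0x00050005 (two equal
  // halfwords whose value fits in i10) both qualify; 0x00051234 does not.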
  int16_t Bit31To16 = Val >> 16;
  int16_t Bit15To0 = Val;
  int8_t Bit15To8 = Bit15To0 >> 8;
  int8_t Bit7To0 = Val;
  if (Bit31To16 != Bit15To0)
    return false;

  return isInt<10>(Bit15To0) || isShiftedInt<10, 6>(Bit15To0) ||
         Bit15To8 == Bit7To0;
}

void RISCVDAGToDAGISel::Select(SDNode *Node) {
  // If we have a custom node, we have already selected.
  if (Node->isMachineOpcode()) {
    LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
    Node->setNodeId(-1);
    return;
  }

  // Instruction Selection not handled by the auto-generated tablegen selection
  // should be handled here.
  unsigned Opcode = Node->getOpcode();
  MVT XLenVT = Subtarget->getXLenVT();
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);

  bool HasBitTest = Subtarget->hasBEXTILike();

  switch (Opcode) {
  case ISD::Constant: {
    assert(VT == Subtarget->getXLenVT() && "Unexpected VT");
    auto *ConstNode = cast<ConstantSDNode>(Node);
    if (ConstNode->isZero()) {
      SDValue New =
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
      ReplaceNode(Node, New.getNode());
      return;
    }
    int64_t Imm = ConstNode->getSExtValue();
    // If only the lower 8 bits are used, try to convert this to a simm6 by
    // sign-extending bit 7. This is neutral without the C extension, and
    // allows C.LI to be used if C is present.
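    // For example, Imm = 0xF0 sign-extends from bit 7 to -16, which fits in
    // a simm6 and can be materialized with a single C.LI.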
    if (!isInt<8>(Imm) && isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) &&
        hasAllBUsers(Node))
      Imm = SignExtend64<8>(Imm);
    // If the upper XLen-16 bits are not used, try to convert this to a simm12
    // by sign-extending bit 15.
    else if (!isInt<16>(Imm) && isUInt<16>(Imm) &&
             hasAllHUsers(Node))
      Imm = SignExtend64<16>(Imm);

    // If the upper XLen-16 bits are not used, the lower 2 bytes are the same,
    // and we can't use li, convert to an xlen splat so we can use pli.b.
    if (Subtarget->hasStdExtP() && !isInt<12>(Imm) &&
        (Imm & 0xff) == ((Imm >> 8) & 0xff) && hasAllHUsers(Node)) {
      // Splat the lower 16 bits to XLen. Sign extend for RV32.
      uint64_t Splat = Imm & 0xffff;
      Splat = (Splat << 16) | Splat;
      if (VT == MVT::i64)
        Imm = Splat << 32 | Splat;
      else
        Imm = SignExtend64<32>(Splat);
    } else {
      // If the upper 32 bits are not used, try to convert this into a simm32
      // by sign-extending bit 31.
      if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
        Imm = SignExtend64<32>(Imm);

      if (VT == MVT::i64 && !isInt<12>(Imm) && !isShiftedInt<20, 12>(Imm) &&
          Subtarget->hasStdExtP() && isApplicableToPLIOrPLUI(Imm) &&
          hasAllWUsers(Node)) {
        // If the value is 4 packed 8-bit integers or 2 packed signed 16-bit
        // integers, copy the lower 32 bits into the upper 32 bits so the
        // constant can rematerialize as PLI_B or PLI_H.
        Imm = ((uint64_t)Imm << 32) | (Imm & 0xFFFFFFFF);
      }
    }

    ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
    return;
  }
  case ISD::ConstantFP: {
    const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();

    bool Is64Bit = Subtarget->is64Bit();
    bool HasZdinx = Subtarget->hasStdExtZdinx();

    bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
    SDValue Imm;
    // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
    // create an integer immediate.
    if (APF.isPosZero() || NegZeroF64) {
      if (VT == MVT::f64 && HasZdinx && !Is64Bit)
        Imm = CurDAG->getRegister(RISCV::X0_Pair, MVT::f64);
      else
        Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
    } else {
      Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
                      *Subtarget);
    }

    unsigned Opc;
    switch (VT.SimpleTy) {
    default:
      llvm_unreachable("Unexpected size");
    case MVT::bf16:
      assert(Subtarget->hasStdExtZfbfmin());
      Opc = RISCV::FMV_H_X;
      break;
    case MVT::f16:
      Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
      break;
    case MVT::f32:
      Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
      break;
    case MVT::f64:
      // For RV32, we can't move from a GPR, we need to convert instead. This
      // should only happen for +0.0 and -0.0.
      assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
      if (HasZdinx)
        Opc = RISCV::COPY;
      else
        Opc = Is64Bit ? RISCV::FMV_D_X : RISCV::FCVT_D_W;
      break;
    }

    SDNode *Res;
    if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
      Res =
          CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode();
    } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
      Res =
          CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode();
    } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
      Res = CurDAG->getMachineNode(
          Opc, DL, VT, Imm,
          CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
    else
      Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);

    // For f64 -0.0, we need to insert a fneg.d idiom.
    if (NegZeroF64) {
      Opc = RISCV::FSGNJN_D;
      if (HasZdinx)
        Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
      Res =
          CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
    }

    ReplaceNode(Node, Res);
    return;
  }
  case RISCVISD::BuildGPRPair:
  case RISCVISD::BuildPairF64:
  case RISCVISD::BuildPairGPRVec: {
    if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx())
      break;

    assert((!Subtarget->is64Bit() || Opcode != RISCVISD::BuildPairF64) &&
           "BuildPairF64 only handled here on rv32i_zdinx");

    SDValue N =
        buildGPRPair(CurDAG, DL, VT, Node->getOperand(0), Node->getOperand(1));
    ReplaceNode(Node, N.getNode());
    return;
  }
  case RISCVISD::SplitGPRPair:
  case RISCVISD::SplitF64:
  case RISCVISD::SplitGPRVec: {
    if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) {
      assert((!Subtarget->is64Bit() || Opcode != RISCVISD::SplitF64) &&
             "SplitF64 only handled here on rv32i_zdinx");

      if (!SDValue(Node, 0).use_empty()) {
        SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
                                                    Node->getValueType(0),
                                                    Node->getOperand(0));
        ReplaceUses(SDValue(Node, 0), Lo);
      }

      if (!SDValue(Node, 1).use_empty()) {
        SDValue Hi = CurDAG->getTargetExtractSubreg(
            RISCV::sub_gpr_odd, DL, Node->getValueType(1), Node->getOperand(0));
        ReplaceUses(SDValue(Node, 1), Hi);
      }

      CurDAG->RemoveDeadNode(Node);
      return;
    }

    if (!Subtarget->hasStdExtZfa())
      break;
    assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
           "Unexpected subtarget");

    // With Zfa, lower to fmv.x.w and fmvh.x.d.
    if (!SDValue(Node, 0).use_empty()) {
      SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
                                          Node->getOperand(0));
      ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
    }
    if (!SDValue(Node, 1).use_empty()) {
      SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
                                          Node->getOperand(0));
      ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
    }

    CurDAG->RemoveDeadNode(Node);
    return;
  }
  case ISD::SHL: {
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
        !isa<ConstantSDNode>(N0.getOperand(1)))
      break;
    unsigned ShAmt = N1C->getZExtValue();
    uint64_t Mask = N0.getConstantOperandVal(1);

    if (isShiftedMask_64(Mask)) {
      unsigned XLen = Subtarget->getXLen();
      unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
      unsigned TrailingZeros = llvm::countr_zero(Mask);
      if (ShAmt <= 32 && TrailingZeros > 0 && LeadingZeros == 32) {
        // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
        // where C2 has 32 leading zeros and C3 trailing zeros.
        SDNode *SRLIW = CurDAG->getMachineNode(
            RISCV::SRLIW, DL, VT, N0.getOperand(0),
            CurDAG->getTargetConstant(TrailingZeros, DL, VT));
        SDNode *SLLI = CurDAG->getMachineNode(
            RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
            CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
        ReplaceNode(Node, SLLI);
        return;
      }
      if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
          XLen - LeadingZeros > 11 && LeadingZeros != 32) {
        // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
        // where C2 has C4 leading zeros and no trailing zeros.
        // This is profitable if the "and" was to be lowered to
        // (srli (slli X, C4), C4) and not (andi X, C2).
        // For "LeadingZeros == 32":
        // - with Zba it's just (slli.uw X, C)
        // - without Zba a tablegen pattern applies the very same
        //   transform as we would have done here
        SDNode *SLLI = CurDAG->getMachineNode(
            RISCV::SLLI, DL, VT, N0.getOperand(0),
            CurDAG->getTargetConstant(LeadingZeros, DL, VT));
        SDNode *SRLI = CurDAG->getMachineNode(
            RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
            CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT));
        ReplaceNode(Node, SRLI);
        return;
      }
    }
    break;
  }
  case ISD::SRL: {
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
      break;
    unsigned ShAmt = N1C->getZExtValue();
    uint64_t Mask = N0.getConstantOperandVal(1);

    // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
    // 32 leading zeros and C3 trailing zeros.
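    // For example, (srl (and X, 0xFFFFFF00), 4) becomes
    // (slli (srliw X, 8), 4): the mask has 32 leading and 8 trailing zeros,
    // and 8 > 4.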
    if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
      unsigned XLen = Subtarget->getXLen();
      unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
      unsigned TrailingZeros = llvm::countr_zero(Mask);
      if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
        SDNode *SRLIW = CurDAG->getMachineNode(
            RISCV::SRLIW, DL, VT, N0.getOperand(0),
            CurDAG->getTargetConstant(TrailingZeros, DL, VT));
        SDNode *SLLI = CurDAG->getMachineNode(
            RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
            CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
        ReplaceNode(Node, SLLI);
        return;
      }
    }

    // Optimize (srl (and X, C2), C) ->
    //   (srli (slli X, (XLen-C3)), (XLen-C3) + C)
    // where C2 is a mask with C3 trailing ones.
    // Taking into account that the C2 may have had lower bits unset by
    // SimplifyDemandedBits. This avoids materializing the C2 immediate.
    // This pattern occurs when type legalizing right shifts for types with
    // less than XLen bits.
    Mask |= maskTrailingOnes<uint64_t>(ShAmt);
    if (!isMask_64(Mask))
      break;
    unsigned TrailingOnes = llvm::countr_one(Mask);
    if (ShAmt >= TrailingOnes)
      break;
    // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
    if (TrailingOnes == 32) {
      SDNode *SRLI = CurDAG->getMachineNode(
          Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
          N0.getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
      ReplaceNode(Node, SRLI);
      return;
    }

    // Only do the remaining transforms if the AND has one use.
    if (!N0.hasOneUse())
      break;

    // If C2 is (1 << ShAmt), use bexti or th.tst if possible.
    if (HasBitTest && ShAmt + 1 == TrailingOnes) {
      SDNode *BEXTI = CurDAG->getMachineNode(
          Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
          N0.getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
      ReplaceNode(Node, BEXTI);
      return;
    }

    const unsigned Msb = TrailingOnes - 1;
    const unsigned Lsb = ShAmt;
    if (tryUnsignedBitfieldExtract(Node, DL, VT, N0.getOperand(0), Msb, Lsb))
      return;

    unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
                               CurDAG->getTargetConstant(LShAmt, DL, VT));
    SDNode *SRLI = CurDAG->getMachineNode(
        RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
        CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
    ReplaceNode(Node, SRLI);
    return;
  }
  case ISD::SRA: {
    if (trySignedBitfieldExtract(Node))
      return;

    if (trySignedBitfieldInsertInSign(Node))
      return;

    // Optimize (sra (sext_inreg X, i16), C) ->
    //   (srai (slli X, (XLen-16)), (XLen-16) + C)
    // And (sra (sext_inreg X, i8), C) ->
    //   (srai (slli X, (XLen-8)), (XLen-8) + C)
    // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
    // This transform matches the code we get without Zbb. The shifts are more
    // compressible, and this can help expose CSE opportunities in the sdiv by
    // constant optimization.
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
      break;
    unsigned ShAmt = N1C->getZExtValue();
    unsigned ExtSize =
        cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
    // ExtSize of 32 should use sraiw via tablegen pattern.
    if (ExtSize >= 32 || ShAmt >= ExtSize)
      break;
    unsigned LShAmt = Subtarget->getXLen() - ExtSize;
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
                               CurDAG->getTargetConstant(LShAmt, DL, VT));
    SDNode *SRAI = CurDAG->getMachineNode(
        RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
        CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
    ReplaceNode(Node, SRAI);
    return;
  }
  case ISD::SIGN_EXTEND_INREG: {
    // Optimize (sext_inreg (srl X, C), i8/i16) ->
    //   (srai (slli X, XLen-ExtSize-C), XLen-ExtSize)
    // This is a bitfield extract pattern where we're extracting a signed
    // 8-bit or 16-bit field from position C.
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::SRL || !N0.hasOneUse())
      break;

    auto *ShAmtC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!ShAmtC)
      break;

    unsigned ExtSize =
        cast<VTSDNode>(Node->getOperand(1))->getVT().getSizeInBits();
    unsigned ShAmt = ShAmtC->getZExtValue();
    unsigned XLen = Subtarget->getXLen();

    // Only handle types less than 32, and make sure the shift amount is valid.
    if (ExtSize >= 32 || ShAmt >= XLen - ExtSize)
      break;

    unsigned LShAmt = XLen - ExtSize - ShAmt;
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
                               CurDAG->getTargetConstant(LShAmt, DL, VT));
    SDNode *SRAI = CurDAG->getMachineNode(
        RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
        CurDAG->getTargetConstant(XLen - ExtSize, DL, VT));
    ReplaceNode(Node, SRAI);
    return;
  }
  case ISD::OR: {
    if (tryShrinkShlLogicImm(Node))
      return;

    break;
  }
  case ISD::XOR:
    if (tryShrinkShlLogicImm(Node))
      return;

    break;
  case ISD::AND: {
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;

    SDValue N0 = Node->getOperand(0);

    bool LeftShift = N0.getOpcode() == ISD::SHL;
    if (LeftShift || N0.getOpcode() == ISD::SRL) {
      auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C)
        break;
      unsigned C2 = C->getZExtValue();
      unsigned XLen = Subtarget->getXLen();
      assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");

      // Keep track of whether this is a c.andi. If we can't use c.andi, the
      // shift pair might offer more compression opportunities.
      // TODO: We could check for C extension here, but we don't have many lit
      // tests with the C extension enabled so not checking gets better
      // coverage.
      // TODO: What if ANDI is faster than the shift?
      bool IsCANDI = isInt<6>(N1C->getSExtValue());

      uint64_t C1 = N1C->getZExtValue();

      // Clear irrelevant bits in the mask.
      if (LeftShift)
        C1 &= maskTrailingZeros<uint64_t>(C2);
      else
        C1 &= maskTrailingOnes<uint64_t>(XLen - C2);

      // Some transforms should only be done if the shift has a single use or
      // the AND would become (srli (slli X, 32), 32).
      bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);

      SDValue X = N0.getOperand(0);

      // Turn (and (srl x, c2), c1) -> (srli (slli x, c3-c2), c3) if c1 is a
      // mask with c3 leading zeros.
      if (!LeftShift && isMask_64(C1)) {
        unsigned Leading = XLen - llvm::bit_width(C1);
        if (C2 < Leading) {
          // If the number of leading zeros is C2+32, this can be SRLIW.
          if (C2 + 32 == Leading) {
            SDNode *SRLIW = CurDAG->getMachineNode(
                RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
            ReplaceNode(Node, SRLIW);
            return;
          }

          // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
          // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
          //
          // This pattern occurs when (i32 (srl (sra Y, 31), c3 - 32)) is type
          // legalized and goes through DAG combine.
          if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
              X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
              cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
            SDNode *SRAIW =
                CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
                                       CurDAG->getTargetConstant(31, DL, VT));
            SDNode *SRLIW = CurDAG->getMachineNode(
                RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
                CurDAG->getTargetConstant(Leading - 32, DL, VT));
            ReplaceNode(Node, SRLIW);
            return;
          }

          // Try to use an unsigned bitfield extract (e.g., th.extu) if
          // available.
          // Transform (and (srl x, C2), C1)
          //        -> (<bfextract> x, msb, lsb)
          //
          // Make sure to keep this below the SRLIW cases, as we always want to
          // prefer the more common instruction.
          const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
          const unsigned Lsb = C2;
          if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
            return;

          // (srli (slli x, c3-c2), c3).
          // Skip if we could use (zext.w (sraiw X, C2)).
          bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
                      X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
                      cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
          // Also skip if we can use bexti or th.tst.
          Skip |= HasBitTest && Leading == XLen - 1;
          if (OneUseOrZExtW && !Skip) {
            SDNode *SLLI = CurDAG->getMachineNode(
                RISCV::SLLI, DL, VT, X,
                CurDAG->getTargetConstant(Leading - C2, DL, VT));
            SDNode *SRLI = CurDAG->getMachineNode(
                RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
                CurDAG->getTargetConstant(Leading, DL, VT));
            ReplaceNode(Node, SRLI);
            return;
          }
        }
      }

      // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a
      // mask shifted by c2 bits with c3 leading zeros.
      if (LeftShift && isShiftedMask_64(C1)) {
        unsigned Leading = XLen - llvm::bit_width(C1);

        if (C2 + Leading < XLen &&
            C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
          // Use slli.uw when possible.
          if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
            SDNode *SLLI_UW =
                CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
                                       CurDAG->getTargetConstant(C2, DL, VT));
            ReplaceNode(Node, SLLI_UW);
            return;
          }

          // Try to use an unsigned bitfield insert (e.g., nds.bfoz) if
          // available.
          // Transform (and (shl x, c2), c1)
          //        -> (<bfinsert> x, msb, lsb)
          // e.g.
          //   (and (shl x, 12), 0x00fff000)
          //   If XLen = 32 and C2 = 12, then
          //   Msb = 32 - 8 - 1 = 23 and Lsb = 12
          const unsigned Msb = XLen - Leading - 1;
          const unsigned Lsb = C2;
          if (tryUnsignedBitfieldInsertInZero(Node, DL, VT, X, Msb, Lsb))
            return;

          if (OneUseOrZExtW && !IsCANDI) {
            // (packh x0, X)
            if (Subtarget->hasStdExtZbkb() && C1 == 0xff00 && C2 == 8) {
              SDNode *PACKH = CurDAG->getMachineNode(
                  RISCV::PACKH, DL, VT,
                  CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()), X);
              ReplaceNode(Node, PACKH);
              return;
            }
            // (srli (slli x, c2+c3), c3)
            SDNode *SLLI = CurDAG->getMachineNode(
                RISCV::SLLI, DL, VT, X,
                CurDAG->getTargetConstant(C2 + Leading, DL, VT));
            SDNode *SRLI = CurDAG->getMachineNode(
                RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
                CurDAG->getTargetConstant(Leading, DL, VT));
            ReplaceNode(Node, SRLI);
            return;
          }
        }
      }

      // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
      // shifted mask with c2 leading zeros and c3 trailing zeros.
      if (!LeftShift && isShiftedMask_64(C1)) {
        unsigned Leading = XLen - llvm::bit_width(C1);
        unsigned Trailing = llvm::countr_zero(C1);
        if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
            !IsCANDI) {
          unsigned SrliOpc = RISCV::SRLI;
          // If the input is zexti32, we should use SRLIW.
          if (X.getOpcode() == ISD::AND &&
              isa<ConstantSDNode>(X.getOperand(1)) &&
              X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
            SrliOpc = RISCV::SRLIW;
            X = X.getOperand(0);
          }
          SDNode *SRLI = CurDAG->getMachineNode(
              SrliOpc, DL, VT, X,
              CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
              CurDAG->getTargetConstant(Trailing, DL, VT));
          ReplaceNode(Node, SLLI);
          return;
        }
        // If the leading zero count is C2+32, we can use SRLIW instead of
        // SRLI.
        if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
            OneUseOrZExtW && !IsCANDI) {
          SDNode *SRLIW = CurDAG->getMachineNode(
              RISCV::SRLIW, DL, VT, X,
              CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
              CurDAG->getTargetConstant(Trailing, DL, VT));
          ReplaceNode(Node, SLLI);
          return;
        }
        // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
        if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
            OneUseOrZExtW && Subtarget->hasStdExtZba()) {
          SDNode *SRLI = CurDAG->getMachineNode(
              RISCV::SRLI, DL, VT, X,
              CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
          SDNode *SLLI_UW = CurDAG->getMachineNode(
              RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
              CurDAG->getTargetConstant(Trailing, DL, VT));
          ReplaceNode(Node, SLLI_UW);
          return;
        }
      }
1705
1706 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1707 // shifted mask with no leading zeros and c3 trailing zeros.
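// Illustrative example (assumed values): with XLen=64, c2=2 and
// c1=0xffffffffffffff00 (no leading, 8 trailing zeros),
// (and (shl x, 2), c1) becomes (slli (srli x, 6), 8).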
1708 if (LeftShift && isShiftedMask_64(C1)) {
1709 unsigned Leading = XLen - llvm::bit_width(C1);
1710 unsigned Trailing = llvm::countr_zero(C1);
1711 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1712 SDNode *SRLI = CurDAG->getMachineNode(
1713 RISCV::SRLI, DL, VT, X,
1714 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1715 SDNode *SLLI = CurDAG->getMachineNode(
1716 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1717 CurDAG->getTargetConstant(Trailing, DL, VT));
1718 ReplaceNode(Node, SLLI);
1719 return;
1720 }
1721 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1722 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1723 SDNode *SRLIW = CurDAG->getMachineNode(
1724 RISCV::SRLIW, DL, VT, X,
1725 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1726 SDNode *SLLI = CurDAG->getMachineNode(
1727 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1728 CurDAG->getTargetConstant(Trailing, DL, VT));
1729 ReplaceNode(Node, SLLI);
1730 return;
1731 }
1732
1733 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1734 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1735 Subtarget->hasStdExtZba()) {
1736 SDNode *SRLI = CurDAG->getMachineNode(
1737 RISCV::SRLI, DL, VT, X,
1738 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1739 SDNode *SLLI_UW = CurDAG->getMachineNode(
1740 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1741 CurDAG->getTargetConstant(Trailing, DL, VT));
1742 ReplaceNode(Node, SLLI_UW);
1743 return;
1744 }
1745 }
1746 }
1747
1748 const uint64_t C1 = N1C->getZExtValue();
1749
1750 if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
1751 N0.hasOneUse()) {
1752 unsigned C2 = N0.getConstantOperandVal(1);
1753 unsigned XLen = Subtarget->getXLen();
1754 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1755
1756 SDValue X = N0.getOperand(0);
1757
1758 // Prefer SRAIW + ANDI when possible.
1759 bool Skip = C2 > 32 && isInt<12>(N1C->getSExtValue()) &&
1760 X.getOpcode() == ISD::SHL &&
1761 isa<ConstantSDNode>(X.getOperand(1)) &&
1762 X.getConstantOperandVal(1) == 32;
1763 // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a
1764 // mask with c3 leading zeros and c2 is larger than c3.
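// Illustrative example (assumed values): with XLen=64, c2=40 and
// c1=0xffffffff (32 leading zeros), (and (sra x, 40), 0xffffffff)
// becomes (srli (srai x, 8), 32).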
1765 if (isMask_64(C1) && !Skip) {
1766 unsigned Leading = XLen - llvm::bit_width(C1);
1767 if (C2 > Leading) {
1768 SDNode *SRAI = CurDAG->getMachineNode(
1769 RISCV::SRAI, DL, VT, X,
1770 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1771 SDNode *SRLI = CurDAG->getMachineNode(
1772 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1773 CurDAG->getTargetConstant(Leading, DL, VT));
1774 ReplaceNode(Node, SRLI);
1775 return;
1776 }
1777 }
1778
1779 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
1780 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
1781 // use (slli (srli (srai y, c2 - c3), c3 + c4), c4).
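// Illustrative example (assumed values): with XLen=64, c2=16 and
// c1=0x0fffffffffffff00 (c3=4 leading, c4=8 trailing zeros), this
// selects (slli (srli (srai y, 12), 12), 8).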
1782 if (isShiftedMask_64(C1) && !Skip) {
1783 unsigned Leading = XLen - llvm::bit_width(C1);
1784 unsigned Trailing = llvm::countr_zero(C1);
1785 if (C2 > Leading && Leading > 0 && Trailing > 0) {
1786 SDNode *SRAI = CurDAG->getMachineNode(
1787 RISCV::SRAI, DL, VT, N0.getOperand(0),
1788 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1789 SDNode *SRLI = CurDAG->getMachineNode(
1790 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1791 CurDAG->getTargetConstant(Leading + Trailing, DL, VT));
1792 SDNode *SLLI = CurDAG->getMachineNode(
1793 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1794 CurDAG->getTargetConstant(Trailing, DL, VT));
1795 ReplaceNode(Node, SLLI);
1796 return;
1797 }
1798 }
1799 }
1800
1801 // If C1 masks off the upper bits only (but can't be formed as an
1802 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1803 // available.
1804 // Transform (and x, C1)
1805 // -> (<bfextract> x, msb, lsb)
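// e.g. (and x, 0x3ffff) (illustrative constant, too wide for ANDI)
// extracts bits [17:0], so Msb = 17 and Lsb = 0.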
1806 if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue()) &&
1807 !(C1 == 0xffff && Subtarget->hasStdExtZbb()) &&
1808 !(C1 == 0xffffffff && Subtarget->hasStdExtZba())) {
1809 const unsigned Msb = llvm::bit_width(C1) - 1;
1810 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1811 return;
1812 }
1813
1814 if (tryShrinkShlLogicImm(Node))
1815 return;
1816
1817 break;
1818 }
1819 case ISD::MUL: {
1820 // Special case for calculating (mul (and X, C2), C1) where the full product
1821 // fits in XLen bits. We can shift X left by the number of leading zeros in
1822 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1823 // product has XLen trailing zeros, putting it in the output of MULHU. This
1824 // can avoid materializing a constant in a register for C2.
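// Illustrative example (assumed values): on RV64,
// (mul (and X, 0xff), 0x1234) has lzcnt(C2)=56, so it becomes
// (mulhu (slli X, 56), 0x123400); the slli drops the bits the AND would
// clear, and the 21-bit product lands in MULHU's upper half.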
1825
1826 // RHS should be a constant.
1827 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1828 if (!N1C || !N1C->hasOneUse())
1829 break;
1830
1831 // LHS should be an AND with constant.
1832 SDValue N0 = Node->getOperand(0);
1833 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1834 break;
1835
1836 uint64_t C2 = N0.getConstantOperandVal(1);
1837
1838 // Constant should be a mask.
1839 if (!isMask_64(C2))
1840 break;
1841
1842 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1843 // multiple users or the constant is a simm12. This avoids inserting a
1844 // shift while uses of the AND/ZEXT remain. Shifting a simm12 will likely
1845 // make it more costly to materialize. Otherwise, using a SLLI might allow
1846 // it to be compressed.
1847 bool IsANDIOrZExt =
1848 isInt<12>(C2) ||
1849 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1850 // With XTHeadBb, we can use TH.EXTU.
1851 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1852 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1853 break;
1854 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1855 // the constant is a simm32.
1856 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1857 // With XTHeadBb, we can use TH.EXTU.
1858 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1859 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1860 break;
1861
1862 // We need to shift left the AND input and C1 by a total of XLen bits.
1863
1864 // How far left do we need to shift the AND input?
1865 unsigned XLen = Subtarget->getXLen();
1866 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1867
1868 // The constant gets shifted by the remaining amount unless that would
1869 // shift bits out.
1870 uint64_t C1 = N1C->getZExtValue();
1871 unsigned ConstantShift = XLen - LeadingZeros;
1872 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1873 break;
1874
1875 uint64_t ShiftedC1 = C1 << ConstantShift;
1876 // If this is RV32, we need to sign extend the constant.
1877 if (XLen == 32)
1878 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1879
1880 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1881 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1882 SDNode *SLLI =
1883 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1884 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1885 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1886 SDValue(SLLI, 0), SDValue(Imm, 0));
1887 ReplaceNode(Node, MULHU);
1888 return;
1889 }
1890 case ISD::SMUL_LOHI:
1891 case ISD::UMUL_LOHI:
1892 case RISCVISD::WMULSU:
1893 case RISCVISD::WADDU:
1894 case RISCVISD::WSUBU: {
1895 assert(Subtarget->hasStdExtP() && !Subtarget->is64Bit() && VT == MVT::i32 &&
1896 "Unexpected opcode");
1897
1898 unsigned Opc;
1899 switch (Node->getOpcode()) {
1900 default:
1901 llvm_unreachable("Unexpected opcode");
1902 case ISD::SMUL_LOHI:
1903 Opc = RISCV::WMUL;
1904 break;
1905 case ISD::UMUL_LOHI:
1906 Opc = RISCV::WMULU;
1907 break;
1908 case RISCVISD::WMULSU:
1909 Opc = RISCV::WMULSU;
1910 break;
1911 case RISCVISD::WADDU:
1912 Opc = RISCV::WADDU;
1913 break;
1914 case RISCVISD::WSUBU:
1915 Opc = RISCV::WSUBU;
1916 break;
1917 }
1918
1919 SDNode *Result = CurDAG->getMachineNode(
1920 Opc, DL, MVT::Untyped, Node->getOperand(0), Node->getOperand(1));
1921
1922 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(Result, 0));
1923 ReplaceUses(SDValue(Node, 0), Lo);
1924 ReplaceUses(SDValue(Node, 1), Hi);
1925 CurDAG->RemoveDeadNode(Node);
1926 return;
1927 }
1928 case RISCVISD::WSLL:
1929 case RISCVISD::WSLA: {
1930 // Custom select WSLL/WSLA for RV32P.
1931 assert(Subtarget->hasStdExtP() && !Subtarget->is64Bit() && VT == MVT::i32 &&
1932 "Unexpected opcode");
1933
1934 bool IsSigned = Node->getOpcode() == RISCVISD::WSLA;
1935
1936 SDValue ShAmt = Node->getOperand(1);
1937
1938 unsigned Opc;
1939
1940 auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt);
1941 if (ShAmtC && ShAmtC->getZExtValue() < 64) {
1942 Opc = IsSigned ? RISCV::WSLAI : RISCV::WSLLI;
1943 ShAmt = CurDAG->getTargetConstant(ShAmtC->getZExtValue(), DL, XLenVT);
1944 } else {
1945 Opc = IsSigned ? RISCV::WSLA : RISCV::WSLL;
1946 }
1947
1948 SDNode *WShift = CurDAG->getMachineNode(Opc, DL, MVT::Untyped,
1949 Node->getOperand(0), ShAmt);
1950
1951 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(WShift, 0));
1952 ReplaceUses(SDValue(Node, 0), Lo);
1953 ReplaceUses(SDValue(Node, 1), Hi);
1954 CurDAG->RemoveDeadNode(Node);
1955 return;
1956 }
1957 case ISD::LOAD: {
1958 if (tryIndexedLoad(Node))
1959 return;
1960
1961 if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
1962 // We match a post-incrementing load here.
1963 LoadSDNode *Load = cast<LoadSDNode>(Node);
1964 if (Load->getAddressingMode() != ISD::POST_INC)
1965 break;
1966
1967 SDValue Chain = Node->getOperand(0);
1968 SDValue Base = Node->getOperand(1);
1969 SDValue Offset = Node->getOperand(2);
1970
1971 bool Simm12 = false;
1972 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1973
1974 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
1975 int ConstantVal = ConstantOffset->getSExtValue();
1976 Simm12 = isInt<12>(ConstantVal);
1977 if (Simm12)
1978 Offset = CurDAG->getSignedTargetConstant(ConstantVal, SDLoc(Offset),
1979 Offset.getValueType());
1980 }
1981
1982 unsigned Opcode = 0;
1983 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1984 case MVT::i8:
1985 if (Simm12 && SignExtend)
1986 Opcode = RISCV::CV_LB_ri_inc;
1987 else if (Simm12 && !SignExtend)
1988 Opcode = RISCV::CV_LBU_ri_inc;
1989 else if (!Simm12 && SignExtend)
1990 Opcode = RISCV::CV_LB_rr_inc;
1991 else
1992 Opcode = RISCV::CV_LBU_rr_inc;
1993 break;
1994 case MVT::i16:
1995 if (Simm12 && SignExtend)
1996 Opcode = RISCV::CV_LH_ri_inc;
1997 else if (Simm12 && !SignExtend)
1998 Opcode = RISCV::CV_LHU_ri_inc;
1999 else if (!Simm12 && SignExtend)
2000 Opcode = RISCV::CV_LH_rr_inc;
2001 else
2002 Opcode = RISCV::CV_LHU_rr_inc;
2003 break;
2004 case MVT::i32:
2005 if (Simm12)
2006 Opcode = RISCV::CV_LW_ri_inc;
2007 else
2008 Opcode = RISCV::CV_LW_rr_inc;
2009 break;
2010 default:
2011 break;
2012 }
2013 if (!Opcode)
2014 break;
2015
2016 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
2017 Chain.getSimpleValueType(), Base,
2018 Offset, Chain));
2019 return;
2020 }
2021 break;
2022 }
2023 case RISCVISD::LD_RV32: {
2024 assert(Subtarget->hasStdExtZilsd() && "LD_RV32 is only used with Zilsd");
2025
2026 SDValue Base, Offset;
2027 SDValue Chain = Node->getOperand(0);
2028 SDValue Addr = Node->getOperand(1);
2029 SelectAddrRegImm(Addr, Base, Offset);
2030
2031 SDValue Ops[] = {Base, Offset, Chain};
2032 MachineSDNode *New = CurDAG->getMachineNode(
2033 RISCV::LD_RV32, DL, {MVT::Untyped, MVT::Other}, Ops);
2034 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(New, 0));
2035 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
2036 ReplaceUses(SDValue(Node, 0), Lo);
2037 ReplaceUses(SDValue(Node, 1), Hi);
2038 ReplaceUses(SDValue(Node, 2), SDValue(New, 1));
2039 CurDAG->RemoveDeadNode(Node);
2040 return;
2041 }
2042 case RISCVISD::SD_RV32: {
2043 SDValue Base, Offset;
2044 SDValue Chain = Node->getOperand(0);
2045 SDValue Addr = Node->getOperand(3);
2046 SelectAddrRegImm(Addr, Base, Offset);
2047
2048 SDValue Lo = Node->getOperand(1);
2049 SDValue Hi = Node->getOperand(2);
2050
2051 SDValue RegPair;
2052 // Peephole to use X0_Pair for storing zero.
2053 if (isNullConstant(Lo) && isNullConstant(Hi)) {
2054 RegPair = CurDAG->getRegister(RISCV::X0_Pair, MVT::Untyped);
2055 } else {
2056 RegPair = buildGPRPair(CurDAG, DL, MVT::Untyped, Lo, Hi);
2057 }
2058
2059 MachineSDNode *New = CurDAG->getMachineNode(RISCV::SD_RV32, DL, MVT::Other,
2060 {RegPair, Base, Offset, Chain});
2061 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
2062 ReplaceUses(SDValue(Node, 0), SDValue(New, 0));
2063 CurDAG->RemoveDeadNode(Node);
2064 return;
2065 }
2066 case RISCVISD::ADDD:
2067 // Try to match WMACC pattern: ADDD where one operand pair comes from a
2068 // widening multiply.
2070 return;
2071
2072 // Fall through to regular ADDD selection.
2073 [[fallthrough]];
2074 case RISCVISD::SUBD:
2075 case RISCVISD::PPAIRE_DB:
2076 case RISCVISD::WADDAU:
2077 case RISCVISD::WSUBAU: {
2078 assert(!Subtarget->is64Bit() && "Unexpected opcode");
2079 assert(
2080 (Node->getOpcode() != RISCVISD::PPAIRE_DB || Subtarget->hasStdExtP()) &&
2081 "Unexpected opcode");
2082
2083 SDValue Op0Lo = Node->getOperand(0);
2084 SDValue Op0Hi = Node->getOperand(1);
2085
2086 SDValue Op0;
2087 if (isNullConstant(Op0Lo) && isNullConstant(Op0Hi)) {
2088 Op0 = CurDAG->getRegister(RISCV::X0_Pair, MVT::Untyped);
2089 } else {
2090 Op0 = buildGPRPair(CurDAG, DL, MVT::Untyped, Op0Lo, Op0Hi);
2091 }
2092
2093 SDValue Op1Lo = Node->getOperand(2);
2094 SDValue Op1Hi = Node->getOperand(3);
2095
2096 MachineSDNode *New;
2097 if (Opcode == RISCVISD::WADDAU || Opcode == RISCVISD::WSUBAU) {
2098 // WADDAU/WSUBAU: Op0 is the accumulator (GPRPair), Op1Lo and Op1Hi are
2099 // the two 32-bit values.
2100 unsigned Opc = Opcode == RISCVISD::WADDAU ? RISCV::WADDAU : RISCV::WSUBAU;
2101 New = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Op0, Op1Lo, Op1Hi);
2102 } else {
2103 SDValue Op1 = buildGPRPair(CurDAG, DL, MVT::Untyped, Op1Lo, Op1Hi);
2104
2105 unsigned Opc;
2106 switch (Opcode) {
2107 default:
2108 llvm_unreachable("Unexpected opcode");
2109 case RISCVISD::ADDD:
2110 Opc = RISCV::ADDD;
2111 break;
2112 case RISCVISD::SUBD:
2113 Opc = RISCV::SUBD;
2114 break;
2115 case RISCVISD::PPAIRE_DB:
2116 Opc = RISCV::PPAIRE_DB;
2117 break;
2118 }
2119 New = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Op0, Op1);
2120 }
2121
2122 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(New, 0));
2123 ReplaceUses(SDValue(Node, 0), Lo);
2124 ReplaceUses(SDValue(Node, 1), Hi);
2125 CurDAG->RemoveDeadNode(Node);
2126 return;
2127 }
2128 case ISD::INTRINSIC_WO_CHAIN: {
2129 unsigned IntNo = Node->getConstantOperandVal(0);
2130 switch (IntNo) {
2131 // By default we do not custom select any intrinsic.
2132 default:
2133 break;
2134 case Intrinsic::riscv_vmsgeu:
2135 case Intrinsic::riscv_vmsge: {
2136 SDValue Src1 = Node->getOperand(1);
2137 SDValue Src2 = Node->getOperand(2);
2138 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
2139 bool IsCmpConstant = false;
2140 bool IsCmpMinimum = false;
2141 // Only custom select scalar second operand.
2142 if (Src2.getValueType() != XLenVT)
2143 break;
2144 // Small constants are handled with patterns.
2145 int64_t CVal = 0;
2146 MVT Src1VT = Src1.getSimpleValueType();
2147 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
2148 IsCmpConstant = true;
2149 CVal = C->getSExtValue();
2150 if (CVal >= -15 && CVal <= 16) {
2151 if (!IsUnsigned || CVal != 0)
2152 break;
2153 IsCmpMinimum = true;
2154 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
2155 Src1VT.getScalarSizeInBits())
2156 .getSExtValue()) {
2157 IsCmpMinimum = true;
2158 }
2159 }
2160 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode;
2161 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
2162 default:
2163 llvm_unreachable("Unexpected LMUL!");
2164#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
2165 case RISCVVType::lmulenum: \
2166 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
2167 : RISCV::PseudoVMSLT_VX_##suffix; \
2168 VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix \
2169 : RISCV::PseudoVMSGT_VX_##suffix; \
2170 break;
2171 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
2172 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
2173 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
2174 CASE_VMSLT_OPCODES(LMUL_1, M1)
2175 CASE_VMSLT_OPCODES(LMUL_2, M2)
2176 CASE_VMSLT_OPCODES(LMUL_4, M4)
2177 CASE_VMSLT_OPCODES(LMUL_8, M8)
2178#undef CASE_VMSLT_OPCODES
2179 }
2180 // Mask operations use the LMUL from the mask type.
2181 switch (RISCVTargetLowering::getLMUL(VT)) {
2182 default:
2183 llvm_unreachable("Unexpected LMUL!");
2184#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix) \
2185 case RISCVVType::lmulenum: \
2186 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
2187 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix; \
2188 break;
2189 CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64)
2190 CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32)
2191 CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16)
2192 CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8)
2193 CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4)
2194 CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2)
2195 CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1)
2196#undef CASE_VMNAND_VMSET_OPCODES
2197 }
2198 SDValue SEW = CurDAG->getTargetConstant(
2199 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
2200 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
2201 SDValue VL;
2202 selectVLOp(Node->getOperand(3), VL);
2203
2204 // If vmsge(u) with minimum value, expand it to vmset.
2205 if (IsCmpMinimum) {
2206 ReplaceNode(Node,
2207 CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, MaskSEW));
2208 return;
2209 }
2210
2211 if (IsCmpConstant) {
2212 SDValue Imm =
2213 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
2214
2215 ReplaceNode(Node, CurDAG->getMachineNode(VMSGTOpcode, DL, VT,
2216 {Src1, Imm, VL, SEW}));
2217 return;
2218 }
2219
2220 // Expand to
2221 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
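// i.e. vmsge(u)(va, x) is computed as NOT (va < x), with the vmnand of
// the compare result against itself acting as the mask-register NOT.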
2222 SDValue Cmp = SDValue(
2223 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
2224 0);
2225 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
2226 {Cmp, Cmp, VL, MaskSEW}));
2227 return;
2228 }
2229 case Intrinsic::riscv_vmsgeu_mask:
2230 case Intrinsic::riscv_vmsge_mask: {
2231 SDValue Src1 = Node->getOperand(2);
2232 SDValue Src2 = Node->getOperand(3);
2233 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
2234 bool IsCmpConstant = false;
2235 bool IsCmpMinimum = false;
2236 // Only custom select scalar second operand.
2237 if (Src2.getValueType() != XLenVT)
2238 break;
2239 // Small constants are handled with patterns.
2240 MVT Src1VT = Src1.getSimpleValueType();
2241 int64_t CVal = 0;
2242 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
2243 IsCmpConstant = true;
2244 CVal = C->getSExtValue();
2245 if (CVal >= -15 && CVal <= 16) {
2246 if (!IsUnsigned || CVal != 0)
2247 break;
2248 IsCmpMinimum = true;
2249 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
2250 Src1VT.getScalarSizeInBits())
2251 .getSExtValue()) {
2252 IsCmpMinimum = true;
2253 }
2254 }
2255 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
2256 VMOROpcode, VMSGTMaskOpcode;
2257 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
2258 default:
2259 llvm_unreachable("Unexpected LMUL!");
2260#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
2261 case RISCVVType::lmulenum: \
2262 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
2263 : RISCV::PseudoVMSLT_VX_##suffix; \
2264 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
2265 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
2266 VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK \
2267 : RISCV::PseudoVMSGT_VX_##suffix##_MASK; \
2268 break;
2269 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
2270 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
2271 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
2272 CASE_VMSLT_OPCODES(LMUL_1, M1)
2273 CASE_VMSLT_OPCODES(LMUL_2, M2)
2274 CASE_VMSLT_OPCODES(LMUL_4, M4)
2275 CASE_VMSLT_OPCODES(LMUL_8, M8)
2276#undef CASE_VMSLT_OPCODES
2277 }
2278 // Mask operations use the LMUL from the mask type.
2279 switch (RISCVTargetLowering::getLMUL(VT)) {
2280 default:
2281 llvm_unreachable("Unexpected LMUL!");
2282#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
2283 case RISCVVType::lmulenum: \
2284 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
2285 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
2286 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
2287 break;
2288 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64)
2289 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32)
2290 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16)
2291 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, B8)
2292 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, B4)
2293 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, B2)
2294 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, B1)
2295#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
2296 }
2297 SDValue SEW = CurDAG->getTargetConstant(
2298 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
2299 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
2300 SDValue VL;
2301 selectVLOp(Node->getOperand(5), VL);
2302 SDValue MaskedOff = Node->getOperand(1);
2303 SDValue Mask = Node->getOperand(4);
2304
2305 // If vmsge(u) with minimum value, expand it to vmor mask, maskedoff.
2306 if (IsCmpMinimum) {
2307 // We don't need vmor if the MaskedOff and the Mask are the same
2308 // value.
2309 if (Mask == MaskedOff) {
2310 ReplaceUses(Node, Mask.getNode());
2311 return;
2312 }
2313 ReplaceNode(Node,
2314 CurDAG->getMachineNode(VMOROpcode, DL, VT,
2315 {Mask, MaskedOff, VL, MaskSEW}));
2316 return;
2317 }
2318
2319 // If the MaskedOff value and the Mask are the same value use
2320 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
2321 // This avoids needing to copy v0 to vd before starting the next sequence.
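// The vmandn computes Mask & ~Cmp: lanes where the mask is set get
// !(va < x), i.e. va >= x, and masked-off lanes get 0, which matches
// MaskedOff (== Mask) there.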
2322 if (Mask == MaskedOff) {
2323 SDValue Cmp = SDValue(
2324 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
2325 0);
2326 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
2327 {Mask, Cmp, VL, MaskSEW}));
2328 return;
2329 }
2330
2331 SDValue PolicyOp =
2332 CurDAG->getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
2333
2334 if (IsCmpConstant) {
2335 SDValue Imm =
2336 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
2337
2338 ReplaceNode(Node, CurDAG->getMachineNode(
2339 VMSGTMaskOpcode, DL, VT,
2340 {MaskedOff, Src1, Imm, Mask, VL, SEW, PolicyOp}));
2341 return;
2342 }
2343
2344 // Otherwise use
2345 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
2346 // The result is mask undisturbed.
2347 // We use the same instructions to emulate mask agnostic behavior, because
2348 // the agnostic result can be either undisturbed or all 1.
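// Active lanes get (va < x) from the masked compare, and the xor with
// the set mask bit flips that to va >= x; inactive lanes keep MaskedOff,
// and the xor with a clear mask bit leaves them unchanged.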
2349 SDValue Cmp = SDValue(CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
2350 {MaskedOff, Src1, Src2, Mask,
2351 VL, SEW, PolicyOp}),
2352 0);
2353 // vmxor.mm vd, vd, v0 is used to update active value.
2354 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
2355 {Cmp, Mask, VL, MaskSEW}));
2356 return;
2357 }
2358 case Intrinsic::riscv_vsetvli:
2359 case Intrinsic::riscv_vsetvlimax:
2360 return selectVSETVLI(Node);
2361 case Intrinsic::riscv_sf_vsettnt:
2362 case Intrinsic::riscv_sf_vsettm:
2363 case Intrinsic::riscv_sf_vsettk:
2364 return selectXSfmmVSET(Node);
2365 }
2366 break;
2367 }
2368 case ISD::INTRINSIC_W_CHAIN: {
2369 unsigned IntNo = Node->getConstantOperandVal(1);
2370 switch (IntNo) {
2371 // By default we do not custom select any intrinsic.
2372 default:
2373 break;
2374 case Intrinsic::riscv_vlseg2:
2375 case Intrinsic::riscv_vlseg3:
2376 case Intrinsic::riscv_vlseg4:
2377 case Intrinsic::riscv_vlseg5:
2378 case Intrinsic::riscv_vlseg6:
2379 case Intrinsic::riscv_vlseg7:
2380 case Intrinsic::riscv_vlseg8: {
2381 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2382 /*IsStrided*/ false);
2383 return;
2384 }
2385 case Intrinsic::riscv_vlseg2_mask:
2386 case Intrinsic::riscv_vlseg3_mask:
2387 case Intrinsic::riscv_vlseg4_mask:
2388 case Intrinsic::riscv_vlseg5_mask:
2389 case Intrinsic::riscv_vlseg6_mask:
2390 case Intrinsic::riscv_vlseg7_mask:
2391 case Intrinsic::riscv_vlseg8_mask: {
2392 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2393 /*IsStrided*/ false);
2394 return;
2395 }
2396 case Intrinsic::riscv_vlsseg2:
2397 case Intrinsic::riscv_vlsseg3:
2398 case Intrinsic::riscv_vlsseg4:
2399 case Intrinsic::riscv_vlsseg5:
2400 case Intrinsic::riscv_vlsseg6:
2401 case Intrinsic::riscv_vlsseg7:
2402 case Intrinsic::riscv_vlsseg8: {
2403 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2404 /*IsStrided*/ true);
2405 return;
2406 }
2407 case Intrinsic::riscv_vlsseg2_mask:
2408 case Intrinsic::riscv_vlsseg3_mask:
2409 case Intrinsic::riscv_vlsseg4_mask:
2410 case Intrinsic::riscv_vlsseg5_mask:
2411 case Intrinsic::riscv_vlsseg6_mask:
2412 case Intrinsic::riscv_vlsseg7_mask:
2413 case Intrinsic::riscv_vlsseg8_mask: {
2414 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2415 /*IsStrided*/ true);
2416 return;
2417 }
2418 case Intrinsic::riscv_vloxseg2:
2419 case Intrinsic::riscv_vloxseg3:
2420 case Intrinsic::riscv_vloxseg4:
2421 case Intrinsic::riscv_vloxseg5:
2422 case Intrinsic::riscv_vloxseg6:
2423 case Intrinsic::riscv_vloxseg7:
2424 case Intrinsic::riscv_vloxseg8:
2425 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2426 /*IsOrdered*/ true);
2427 return;
2428 case Intrinsic::riscv_vluxseg2:
2429 case Intrinsic::riscv_vluxseg3:
2430 case Intrinsic::riscv_vluxseg4:
2431 case Intrinsic::riscv_vluxseg5:
2432 case Intrinsic::riscv_vluxseg6:
2433 case Intrinsic::riscv_vluxseg7:
2434 case Intrinsic::riscv_vluxseg8:
2435 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2436 /*IsOrdered*/ false);
2437 return;
2438 case Intrinsic::riscv_vloxseg2_mask:
2439 case Intrinsic::riscv_vloxseg3_mask:
2440 case Intrinsic::riscv_vloxseg4_mask:
2441 case Intrinsic::riscv_vloxseg5_mask:
2442 case Intrinsic::riscv_vloxseg6_mask:
2443 case Intrinsic::riscv_vloxseg7_mask:
2444 case Intrinsic::riscv_vloxseg8_mask:
2445 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2446 /*IsOrdered*/ true);
2447 return;
2448 case Intrinsic::riscv_vluxseg2_mask:
2449 case Intrinsic::riscv_vluxseg3_mask:
2450 case Intrinsic::riscv_vluxseg4_mask:
2451 case Intrinsic::riscv_vluxseg5_mask:
2452 case Intrinsic::riscv_vluxseg6_mask:
2453 case Intrinsic::riscv_vluxseg7_mask:
2454 case Intrinsic::riscv_vluxseg8_mask:
2455 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2456 /*IsOrdered*/ false);
2457 return;
2458 case Intrinsic::riscv_vlseg8ff:
2459 case Intrinsic::riscv_vlseg7ff:
2460 case Intrinsic::riscv_vlseg6ff:
2461 case Intrinsic::riscv_vlseg5ff:
2462 case Intrinsic::riscv_vlseg4ff:
2463 case Intrinsic::riscv_vlseg3ff:
2464 case Intrinsic::riscv_vlseg2ff: {
2465 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ false);
2466 return;
2467 }
2468 case Intrinsic::riscv_vlseg8ff_mask:
2469 case Intrinsic::riscv_vlseg7ff_mask:
2470 case Intrinsic::riscv_vlseg6ff_mask:
2471 case Intrinsic::riscv_vlseg5ff_mask:
2472 case Intrinsic::riscv_vlseg4ff_mask:
2473 case Intrinsic::riscv_vlseg3ff_mask:
2474 case Intrinsic::riscv_vlseg2ff_mask: {
2475 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ true);
2476 return;
2477 }
2478 case Intrinsic::riscv_vloxei:
2479 case Intrinsic::riscv_vloxei_mask:
2480 case Intrinsic::riscv_vluxei:
2481 case Intrinsic::riscv_vluxei_mask: {
2482 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
2483 IntNo == Intrinsic::riscv_vluxei_mask;
2484 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
2485 IntNo == Intrinsic::riscv_vloxei_mask;
2486
2487 MVT VT = Node->getSimpleValueType(0);
2488 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2489
2490 unsigned CurOp = 2;
2491 SmallVector<SDValue, 8> Operands;
2492 Operands.push_back(Node->getOperand(CurOp++));
2493
2494 MVT IndexVT;
2495 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2496 /*IsStridedOrIndexed*/ true, Operands,
2497 /*IsLoad=*/true, &IndexVT);
2498
2499 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2500 "Element count mismatch");
2501
2502 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2503 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2504 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2505 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2506 reportFatalUsageError("The V extension does not support EEW=64 for "
2507 "index values when XLEN=32");
2508 }
2509 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
2510 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
2511 static_cast<unsigned>(IndexLMUL));
2512 MachineSDNode *Load =
2513 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2514
2515 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2516
2517 ReplaceNode(Node, Load);
2518 return;
2519 }
2520 case Intrinsic::riscv_vlm:
2521 case Intrinsic::riscv_vle:
2522 case Intrinsic::riscv_vle_mask:
2523 case Intrinsic::riscv_vlse:
2524 case Intrinsic::riscv_vlse_mask: {
2525 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
2526 IntNo == Intrinsic::riscv_vlse_mask;
2527 bool IsStrided =
2528 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
2529
2530 MVT VT = Node->getSimpleValueType(0);
2531 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2532
2533 // The riscv_vlm intrinsic is always tail agnostic and has no passthru
2534 // operand at the IR level. In pseudos, it has both a policy and a
2535 // passthru operand. The passthru operand is needed to track the
2536 // "tail undefined" state, and the policy is there just for
2537 // consistency - it will always be "don't care" for the
2538 // unmasked form.
2539 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
2540 unsigned CurOp = 2;
2541 SmallVector<SDValue, 8> Operands;
2542 if (HasPassthruOperand)
2543 Operands.push_back(Node->getOperand(CurOp++));
2544 else {
2545 // We eagerly lower to implicit_def (instead of undef), as we
2546 // otherwise fail to select nodes such as: nxv1i1 = undef
2547 SDNode *Passthru =
2548 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
2549 Operands.push_back(SDValue(Passthru, 0));
2550 }
2551 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2552 Operands, /*IsLoad=*/true);
2553
2554 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2555 const RISCV::VLEPseudo *P =
2556 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
2557 static_cast<unsigned>(LMUL));
2558 MachineSDNode *Load =
2559 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2560
2561 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2562
2563 ReplaceNode(Node, Load);
2564 return;
2565 }
2566 case Intrinsic::riscv_vleff:
2567 case Intrinsic::riscv_vleff_mask: {
2568 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
2569
2570 MVT VT = Node->getSimpleValueType(0);
2571 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2572
2573 unsigned CurOp = 2;
2574 SmallVector<SDValue, 7> Operands;
2575 Operands.push_back(Node->getOperand(CurOp++));
2576 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2577 /*IsStridedOrIndexed*/ false, Operands,
2578 /*IsLoad=*/true);
2579
2580 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2581 const RISCV::VLEPseudo *P =
2582 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
2583 Log2SEW, static_cast<unsigned>(LMUL));
2584 MachineSDNode *Load = CurDAG->getMachineNode(
2585 P->Pseudo, DL, Node->getVTList(), Operands);
2586 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2587
2588 ReplaceNode(Node, Load);
2589 return;
2590 }
2591 case Intrinsic::riscv_nds_vln:
2592 case Intrinsic::riscv_nds_vln_mask:
2593 case Intrinsic::riscv_nds_vlnu:
2594 case Intrinsic::riscv_nds_vlnu_mask: {
2595 bool IsMasked = IntNo == Intrinsic::riscv_nds_vln_mask ||
2596 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2597 bool IsUnsigned = IntNo == Intrinsic::riscv_nds_vlnu ||
2598 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2599
2600 MVT VT = Node->getSimpleValueType(0);
2601 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2602 unsigned CurOp = 2;
2603 SmallVector<SDValue, 8> Operands;
2604
2605 Operands.push_back(Node->getOperand(CurOp++));
2606 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2607 /*IsStridedOrIndexed=*/false, Operands,
2608 /*IsLoad=*/true);
2609
2610 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2611 const RISCV::NDSVLNPseudo *P = RISCV::getNDSVLNPseudo(
2612 IsMasked, IsUnsigned, Log2SEW, static_cast<unsigned>(LMUL));
2613 MachineSDNode *Load =
2614 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2615
2616 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2617 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2618
2619 ReplaceNode(Node, Load);
2620 return;
2621 }
2622 }
2623 break;
2624 }
2625 case ISD::INTRINSIC_VOID: {
2626 unsigned IntNo = Node->getConstantOperandVal(1);
2627 switch (IntNo) {
2628 case Intrinsic::riscv_vsseg2:
2629 case Intrinsic::riscv_vsseg3:
2630 case Intrinsic::riscv_vsseg4:
2631 case Intrinsic::riscv_vsseg5:
2632 case Intrinsic::riscv_vsseg6:
2633 case Intrinsic::riscv_vsseg7:
2634 case Intrinsic::riscv_vsseg8: {
2635 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2636 /*IsStrided*/ false);
2637 return;
2638 }
2639 case Intrinsic::riscv_vsseg2_mask:
2640 case Intrinsic::riscv_vsseg3_mask:
2641 case Intrinsic::riscv_vsseg4_mask:
2642 case Intrinsic::riscv_vsseg5_mask:
2643 case Intrinsic::riscv_vsseg6_mask:
2644 case Intrinsic::riscv_vsseg7_mask:
2645 case Intrinsic::riscv_vsseg8_mask: {
2646 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2647 /*IsStrided*/ false);
2648 return;
2649 }
2650 case Intrinsic::riscv_vssseg2:
2651 case Intrinsic::riscv_vssseg3:
2652 case Intrinsic::riscv_vssseg4:
2653 case Intrinsic::riscv_vssseg5:
2654 case Intrinsic::riscv_vssseg6:
2655 case Intrinsic::riscv_vssseg7:
2656 case Intrinsic::riscv_vssseg8: {
2657 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2658 /*IsStrided*/ true);
2659 return;
2660 }
2661 case Intrinsic::riscv_vssseg2_mask:
2662 case Intrinsic::riscv_vssseg3_mask:
2663 case Intrinsic::riscv_vssseg4_mask:
2664 case Intrinsic::riscv_vssseg5_mask:
2665 case Intrinsic::riscv_vssseg6_mask:
2666 case Intrinsic::riscv_vssseg7_mask:
2667 case Intrinsic::riscv_vssseg8_mask: {
2668 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2669 /*IsStrided*/ true);
2670 return;
2671 }
2672 case Intrinsic::riscv_vsoxseg2:
2673 case Intrinsic::riscv_vsoxseg3:
2674 case Intrinsic::riscv_vsoxseg4:
2675 case Intrinsic::riscv_vsoxseg5:
2676 case Intrinsic::riscv_vsoxseg6:
2677 case Intrinsic::riscv_vsoxseg7:
2678 case Intrinsic::riscv_vsoxseg8:
2679 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2680 /*IsOrdered*/ true);
2681 return;
2682 case Intrinsic::riscv_vsuxseg2:
2683 case Intrinsic::riscv_vsuxseg3:
2684 case Intrinsic::riscv_vsuxseg4:
2685 case Intrinsic::riscv_vsuxseg5:
2686 case Intrinsic::riscv_vsuxseg6:
2687 case Intrinsic::riscv_vsuxseg7:
2688 case Intrinsic::riscv_vsuxseg8:
2689 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2690 /*IsOrdered*/ false);
2691 return;
2692 case Intrinsic::riscv_vsoxseg2_mask:
2693 case Intrinsic::riscv_vsoxseg3_mask:
2694 case Intrinsic::riscv_vsoxseg4_mask:
2695 case Intrinsic::riscv_vsoxseg5_mask:
2696 case Intrinsic::riscv_vsoxseg6_mask:
2697 case Intrinsic::riscv_vsoxseg7_mask:
2698 case Intrinsic::riscv_vsoxseg8_mask:
2699 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2700 /*IsOrdered*/ true);
2701 return;
2702 case Intrinsic::riscv_vsuxseg2_mask:
2703 case Intrinsic::riscv_vsuxseg3_mask:
2704 case Intrinsic::riscv_vsuxseg4_mask:
2705 case Intrinsic::riscv_vsuxseg5_mask:
2706 case Intrinsic::riscv_vsuxseg6_mask:
2707 case Intrinsic::riscv_vsuxseg7_mask:
2708 case Intrinsic::riscv_vsuxseg8_mask:
2709 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2710 /*IsOrdered*/ false);
2711 return;
2712 case Intrinsic::riscv_vsoxei:
2713 case Intrinsic::riscv_vsoxei_mask:
2714 case Intrinsic::riscv_vsuxei:
2715 case Intrinsic::riscv_vsuxei_mask: {
2716 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2717 IntNo == Intrinsic::riscv_vsuxei_mask;
2718 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2719 IntNo == Intrinsic::riscv_vsoxei_mask;
2720
2721 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2722 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2723
2724 unsigned CurOp = 2;
2725 SmallVector<SDValue, 8> Operands;
2726 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2727
2728 MVT IndexVT;
2729 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2730 /*IsStridedOrIndexed*/ true, Operands,
2731 /*IsLoad=*/false, &IndexVT);
2732
2733 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2734 "Element count mismatch");
2735
2736 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2737 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2738 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2739 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2740 reportFatalUsageError("The V extension does not support EEW=64 for "
2741 "index values when XLEN=32");
2742 }
2743 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2744 IsMasked, IsOrdered, IndexLog2EEW,
2745 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2746 MachineSDNode *Store =
2747 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2748
2749 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2750
2751 ReplaceNode(Node, Store);
2752 return;
2753 }
2754 case Intrinsic::riscv_vsm:
2755 case Intrinsic::riscv_vse:
2756 case Intrinsic::riscv_vse_mask:
2757 case Intrinsic::riscv_vsse:
2758 case Intrinsic::riscv_vsse_mask: {
2759 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2760 IntNo == Intrinsic::riscv_vsse_mask;
2761 bool IsStrided =
2762 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2763
2764 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2765 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2766
2767 unsigned CurOp = 2;
2768 SmallVector<SDValue, 8> Operands;
2769 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2770
2771 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2772 Operands);
2773
2774 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2775 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2776 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2777 MachineSDNode *Store =
2778 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2779 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2780
2781 ReplaceNode(Node, Store);
2782 return;
2783 }
2784 case Intrinsic::riscv_sf_vc_x_se:
2785 case Intrinsic::riscv_sf_vc_i_se:
2786 selectSF_VC_X_SE(Node);
2787 return;
2788 case Intrinsic::riscv_sf_vlte8:
2789 case Intrinsic::riscv_sf_vlte16:
2790 case Intrinsic::riscv_sf_vlte32:
2791 case Intrinsic::riscv_sf_vlte64: {
2792 unsigned Log2SEW;
2793 unsigned PseudoInst;
2794 switch (IntNo) {
2795 case Intrinsic::riscv_sf_vlte8:
2796 PseudoInst = RISCV::PseudoSF_VLTE8;
2797 Log2SEW = 3;
2798 break;
2799 case Intrinsic::riscv_sf_vlte16:
2800 PseudoInst = RISCV::PseudoSF_VLTE16;
2801 Log2SEW = 4;
2802 break;
2803 case Intrinsic::riscv_sf_vlte32:
2804 PseudoInst = RISCV::PseudoSF_VLTE32;
2805 Log2SEW = 5;
2806 break;
2807 case Intrinsic::riscv_sf_vlte64:
2808 PseudoInst = RISCV::PseudoSF_VLTE64;
2809 Log2SEW = 6;
2810 break;
2811 }
2812
2813 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2814 SDValue TWidenOp = CurDAG->getTargetConstant(1, DL, XLenVT);
2815 SDValue Operands[] = {Node->getOperand(2),
2816 Node->getOperand(3),
2817 Node->getOperand(4),
2818 SEWOp,
2819 TWidenOp,
2820 Node->getOperand(0)};
2821
2822 MachineSDNode *TileLoad =
2823 CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands);
2824 CurDAG->setNodeMemRefs(TileLoad,
2825 {cast<MemSDNode>(Node)->getMemOperand()});
2826
2827 ReplaceNode(Node, TileLoad);
2828 return;
2829 }
2830 case Intrinsic::riscv_sf_mm_s_s:
2831 case Intrinsic::riscv_sf_mm_s_u:
2832 case Intrinsic::riscv_sf_mm_u_s:
2833 case Intrinsic::riscv_sf_mm_u_u:
2834 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2835 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2836 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2837 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2838 case Intrinsic::riscv_sf_mm_f_f: {
2839 bool HasFRM = false;
2840 unsigned PseudoInst;
2841 switch (IntNo) {
2842 case Intrinsic::riscv_sf_mm_s_s:
2843 PseudoInst = RISCV::PseudoSF_MM_S_S;
2844 break;
2845 case Intrinsic::riscv_sf_mm_s_u:
2846 PseudoInst = RISCV::PseudoSF_MM_S_U;
2847 break;
2848 case Intrinsic::riscv_sf_mm_u_s:
2849 PseudoInst = RISCV::PseudoSF_MM_U_S;
2850 break;
2851 case Intrinsic::riscv_sf_mm_u_u:
2852 PseudoInst = RISCV::PseudoSF_MM_U_U;
2853 break;
2854 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2855 PseudoInst = RISCV::PseudoSF_MM_E5M2_E5M2;
2856 HasFRM = true;
2857 break;
2858 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2859 PseudoInst = RISCV::PseudoSF_MM_E5M2_E4M3;
2860 HasFRM = true;
2861 break;
2862 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2863 PseudoInst = RISCV::PseudoSF_MM_E4M3_E5M2;
2864 HasFRM = true;
2865 break;
2866 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2867 PseudoInst = RISCV::PseudoSF_MM_E4M3_E4M3;
2868 HasFRM = true;
2869 break;
2870 case Intrinsic::riscv_sf_mm_f_f:
2871 if (Node->getOperand(3).getValueType().getScalarType() == MVT::bf16)
2872 PseudoInst = RISCV::PseudoSF_MM_F_F_ALT;
2873 else
2874 PseudoInst = RISCV::PseudoSF_MM_F_F;
2875 HasFRM = true;
2876 break;
2877 }
2878 uint64_t TileNum = Node->getConstantOperandVal(2);
2879 SDValue Op1 = Node->getOperand(3);
2880 SDValue Op2 = Node->getOperand(4);
2881 MVT VT = Op1->getSimpleValueType(0);
2882 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2883 SDValue TmOp = Node->getOperand(5);
2884 SDValue TnOp = Node->getOperand(6);
2885 SDValue TkOp = Node->getOperand(7);
2886 SDValue TWidenOp = Node->getOperand(8);
2887 SDValue Chain = Node->getOperand(0);
2888
2889 // sf.mm.f.f with sew=32, twiden=2 is invalid
2890 if (IntNo == Intrinsic::riscv_sf_mm_f_f && Log2SEW == 5 &&
2891 TWidenOp->getAsZExtVal() == 2)
2892 reportFatalUsageError("sf.mm.f.f doesn't support (sew=32, twiden=2)");
2893
2894 SmallVector<SDValue, 10> Operands(
2895 {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Op1, Op2});
2896 if (HasFRM)
2897 Operands.push_back(
2898 CurDAG->getTargetConstant(RISCVFPRndMode::DYN, DL, XLenVT));
2899 Operands.append({TmOp, TnOp, TkOp,
2900 CurDAG->getTargetConstant(Log2SEW, DL, XLenVT), TWidenOp,
2901 Chain});
2902
2903 auto *NewNode =
2904 CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands);
2905
2906 ReplaceNode(Node, NewNode);
2907 return;
2908 }
2909 case Intrinsic::riscv_sf_vtzero_t: {
2910 uint64_t TileNum = Node->getConstantOperandVal(2);
2911 SDValue Tm = Node->getOperand(3);
2912 SDValue Tn = Node->getOperand(4);
2913 SDValue Log2SEW = Node->getOperand(5);
2914 SDValue TWiden = Node->getOperand(6);
2915 SDValue Chain = Node->getOperand(0);
2916 auto *NewNode = CurDAG->getMachineNode(
2917 RISCV::PseudoSF_VTZERO_T, DL, Node->getVTList(),
2918 {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Tm, Tn, Log2SEW,
2919 TWiden, Chain});
2920
2921 ReplaceNode(Node, NewNode);
2922 return;
2923 }
2924 }
2925 break;
2926 }
2927 case ISD::BITCAST: {
2928 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2929 // Just drop bitcasts between vectors if both are fixed or both are
2930 // scalable.
2931 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2932 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2933 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2934 CurDAG->RemoveDeadNode(Node);
2935 return;
2936 }
2937 if (Subtarget->hasStdExtP()) {
2938 bool Is32BitCast =
2939 (VT == MVT::i32 && (SrcVT == MVT::v4i8 || SrcVT == MVT::v2i16)) ||
2940 (SrcVT == MVT::i32 && (VT == MVT::v4i8 || VT == MVT::v2i16));
2941 bool Is64BitCast =
2942 (VT == MVT::i64 && (SrcVT == MVT::v8i8 || SrcVT == MVT::v4i16 ||
2943 SrcVT == MVT::v2i32)) ||
2944 (SrcVT == MVT::i64 &&
2945 (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32));
2946 if (Is32BitCast || Is64BitCast) {
2947 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2948 CurDAG->RemoveDeadNode(Node);
2949 return;
2950 }
2951 }
2952 break;
2953 }
2954 case ISD::SPLAT_VECTOR: {
2955 if (!Subtarget->hasStdExtP())
2956 break;
2957 if (auto *ConstNode = dyn_cast<ConstantSDNode>(Node->getOperand(0))) {
2958 bool IsDoubleWide = Subtarget->isPExtPackedDoubleType(VT);
2959
2960 if (ConstNode->isZero()) {
2961 MCPhysReg X0Reg = IsDoubleWide ? RISCV::X0_Pair : RISCV::X0;
2962 SDValue New =
2963 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, X0Reg, VT);
2964 ReplaceNode(Node, New.getNode());
2965 return;
2966 }
2967
2968 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
2969 APInt Val = ConstNode->getAPIntValue().trunc(EltSize);
2970
2971 // Use LI for all ones since it can be compressed to c.li.
2972 if (Val.isAllOnes() && !IsDoubleWide) {
2973 SDNode *NewNode = CurDAG->getMachineNode(
2974 RISCV::ADDI, DL, VT, CurDAG->getRegister(RISCV::X0, VT),
2975 CurDAG->getAllOnesConstant(DL, XLenVT, /*IsTarget=*/true));
2976 ReplaceNode(Node, NewNode);
2977 return;
2978 }
2979
2980 // Find the smallest splat.
2981 if (Val.getBitWidth() > 16 && Val.isSplat(16))
2982 Val = Val.trunc(16);
2983 if (Val.getBitWidth() > 8 && Val.isSplat(8))
2984 Val = Val.trunc(8);
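// e.g. a halfword splat of 0x7f7f (illustrative value) is out of
// PLI_H's simm10 range but is also a byte splat of 0x7f, which PLI_B
// can encode directly.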
2985
2986 EltSize = Val.getBitWidth();
2987 int64_t Imm = Val.getSExtValue();
2988
2989 unsigned Opc = 0;
2990 if (EltSize == 8) {
2991 Opc = IsDoubleWide ? RISCV::PLI_DB : RISCV::PLI_B;
2992 } else if (EltSize == 16 && isInt<10>(Imm)) {
2993 Opc = IsDoubleWide ? RISCV::PLI_DH : RISCV::PLI_H;
2994 } else if (!IsDoubleWide && EltSize == 32 && isInt<10>(Imm)) {
2995 Opc = RISCV::PLI_W;
2996 } else if (EltSize == 16 && isShiftedInt<10, 6>(Imm)) {
2997 Opc = IsDoubleWide ? RISCV::PLUI_DH : RISCV::PLUI_H;
2998 Imm = Imm >> 6;
2999 } else if (!IsDoubleWide && EltSize == 32 && isShiftedInt<10, 22>(Imm)) {
3000 Opc = RISCV::PLUI_W;
3001 Imm = Imm >> 22;
3002 }
3003
3004 if (Opc) {
3005 SDNode *NewNode = CurDAG->getMachineNode(
3006 Opc, DL, VT, CurDAG->getSignedTargetConstant(Imm, DL, XLenVT));
3007 ReplaceNode(Node, NewNode);
3008 return;
3009 }
3010 }
3011
3012 // Use buildGPRPair for v2i32 on RV32.
3013 if (!Subtarget->is64Bit() && VT == MVT::v2i32) {
3014 SDValue Pair = buildGPRPair(CurDAG, DL, VT, Node->getOperand(0),
3015 Node->getOperand(0));
3016 ReplaceNode(Node, Pair.getNode());
3017 return;
3018 }
3019
3020 break;
3021 }
3022 case ISD::BUILD_VECTOR: {
3023 if (Subtarget->hasStdExtP() && !Subtarget->is64Bit() && VT == MVT::v2i32) {
3024 SDValue Pair = buildGPRPair(CurDAG, DL, VT, Node->getOperand(0),
3025 Node->getOperand(1));
3026 ReplaceNode(Node, Pair.getNode());
3027 return;
3028 }
3029 break;
3030 }
3031 case ISD::CONCAT_VECTORS: {
3032 if (Subtarget->hasStdExtP() && !Subtarget->is64Bit() &&
3033 (VT == MVT::v4i16 || VT == MVT::v8i8)) {
3034 assert(Node->getNumOperands() == 2);
3035 SDValue Lo = Node->getOperand(0);
3036 SDValue Hi = Node->getOperand(1);
3037 SDValue Pair = buildGPRPair(CurDAG, DL, VT, Lo, Hi);
3038 ReplaceNode(Node, Pair.getNode());
3039 return;
3040 }
3041 break;
3042 }
3043 case ISD::EXTRACT_VECTOR_ELT: {
3044 if (Subtarget->hasStdExtP() && !Subtarget->is64Bit()) {
3045 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
3046 if (VT == MVT::i32 && SrcVT == MVT::v2i32) {
3047 auto *IdxC = dyn_cast<ConstantSDNode>(Node->getOperand(1));
3048 if (!IdxC)
3049 break;
3050 unsigned Idx = IdxC->getZExtValue();
3051 if (Idx > 1)
3052 break;
3053
3054 unsigned SubRegIdx =
3055 Idx == 0 ? RISCV::sub_gpr_even : RISCV::sub_gpr_odd;
3056 SDValue Src = Node->getOperand(0);
3057 SDValue Extract =
3058 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, Src);
3059 ReplaceNode(Node, Extract.getNode());
3060 return;
3061 }
3062 }
3063 break;
3064 }
3066 if (Subtarget->hasStdExtP()) {
3067 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
3068 if ((VT == MVT::v2i32 && SrcVT == MVT::i64) ||
3069 (VT == MVT::v4i8 && SrcVT == MVT::i32)) {
3070 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
3071 CurDAG->RemoveDeadNode(Node);
3072 return;
3073 }
3074 }
3075 break;
3076 case ISD::INSERT_SUBVECTOR:
3077 case RISCVISD::TUPLE_INSERT: {
3078 SDValue V = Node->getOperand(0);
3079 SDValue SubV = Node->getOperand(1);
3080 SDLoc DL(SubV);
3081 auto Idx = Node->getConstantOperandVal(2);
3082 MVT SubVecVT = SubV.getSimpleValueType();
3083
3084 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
3085 MVT SubVecContainerVT = SubVecVT;
3086 // Establish the correct scalable-vector types for any fixed-length type.
3087 if (SubVecVT.isFixedLengthVector()) {
3088 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
3089 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
3090 [[maybe_unused]] bool ExactlyVecRegSized =
3091 Subtarget->expandVScale(SubVecVT.getSizeInBits())
3092 .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
3093 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
3094 .getKnownMinValue()));
3095 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
3096 }
3097 MVT ContainerVT = VT;
3098 if (VT.isFixedLengthVector())
3099 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
3100
3101 const auto *TRI = Subtarget->getRegisterInfo();
3102 unsigned SubRegIdx;
3103 std::tie(SubRegIdx, Idx) =
3104 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
3105 ContainerVT, SubVecContainerVT, Idx, TRI);
3106
3107 // If the Idx hasn't been completely eliminated then this is a subvector
3108 // insert which doesn't naturally align to a vector register. These must
3109 // be handled using instructions to manipulate the vector registers.
3110 if (Idx != 0)
3111 break;
3112
3113 RISCVVType::VLMUL SubVecLMUL =
3114 RISCVTargetLowering::getLMUL(SubVecContainerVT);
3115 [[maybe_unused]] bool IsSubVecPartReg =
3116 SubVecLMUL == RISCVVType::VLMUL::LMUL_F2 ||
3117 SubVecLMUL == RISCVVType::VLMUL::LMUL_F4 ||
3118 SubVecLMUL == RISCVVType::VLMUL::LMUL_F8;
3119 assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg ||
3120 V.isUndef()) &&
3121 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
3122 "the subvector is smaller than a full-sized register");
3123
3124 // If we haven't set a SubRegIdx, then we must be going between
3125 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
3126 if (SubRegIdx == RISCV::NoSubRegister) {
3127 unsigned InRegClassID =
3128 RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT);
3129 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
3130 InRegClassID &&
3131 "Unexpected subvector extraction");
3132 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
3133 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
3134 DL, VT, SubV, RC);
3135 ReplaceNode(Node, NewNode);
3136 return;
3137 }
3138
3139 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
3140 ReplaceNode(Node, Insert.getNode());
3141 return;
3142 }
3143 case ISD::EXTRACT_SUBVECTOR:
3144 case RISCVISD::TUPLE_EXTRACT: {
3145 SDValue V = Node->getOperand(0);
3146 auto Idx = Node->getConstantOperandVal(1);
3147 MVT InVT = V.getSimpleValueType();
3148
3149 // Handle P-extension extract_subvector for v2i16 from v4i16 and v4i8
3150 // from v8i8.
3151 if (Subtarget->hasStdExtP() && !Subtarget->is64Bit() &&
3152 ((InVT == MVT::v4i16 && VT == MVT::v2i16) ||
3153 (InVT == MVT::v8i8 && VT == MVT::v4i8))) {
3154 unsigned NumElts = VT.getVectorNumElements();
3155 if (Idx != 0 && Idx != NumElts)
3156 break;
3157
3158 unsigned SubRegIdx = Idx == 0 ? RISCV::sub_gpr_even : RISCV::sub_gpr_odd;
3159 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
3160 ReplaceNode(Node, Extract.getNode());
3161 return;
3162 }
3163
3164 SDLoc DL(V);
3165
3166 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
3167 MVT SubVecContainerVT = VT;
3168 // Establish the correct scalable-vector types for any fixed-length type.
3169 if (VT.isFixedLengthVector()) {
3170 assert(Idx == 0);
3171 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
3172 }
3173 if (InVT.isFixedLengthVector())
3174 InVT = TLI.getContainerForFixedLengthVector(InVT);
3175
3176 const auto *TRI = Subtarget->getRegisterInfo();
3177 unsigned SubRegIdx;
3178 std::tie(SubRegIdx, Idx) =
3179 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
3180 InVT, SubVecContainerVT, Idx, TRI);
3181
3182 // If the Idx hasn't been completely eliminated then this is a subvector
3183 // extract which doesn't naturally align to a vector register. These must
3184 // be handled using instructions to manipulate the vector registers.
3185 if (Idx != 0)
3186 break;
3187
3188 // If we haven't set a SubRegIdx, then we must be going between
3189 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
3190 if (SubRegIdx == RISCV::NoSubRegister) {
3191 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
3192 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
3193 InRegClassID &&
3194 "Unexpected subvector extraction");
3195 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
3196 SDNode *NewNode =
3197 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
3198 ReplaceNode(Node, NewNode);
3199 return;
3200 }
3201
3202 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
3203 ReplaceNode(Node, Extract.getNode());
3204 return;
3205 }
3206 case RISCVISD::VMV_S_X_VL:
3207 case RISCVISD::VFMV_S_F_VL:
3208 case RISCVISD::VMV_V_X_VL:
3209 case RISCVISD::VFMV_V_F_VL: {
3210 // Try to match splat of a scalar load to a strided load with stride of x0.
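// e.g. a splat of (load p) can be selected as "vlse<sew>.v v, (p), zero",
// reading the scalar once and broadcasting it, assuming the target has an
// optimized zero-stride load.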
3211 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
3212 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
3213 if (!Node->getOperand(0).isUndef())
3214 break;
3215 SDValue Src = Node->getOperand(1);
3216 auto *Ld = dyn_cast<LoadSDNode>(Src);
3217 // Can't fold a load update node because its second
3218 // output is used, so the load update node can't be removed.
3219 if (!Ld || Ld->isIndexed())
3220 break;
3221 EVT MemVT = Ld->getMemoryVT();
3222 // The memory VT should be the same size as the element type.
3223 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
3224 break;
3225 if (!IsProfitableToFold(Src, Node, Node) ||
3226 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
3227 break;
3228
3229 SDValue VL;
3230 if (IsScalarMove) {
3231 // We could deal with more VL if we update the VSETVLI insert pass to
3232 // avoid introducing more VSETVLI.
3233 if (!isOneConstant(Node->getOperand(2)))
3234 break;
3235 selectVLOp(Node->getOperand(2), VL);
3236 } else
3237 selectVLOp(Node->getOperand(2), VL);
3238
3239 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
3240 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
3241
3242 // If VL=1, then we don't need to do a strided load and can just do a
3243 // regular load.
3244 bool IsStrided = !isOneConstant(VL);
3245
3246 // Only do a strided load if the target has an optimized zero-stride vector load.
3247 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
3248 break;
3249
3250 SmallVector<SDValue> Operands = {
3251 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
3252 Ld->getBasePtr()};
3253 if (IsStrided)
3254 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
3255 uint64_t Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
3256 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
3257 Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
3258
3259 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
3260 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
3261 /*IsMasked*/ false, IsStrided, /*FF*/ false,
3262 Log2SEW, static_cast<unsigned>(LMUL));
3263 MachineSDNode *Load =
3264 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
3265 // Update the chain.
3266 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
3267 // Record the mem-refs
3268 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
3269 // Replace the splat with the vlse.
3270 ReplaceNode(Node, Load);
3271 return;
3272 }
3273 case ISD::PREFETCH:
3274 // MIPS's prefetch instruction already encodes the hint within the
3275 // instruction itself, so no extra NTL hint is needed.
3276 if (Subtarget->hasVendorXMIPSCBOP())
3277 break;
3278
3279 unsigned Locality = Node->getConstantOperandVal(3);
3280 if (Locality > 2)
3281 break;
3282
3283 auto *LoadStoreMem = cast<MemSDNode>(Node);
3284 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
3285 MMO->setFlags(MachineMemOperand::MONonTemporal);
3286
3287 int NontemporalLevel = 0;
3288 switch (Locality) {
3289 case 0:
3290 NontemporalLevel = 3; // NTL.ALL
3291 break;
3292 case 1:
3293 NontemporalLevel = 1; // NTL.PALL
3294 break;
3295 case 2:
3296 NontemporalLevel = 0; // NTL.P1
3297 break;
3298 default:
3299 llvm_unreachable("unexpected locality value.");
3300 }
3301
3302 if (NontemporalLevel & 0b1)
3303 MMO->setFlags(MONontemporalBit0);
3304 if (NontemporalLevel & 0b10)
3305 MMO->setFlags(MONontemporalBit1);
3306 break;
3307 }
3308
3309 // Select the default instruction.
3310 SelectCode(Node);
3311}
3312
3313 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
3314 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
3315 std::vector<SDValue> &OutOps) {
3316 // Always produce a register and immediate operand, as expected by
3317 // RISCVAsmPrinter::PrintAsmMemoryOperand.
3318 switch (ConstraintID) {
3319 case InlineAsm::ConstraintCode::o:
3320 case InlineAsm::ConstraintCode::m: {
3321 SDValue Op0, Op1;
3322 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
3323 assert(Found && "SelectAddrRegImm should always succeed");
3324 OutOps.push_back(Op0);
3325 OutOps.push_back(Op1);
3326 return false;
3327 }
3328 case InlineAsm::ConstraintCode::A:
3329 OutOps.push_back(Op);
3330 OutOps.push_back(
3331 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
3332 return false;
3333 default:
3334 report_fatal_error("Unexpected asm memory constraint " +
3335 InlineAsm::getMemConstraintName(ConstraintID));
3336 }
3337
3338 return true;
3339}
3340
3341 bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
3342 SDValue &Offset) {
3343 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3344 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
3345 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
3346 return true;
3347 }
3348
3349 return false;
3350}
3351
3352// Fold constant addresses.
3353static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
3354 const MVT VT, const RISCVSubtarget *Subtarget,
3355 SDValue Addr, SDValue &Base, SDValue &Offset,
3356 bool IsPrefetch = false) {
3357 if (!isa<ConstantSDNode>(Addr))
3358 return false;
3359
3360 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
3361
3362 // If the constant is a simm12, we can fold the whole constant and use X0 as
3363 // the base. If the constant can be materialized with LUI+simm12, use LUI as
3364 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
3365 int64_t Lo12 = SignExtend64<12>(CVal);
3366 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
3367 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
3368 if (IsPrefetch && (Lo12 & 0b11111) != 0)
3369 return false;
3370 if (Hi) {
3371 int64_t Hi20 = (Hi >> 12) & 0xfffff;
3372 Base = SDValue(
3373 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
3374 CurDAG->getTargetConstant(Hi20, DL, VT)),
3375 0);
3376 } else {
3377 Base = CurDAG->getRegister(RISCV::X0, VT);
3378 }
3379 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
3380 return true;
3381 }
3382
3383 // Ask how constant materialization would handle this constant.
3384 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
3385
3386 // If the last instruction would be an ADDI, we can fold its immediate and
3387 // emit the rest of the sequence as the base.
3388 if (Seq.back().getOpcode() != RISCV::ADDI)
3389 return false;
3390 Lo12 = Seq.back().getImm();
3391 if (IsPrefetch && (Lo12 & 0b11111) != 0)
3392 return false;
3393
3394 // Drop the last instruction.
3395 Seq.pop_back();
3396 assert(!Seq.empty() && "Expected more instructions in sequence");
3397
3398 Base = selectImmSeq(CurDAG, DL, VT, Seq);
3399 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
3400 return true;
3401}
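// Worked examples of the folding above (illustrative):
//   address 2016       -> lw a0, 2016(x0)     ; simm12, X0 base
//   address 0x12345000 -> lui a0, 0x12345
//                         lw  a0, 0(a0)       ; LUI base + simm12 0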
3402
3403// Is this ADD instruction only used as the base pointer of scalar loads and
3404// stores?
3405 static bool isWorthFoldingAdd(SDValue Add) {
3406 for (auto *User : Add->users()) {
3407 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
3408 User->getOpcode() != RISCVISD::LD_RV32 &&
3409 User->getOpcode() != RISCVISD::SD_RV32 &&
3410 User->getOpcode() != ISD::ATOMIC_LOAD &&
3411 User->getOpcode() != ISD::ATOMIC_STORE)
3412 return false;
3413 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
3414 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
3415 VT != MVT::f64)
3416 return false;
3417 // Don't allow stores of the value. It must be used as the address.
3418 if (User->getOpcode() == ISD::STORE &&
3419 cast<StoreSDNode>(User)->getValue() == Add)
3420 return false;
3421 if (User->getOpcode() == ISD::ATOMIC_STORE &&
3422 cast<AtomicSDNode>(User)->getVal() == Add)
3423 return false;
3424 if (User->getOpcode() == RISCVISD::SD_RV32 &&
3425 (User->getOperand(0) == Add || User->getOperand(1) == Add))
3426 return false;
3427 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
3428 return false;
3429 }
3430
3431 return true;
3432}
3433
3434 static bool isRegImmLoadOrStore(SDNode *User, SDValue Add) {
3435 switch (User->getOpcode()) {
3436 default:
3437 return false;
3438 case ISD::LOAD:
3439 case RISCVISD::LD_RV32:
3440 case ISD::ATOMIC_LOAD:
3441 break;
3442 case ISD::STORE:
3443 // Don't allow stores of Add. It must only be used as the address.
3445 return false;
3446 break;
3447 case RISCVISD::SD_RV32:
3448 // Don't allow stores of Add. It must only be used as the address.
3449 if (User->getOperand(0) == Add || User->getOperand(1) == Add)
3450 return false;
3451 break;
3452 case ISD::ATOMIC_STORE:
3453 // Don't allow stores of Add. It must only be used as the address.
3454 if (cast<AtomicSDNode>(User)->getVal() == Add)
3455 return false;
3456 break;
3457 }
3458
3459 return true;
3460}
3461
3462// To prevent SelectAddrRegImm from folding offsets that conflict with the
3463// fusion of PseudoMovAddr, check if the offset of every use of a given address
3464// is within the alignment.
3465 bool RISCVDAGToDAGISel::areOffsetsWithinAlignment(SDValue Addr,
3466 Align Alignment) {
3467 assert(Addr->getOpcode() == RISCVISD::ADD_LO);
3468 for (auto *User : Addr->users()) {
3469 // If the user is a load or store, then the offset is 0 which is always
3470 // within alignment.
3471 if (isRegImmLoadOrStore(User, Addr))
3472 continue;
3473
3474 if (CurDAG->isBaseWithConstantOffset(SDValue(User, 0))) {
3475 int64_t CVal = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3476 if (!isInt<12>(CVal) || Alignment <= CVal)
3477 return false;
3478
3479 // Make sure all uses are foldable load/stores.
3480 for (auto *AddUser : User->users())
3481 if (!isRegImmLoadOrStore(AddUser, SDValue(User, 0)))
3482 return false;
3483
3484 continue;
3485 }
3486
3487 return false;
3488 }
3489
3490 return true;
3491}
3492
3493 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
3494 SDValue &Offset) {
3495 if (SelectAddrFrameIndex(Addr, Base, Offset))
3496 return true;
3497
3498 SDLoc DL(Addr);
3499 MVT VT = Addr.getSimpleValueType();
3500
3501 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
3502 bool CanFold = true;
3503 // Unconditionally fold if operand 1 is not a global address (e.g.
3504 // an external symbol).
3505 if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) {
3506 const DataLayout &DL = CurDAG->getDataLayout();
3507 Align Alignment = commonAlignment(
3508 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
3509 if (!areOffsetsWithinAlignment(Addr, Alignment))
3510 CanFold = false;
3511 }
3512 if (CanFold) {
3513 Base = Addr.getOperand(0);
3514 Offset = Addr.getOperand(1);
3515 return true;
3516 }
3517 }
3518
3519 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3520 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3521 if (isInt<12>(CVal)) {
3522 Base = Addr.getOperand(0);
3523 if (Base.getOpcode() == RISCVISD::ADD_LO) {
3524 SDValue LoOperand = Base.getOperand(1);
3525 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
3526 // If the Lo in (ADD_LO hi, lo) is a global variable's address
3527 // (its low part, really), then we can rely on the alignment of that
3528 // variable to provide a margin of safety before the low part can overflow
3529 // the 12 bits of the load/store offset. Check if CVal falls within
3530 // that margin; if so, (low part + CVal) can't overflow.
3531 const DataLayout &DL = CurDAG->getDataLayout();
3532 Align Alignment = commonAlignment(
3533 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
3534 if ((CVal == 0 || Alignment > CVal) &&
3535 areOffsetsWithinAlignment(Base, Alignment)) {
3536 int64_t CombinedOffset = CVal + GA->getOffset();
3537 Base = Base.getOperand(0);
3538 Offset = CurDAG->getTargetGlobalAddress(
3539 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
3540 CombinedOffset, GA->getTargetFlags());
3541 return true;
3542 }
3543 }
3544 }
3545
3546 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3547 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3548 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3549 return true;
3550 }
3551 }
3552
3553 // Handle ADD with large immediates.
3554 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
3555 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3556 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3557
3558 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
3559 // an ADDI for part of the offset and fold the rest into the load/store.
3560 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
3561 if (CVal >= -4096 && CVal <= 4094) {
3562 int64_t Adj = CVal < 0 ? -2048 : 2047;
3563 Base = SDValue(
3564 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
3565 CurDAG->getSignedTargetConstant(Adj, DL, VT)),
3566 0);
3567 Offset = CurDAG->getSignedTargetConstant(CVal - Adj, DL, VT);
3568 return true;
3569 }
3570
3571 // For larger immediates, we might be able to save one instruction from
3572 // constant materialization by folding the Lo12 bits of the immediate into
3573 // the address. We should only do this if the ADD is only used by loads and
3574 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
3575 // separately with the full materialized immediate creating extra
3576 // instructions.
3577 if (isWorthFoldingAdd(Addr) &&
3578 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
3579 Offset, /*IsPrefetch=*/false)) {
3580 // Insert an ADD instruction with the materialized Hi52 bits.
3581 Base = SDValue(
3582 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
3583 0);
3584 return true;
3585 }
3586 }
3587
3588 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3589 /*IsPrefetch=*/false))
3590 return true;
3591
3592 Base = Addr;
3593 Offset = CurDAG->getTargetConstant(0, DL, VT);
3594 return true;
3595}
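// Worked example of the AddiPair split above (illustrative): a load at
// base+3000 is out of simm12 range, so it becomes
//   addi a1, a0, 2047
//   lw   a2, 953(a1)        ; 2047 + 953 == 3000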
3596
3597/// Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
3598 bool RISCVDAGToDAGISel::SelectAddrRegImm9(SDValue Addr, SDValue &Base,
3599 SDValue &Offset) {
3600 if (SelectAddrFrameIndex(Addr, Base, Offset))
3601 return true;
3602
3603 SDLoc DL(Addr);
3604 MVT VT = Addr.getSimpleValueType();
3605
3606 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3607 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3608 if (isUInt<9>(CVal)) {
3609 Base = Addr.getOperand(0);
3610
3611 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3612 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3613 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3614 return true;
3615 }
3616 }
3617
3618 Base = Addr;
3619 Offset = CurDAG->getTargetConstant(0, DL, VT);
3620 return true;
3621}
3622
3623/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
3624/// Offset should be all zeros.
3625 bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
3626 SDValue &Offset) {
3627 if (SelectAddrFrameIndex(Addr, Base, Offset))
3628 return true;
3629
3630 SDLoc DL(Addr);
3631 MVT VT = Addr.getSimpleValueType();
3632
3633 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3634 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3635 if (isInt<12>(CVal)) {
3636 Base = Addr.getOperand(0);
3637
3638 // Early-out if not a valid offset.
3639 if ((CVal & 0b11111) != 0) {
3640 Base = Addr;
3641 Offset = CurDAG->getTargetConstant(0, DL, VT);
3642 return true;
3643 }
3644
3645 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3646 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3647 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3648 return true;
3649 }
3650 }
3651
3652 // Handle ADD with large immediates.
3653 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
3654 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3655 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3656
3657 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
3658 // one instruction by folding an adjustment of -2048 or 2016 into the offset.
3659 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
3660 int64_t Adj = CVal < 0 ? -2048 : 2016;
3661 int64_t AdjustedOffset = CVal - Adj;
3662 Base =
3663 SDValue(CurDAG->getMachineNode(
3664 RISCV::ADDI, DL, VT, Addr.getOperand(0),
3665 CurDAG->getSignedTargetConstant(AdjustedOffset, DL, VT)),
3666 0);
3667 Offset = CurDAG->getSignedTargetConstant(Adj, DL, VT);
3668 return true;
3669 }
3670
3671 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
3672 Offset, /*IsPrefetch=*/true)) {
3673 // Insert an ADD instruction with the materialized Hi52 bits.
3674 Base = SDValue(
3675 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
3676 0);
3677 return true;
3678 }
3679 }
3680
3681 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3682 /*IsPrefetch=*/true))
3683 return true;
3684
3685 Base = Addr;
3686 Offset = CurDAG->getTargetConstant(0, DL, VT);
3687 return true;
3688}
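// Worked example (illustrative): prefetch offsets need the low 5 bits clear,
// so base+2100 is split as
//   addi       a1, a0, 84
//   prefetch.r 2016(a1)     ; 84 + 2016 == 2100, and 2016 % 32 == 0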
3689
3690 /// Return true if this is a load/store that we have a RegRegScale instruction for.
3691 static bool isRegRegScaleLoadOrStore(SDNode *User, SDValue Add,
3692 const RISCVSubtarget &Subtarget) {
3693 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE)
3694 return false;
3695 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
3696 if (!(VT.isScalarInteger() &&
3697 (Subtarget.hasVendorXTHeadMemIdx() || Subtarget.hasVendorXqcisls())) &&
3698 !((VT == MVT::f32 || VT == MVT::f64) &&
3699 Subtarget.hasVendorXTHeadFMemIdx()))
3700 return false;
3701 // Don't allow stores of the value. It must be used as the address.
3702 if (User->getOpcode() == ISD::STORE &&
3703 cast<StoreSDNode>(User)->getValue() == Add)
3704 return false;
3705
3706 return true;
3707}
3708
3709/// Is it profitable to fold this Add into RegRegScale load/store. If \p
3710/// Shift is non-null, then we have matched a shl+add. We allow reassociating
3711/// (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) if there is a
3712/// single addi and we don't have a SHXADD instruction we could use.
3713/// FIXME: May still need to check how many and what kind of users the SHL has.
3714 static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget,
3715 SDValue Add,
3716 SDValue Shift = SDValue()) {
3717 bool FoundADDI = false;
3718 for (auto *User : Add->users()) {
3719 if (isRegRegScaleLoadOrStore(User, Add, Subtarget))
3720 continue;
3721
3722 // Allow a single ADDI that is used by loads/stores if we matched a shift.
3723 if (!Shift || FoundADDI || User->getOpcode() != ISD::ADD ||
3724 !isa<ConstantSDNode>(User->getOperand(1)) ||
3725 !isInt<12>(cast<ConstantSDNode>(User->getOperand(1))->getSExtValue()))
3726 return false;
3727
3728 FoundADDI = true;
3729
3730 // If we have a SHXADD instruction, prefer that over reassociating an ADDI.
3731 assert(Shift.getOpcode() == ISD::SHL);
3732 unsigned ShiftAmt = Shift.getConstantOperandVal(1);
3733 if (Subtarget.hasShlAdd(ShiftAmt))
3734 return false;
3735
3736 // All users of the ADDI should be load/store.
3737 for (auto *ADDIUser : User->users())
3738 if (!isRegRegScaleLoadOrStore(ADDIUser, SDValue(User, 0), Subtarget))
3739 return false;
3740 }
3741
3742 return true;
3743}
3744
3745 bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
3746 unsigned MaxShiftAmount,
3747 SDValue &Base, SDValue &Index,
3748 SDValue &Scale) {
3749 if (Addr.getOpcode() != ISD::ADD)
3750 return false;
3751 SDValue LHS = Addr.getOperand(0);
3752 SDValue RHS = Addr.getOperand(1);
3753
3754 EVT VT = Addr.getSimpleValueType();
3755 auto SelectShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
3756 SDValue &Shift) {
3757 if (N.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(N.getOperand(1)))
3758 return false;
3759
3760 // Only match shifts by a value in range [0, MaxShiftAmount].
3761 unsigned ShiftAmt = N.getConstantOperandVal(1);
3762 if (ShiftAmt > MaxShiftAmount)
3763 return false;
3764
3765 Index = N.getOperand(0);
3766 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
3767 return true;
3768 };
3769
3770 if (auto *C1 = dyn_cast<ConstantSDNode>(RHS)) {
3771 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
3772 if (LHS.getOpcode() == ISD::ADD &&
3773 !isa<ConstantSDNode>(LHS.getOperand(1)) &&
3774 isInt<12>(C1->getSExtValue())) {
3775 if (SelectShl(LHS.getOperand(1), Index, Scale) &&
3776 isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(1))) {
3777 SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
3778 SDLoc(Addr), VT);
3779 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
3780 LHS.getOperand(0), C1Val),
3781 0);
3782 return true;
3783 }
3784
3785 // Add is commutative so we need to check both operands.
3786 if (SelectShl(LHS.getOperand(0), Index, Scale) &&
3787 isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(0))) {
3788 SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
3789 SDLoc(Addr), VT);
3790 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
3791 LHS.getOperand(1), C1Val),
3792 0);
3793 return true;
3794 }
3795 }
3796
3797 // Don't match add with constants.
3798 // FIXME: Is this profitable for large constants that have 0s in the lower
3799 // 12 bits that we can materialize with LUI?
3800 return false;
3801 }
3802
3803 // Try to match a shift on the RHS.
3804 if (SelectShl(RHS, Index, Scale)) {
3805 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, RHS))
3806 return false;
3807 Base = LHS;
3808 return true;
3809 }
3810
3811 // Try to match a shift on the LHS.
3812 if (SelectShl(LHS, Index, Scale)) {
3813 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, LHS))
3814 return false;
3815 Base = RHS;
3816 return true;
3817 }
3818
3819 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr))
3820 return false;
3821
3822 Base = LHS;
3823 Index = RHS;
3824 Scale = CurDAG->getTargetConstant(0, SDLoc(Addr), VT);
3825 return true;
3826}
3827
3828 bool RISCVDAGToDAGISel::SelectAddrRegZextRegScale(SDValue Addr,
3829 unsigned MaxShiftAmount,
3830 unsigned Bits, SDValue &Base,
3831 SDValue &Index,
3832 SDValue &Scale) {
3833 if (!SelectAddrRegRegScale(Addr, MaxShiftAmount, Base, Index, Scale))
3834 return false;
3835
3836 if (Index.getOpcode() == ISD::AND) {
3837 auto *C = dyn_cast<ConstantSDNode>(Index.getOperand(1));
3838 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
3839 Index = Index.getOperand(0);
3840 return true;
3841 }
3842 }
3843
3844 return false;
3845}
3846
3847 bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
3848 SDValue &Offset) {
3849 if (Addr.getOpcode() != ISD::ADD)
3850 return false;
3851
3852 if (isa<ConstantSDNode>(Addr.getOperand(1)))
3853 return false;
3854
3855 Base = Addr.getOperand(0);
3856 Offset = Addr.getOperand(1);
3857 return true;
3858}
3859
3860 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
3861 SDValue &ShAmt) {
3862 ShAmt = N;
3863
3864 // Peek through zext.
3865 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
3866 ShAmt = ShAmt.getOperand(0);
3867
3868 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
3869 // amount. If there is an AND on the shift amount, we can bypass it if it
3870 // doesn't affect any of those bits.
3871 if (ShAmt.getOpcode() == ISD::AND &&
3872 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
3873 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
3874
3875 // Since the max shift amount is a power of 2 we can subtract 1 to make a
3876 // mask that covers the bits needed to represent all shift amounts.
3877 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
3878 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
3879
3880 if (ShMask.isSubsetOf(AndMask)) {
3881 ShAmt = ShAmt.getOperand(0);
3882 } else {
3883 // SimplifyDemandedBits may have optimized the mask so try restoring any
3884 // bits that are known zero.
3885 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
3886 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
3887 return true;
3888 ShAmt = ShAmt.getOperand(0);
3889 }
3890 }
3891
3892 if (ShAmt.getOpcode() == ISD::ADD &&
3893 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
3894 uint64_t Imm = ShAmt.getConstantOperandVal(1);
3895 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
3896 // to avoid the ADD.
3897 if (Imm != 0 && Imm % ShiftWidth == 0) {
3898 ShAmt = ShAmt.getOperand(0);
3899 return true;
3900 }
3901 } else if (ShAmt.getOpcode() == ISD::SUB &&
3902 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
3903 uint64_t Imm = ShAmt.getConstantOperandVal(0);
3904 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
3905 // generate a NEG instead of a SUB of a constant.
3906 if (Imm != 0 && Imm % ShiftWidth == 0) {
3907 SDLoc DL(ShAmt);
3908 EVT VT = ShAmt.getValueType();
3909 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
3910 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
3911 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
3912 ShAmt.getOperand(1));
3913 ShAmt = SDValue(Neg, 0);
3914 return true;
3915 }
3916 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3917 // to generate a NOT instead of a SUB of a constant.
3918 if (Imm % ShiftWidth == ShiftWidth - 1) {
3919 SDLoc DL(ShAmt);
3920 EVT VT = ShAmt.getValueType();
3921 MachineSDNode *Not = CurDAG->getMachineNode(
3922 RISCV::XORI, DL, VT, ShAmt.getOperand(1),
3923 CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/true));
3924 ShAmt = SDValue(Not, 0);
3925 return true;
3926 }
3927 }
3928
3929 return true;
3930}
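// Illustrative examples on RV64, where shifts read only the low 6 bits:
//   (srl x, (and y, 63)) -> srl x, y              ; mask covers all shift bits
//   (sll x, (sub 64, y)) -> negate y, then sll    ; 64 % 64 == 0
//   (sll x, (sub 63, y)) -> not y (xori -1), sll  ; 63 % 64 == 63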
3931
3932/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
3933/// check for equality with 0. This function emits instructions that convert the
3934/// seteq/setne into something that can be compared with 0.
3935/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
3936/// ISD::SETNE).
3937 bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
3938 SDValue &Val) {
3939 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
3940 "Unexpected condition code!");
3941
3942 // We're looking for a setcc.
3943 if (N->getOpcode() != ISD::SETCC)
3944 return false;
3945
3946 // Must be an equality comparison.
3947 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
3948 if (CCVal != ExpectedCCVal)
3949 return false;
3950
3951 SDValue LHS = N->getOperand(0);
3952 SDValue RHS = N->getOperand(1);
3953
3954 if (!LHS.getValueType().isScalarInteger())
3955 return false;
3956
3957 // If the RHS is 0, we don't need any extra instructions; just return the LHS.
3958 if (isNullConstant(RHS)) {
3959 Val = LHS;
3960 return true;
3961 }
3962
3963 SDLoc DL(N);
3964
3965 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
3966 int64_t CVal = C->getSExtValue();
3967 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
3968 // non-zero otherwise.
3969 if (CVal == -2048) {
3970 Val = SDValue(
3971 CurDAG->getMachineNode(
3972 RISCV::XORI, DL, N->getValueType(0), LHS,
3973 CurDAG->getSignedTargetConstant(CVal, DL, N->getValueType(0))),
3974 0);
3975 return true;
3976 }
3977 // If the RHS is [-2047,2048], we can use addi/addiw with -RHS to produce 0
3978 // if the LHS is equal to the RHS and non-zero otherwise.
3979 if (isInt<12>(CVal) || CVal == 2048) {
3980 unsigned Opc = RISCV::ADDI;
3981 if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3982 cast<VTSDNode>(LHS.getOperand(1))->getVT() == MVT::i32) {
3983 Opc = RISCV::ADDIW;
3984 LHS = LHS.getOperand(0);
3985 }
3986
3987 Val = SDValue(CurDAG->getMachineNode(Opc, DL, N->getValueType(0), LHS,
3988 CurDAG->getSignedTargetConstant(
3989 -CVal, DL, N->getValueType(0))),
3990 0);
3991 return true;
3992 }
3993 if (isPowerOf2_64(CVal) && Subtarget->hasStdExtZbs()) {
3994 Val = SDValue(
3995 CurDAG->getMachineNode(
3996 RISCV::BINVI, DL, N->getValueType(0), LHS,
3997 CurDAG->getTargetConstant(Log2_64(CVal), DL, N->getValueType(0))),
3998 0);
3999 return true;
4000 }
4001 // Same as the addi case above, but for larger immediates (signed 26-bit), use
4002 // the QC_E_ADDI instruction from the Xqcilia extension, if available. Avoid
4003 // anything which can be done with a single lui as it might be compressible.
4004 if (Subtarget->hasVendorXqcilia() && isInt<26>(CVal) &&
4005 (CVal & 0xFFF) != 0) {
4006 Val = SDValue(
4007 CurDAG->getMachineNode(
4008 RISCV::QC_E_ADDI, DL, N->getValueType(0), LHS,
4009 CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))),
4010 0);
4011 return true;
4012 }
4013 }
4014
4015 // If nothing else we can XOR the LHS and RHS to produce zero if they are
4016 // equal and a non-zero value if they aren't.
4017 Val = SDValue(
4018 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
4019 return true;
4020}
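// Worked examples of the conversions above (illustrative):
//   (seteq x, 0)    -> compare x itself against zero
//   (seteq x, 5)    -> addi  t, x, -5   ; t == 0 iff x == 5
//   (seteq x, 4096) -> binvi t, x, 12   ; with Zbs, t == 0 iff x == 4096
//   (seteq x, y)    -> xor   t, x, y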
4021
4022 bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
4023 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4024 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
4025 Val = N.getOperand(0);
4026 return true;
4027 }
4028
4029 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
4030 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
4031 return N;
4032
4033 SDValue N0 = N.getOperand(0);
4034 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
4035 N.getConstantOperandVal(1) == ShiftAmt &&
4036 N0.getConstantOperandVal(1) == ShiftAmt)
4037 return N0.getOperand(0);
4038
4039 return N;
4040 };
4041
4042 MVT VT = N.getSimpleValueType();
4043 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
4044 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
4045 return true;
4046 }
4047
4048 return false;
4049}
4050
4051 bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
4052 if (N.getOpcode() == ISD::AND) {
4053 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4054 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
4055 Val = N.getOperand(0);
4056 return true;
4057 }
4058 }
4059 MVT VT = N.getSimpleValueType();
4060 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
4061 if (CurDAG->MaskedValueIsZero(N, Mask)) {
4062 Val = N;
4063 return true;
4064 }
4065
4066 return false;
4067}
4068
4069/// Look for various patterns that can be done with a SHL that can be folded
4070/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
4071/// SHXADD we are trying to match.
4072 bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
4073 SDValue &Val) {
4074 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
4075 SDValue N0 = N.getOperand(0);
4076
4077 if (bool LeftShift = N0.getOpcode() == ISD::SHL;
4078 (LeftShift || N0.getOpcode() == ISD::SRL) &&
4079 isa<ConstantSDNode>(N0.getOperand(1))) {
4080 uint64_t Mask = N.getConstantOperandVal(1);
4081 unsigned C2 = N0.getConstantOperandVal(1);
4082
4083 unsigned XLen = Subtarget->getXLen();
4084 if (LeftShift)
4085 Mask &= maskTrailingZeros<uint64_t>(C2);
4086 else
4087 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
4088
4089 if (isShiftedMask_64(Mask)) {
4090 unsigned Leading = XLen - llvm::bit_width(Mask);
4091 unsigned Trailing = llvm::countr_zero(Mask);
4092 if (Trailing != ShAmt)
4093 return false;
4094
4095 unsigned Opcode;
4096 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
4097 // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
4098 // followed by a SHXADD with c3 for the X amount.
4099 if (LeftShift && Leading == 0 && C2 < Trailing)
4100 Opcode = RISCV::SRLI;
4101 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 32-c2
4102 // leading zeros and c3 trailing zeros. We can use an SRLIW by c3-c2
4103 // followed by a SHXADD with c3 for the X amount.
4104 else if (LeftShift && Leading == 32 - C2 && C2 < Trailing)
4105 Opcode = RISCV::SRLIW;
4106 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
4107 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
4108 // followed by a SHXADD using c3 for the X amount.
4109 else if (!LeftShift && Leading == C2)
4110 Opcode = RISCV::SRLI;
4111 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with 32+c2
4112 // leading zeros and c3 trailing zeros. We can use an SRLIW by c2+c3
4113 // followed by a SHXADD using c3 for the X amount.
4114 else if (!LeftShift && Leading == 32 + C2)
4115 Opcode = RISCV::SRLIW;
4116 else
4117 return false;
4118
4119 SDLoc DL(N);
4120 EVT VT = N.getValueType();
4121 ShAmt = LeftShift ? Trailing - C2 : Trailing + C2;
4122 Val = SDValue(
4123 CurDAG->getMachineNode(Opcode, DL, VT, N0.getOperand(0),
4124 CurDAG->getTargetConstant(ShAmt, DL, VT)),
4125 0);
4126 return true;
4127 }
4128 } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
4129 isa<ConstantSDNode>(N0.getOperand(1))) {
4130 uint64_t Mask = N.getConstantOperandVal(1);
4131 unsigned C2 = N0.getConstantOperandVal(1);
4132
4133 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
4134 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
4135 // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as
4136 // the X amount.
4137 if (isShiftedMask_64(Mask)) {
4138 unsigned XLen = Subtarget->getXLen();
4139 unsigned Leading = XLen - llvm::bit_width(Mask);
4140 unsigned Trailing = llvm::countr_zero(Mask);
4141 if (C2 > Leading && Leading > 0 && Trailing == ShAmt) {
4142 SDLoc DL(N);
4143 EVT VT = N.getValueType();
4144 Val = SDValue(CurDAG->getMachineNode(
4145 RISCV::SRAI, DL, VT, N0.getOperand(0),
4146 CurDAG->getTargetConstant(C2 - Leading, DL, VT)),
4147 0);
4148 Val = SDValue(CurDAG->getMachineNode(
4149 RISCV::SRLI, DL, VT, Val,
4150 CurDAG->getTargetConstant(Leading + ShAmt, DL, VT)),
4151 0);
4152 return true;
4153 }
4154 }
4155 }
4156 } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
4157 (LeftShift || N.getOpcode() == ISD::SRL) &&
4158 isa<ConstantSDNode>(N.getOperand(1))) {
4159 SDValue N0 = N.getOperand(0);
4160 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
4161 isa<ConstantSDNode>(N0.getOperand(1))) {
4162 uint64_t Mask = N0.getConstantOperandVal(1);
4163 if (isShiftedMask_64(Mask)) {
4164 unsigned C1 = N.getConstantOperandVal(1);
4165 unsigned XLen = Subtarget->getXLen();
4166 unsigned Leading = XLen - llvm::bit_width(Mask);
4167 unsigned Trailing = llvm::countr_zero(Mask);
4168 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
4169 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
4170 if (LeftShift && Leading == 32 && Trailing > 0 &&
4171 (Trailing + C1) == ShAmt) {
4172 SDLoc DL(N);
4173 EVT VT = N.getValueType();
4174 Val = SDValue(CurDAG->getMachineNode(
4175 RISCV::SRLIW, DL, VT, N0.getOperand(0),
4176 CurDAG->getTargetConstant(Trailing, DL, VT)),
4177 0);
4178 return true;
4179 }
4180 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
4181 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
4182 if (!LeftShift && Leading == 32 && Trailing > C1 &&
4183 (Trailing - C1) == ShAmt) {
4184 SDLoc DL(N);
4185 EVT VT = N.getValueType();
4186 Val = SDValue(CurDAG->getMachineNode(
4187 RISCV::SRLIW, DL, VT, N0.getOperand(0),
4188 CurDAG->getTargetConstant(Trailing, DL, VT)),
4189 0);
4190 return true;
4191 }
4192 }
4193 }
4194 }
4195
4196 return false;
4197}
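// Worked example for the first pattern above (illustrative, ShAmt == 3): with
// c1 == 0xfffffffffffffff8 (no leading zeros, three trailing zeros),
//   (add (and (shl y, 2), c1), z)
// selects to
//   srli   t, y, 1          ; Trailing - C2 == 3 - 2
//   sh3add a, t, z          ; (t << 3) + z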
4198
4199/// Look for various patterns that can be done with a SHL that can be folded
4200/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
4201/// SHXADD_UW we are trying to match.
4202 bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
4203 SDValue &Val) {
4204 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
4205 N.hasOneUse()) {
4206 SDValue N0 = N.getOperand(0);
4207 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
4208 N0.hasOneUse()) {
4209 uint64_t Mask = N.getConstantOperandVal(1);
4210 unsigned C2 = N0.getConstantOperandVal(1);
4211
4212 Mask &= maskTrailingZeros<uint64_t>(C2);
4213
4214 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
4215 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
4216 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
4217 if (isShiftedMask_64(Mask)) {
4218 unsigned Leading = llvm::countl_zero(Mask);
4219 unsigned Trailing = llvm::countr_zero(Mask);
4220 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
4221 SDLoc DL(N);
4222 EVT VT = N.getValueType();
4223 Val = SDValue(CurDAG->getMachineNode(
4224 RISCV::SLLI, DL, VT, N0.getOperand(0),
4225 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
4226 0);
4227 return true;
4228 }
4229 }
4230 }
4231 }
4232
4233 return false;
4234}
4235
4236 bool RISCVDAGToDAGISel::orDisjoint(const SDNode *N) const {
4237 assert(N->getOpcode() == ISD::OR || N->getOpcode() == RISCVISD::OR_VL);
4238 if (N->getFlags().hasDisjoint())
4239 return true;
4240 return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
4241}
4242
4243bool RISCVDAGToDAGISel::selectImm64IfCheaper(int64_t Imm, int64_t OrigImm,
4244 SDValue N, SDValue &Val) {
4245 int OrigCost = RISCVMatInt::getIntMatCost(APInt(64, OrigImm), 64, *Subtarget,
4246 /*CompressionCost=*/true);
4247 int Cost = RISCVMatInt::getIntMatCost(APInt(64, Imm), 64, *Subtarget,
4248 /*CompressionCost=*/true);
4249 if (OrigCost <= Cost)
4250 return false;
4251
4252 Val = selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), Imm, *Subtarget);
4253 return true;
4254}
4255
4256 bool RISCVDAGToDAGISel::selectZExtImm32(SDValue N, SDValue &Val) {
4257 if (!isa<ConstantSDNode>(N))
4258 return false;
4259 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
4260 if ((Imm >> 31) != 1)
4261 return false;
4262
4263 for (const SDNode *U : N->users()) {
4264 switch (U->getOpcode()) {
4265 case ISD::ADD:
4266 break;
4267 case ISD::OR:
4268 if (orDisjoint(U))
4269 break;
4270 return false;
4271 default:
4272 return false;
4273 }
4274 }
4275
4276 return selectImm64IfCheaper(0xffffffff00000000 | Imm, Imm, N, Val);
4277}
4278
4279 bool RISCVDAGToDAGISel::selectNegImm(SDValue N, SDValue &Val) {
4280 if (!isa<ConstantSDNode>(N))
4281 return false;
4282 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
4283 if (isInt<32>(Imm))
4284 return false;
4285 if (Imm == INT64_MIN)
4286 return false;
4287
4288 for (const SDNode *U : N->users()) {
4289 switch (U->getOpcode()) {
4290 case ISD::ADD:
4291 break;
4292 case RISCVISD::VMV_V_X_VL:
4293 if (!all_of(U->users(), [](const SDNode *V) {
4294 return V->getOpcode() == ISD::ADD ||
4295 V->getOpcode() == RISCVISD::ADD_VL;
4296 }))
4297 return false;
4298 break;
4299 default:
4300 return false;
4301 }
4302 }
4303
4304 return selectImm64IfCheaper(-Imm, Imm, N, Val);
4305}
4306
4307 bool RISCVDAGToDAGISel::selectInvLogicImm(SDValue N, SDValue &Val) {
4308 if (!isa<ConstantSDNode>(N))
4309 return false;
4310 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
4311
4312 // For 32-bit signed constants, we can only substitute LUI+ADDI with LUI.
4313 if (isInt<32>(Imm) && ((Imm & 0xfff) != 0xfff || Imm == -1))
4314 return false;
4315
4316 // Abandon this transform if the constant is needed elsewhere.
4317 for (const SDNode *U : N->users()) {
4318 switch (U->getOpcode()) {
4319 case ISD::AND:
4320 case ISD::OR:
4321 case ISD::XOR:
4322 if (!(Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb()))
4323 return false;
4324 break;
4325 case RISCVISD::VMV_V_X_VL:
4326 if (!Subtarget->hasStdExtZvkb())
4327 return false;
4328 if (!all_of(U->users(), [](const SDNode *V) {
4329 return V->getOpcode() == ISD::AND ||
4330 V->getOpcode() == RISCVISD::AND_VL;
4331 }))
4332 return false;
4333 break;
4334 default:
4335 return false;
4336 }
4337 }
4338
4339 if (isInt<32>(Imm)) {
4340 Val =
4341 selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), ~Imm, *Subtarget);
4342 return true;
4343 }
4344
4345 // For 64-bit constants, the instruction sequences get complex,
4346 // so we select inverted only if it's cheaper.
4347 return selectImm64IfCheaper(~Imm, Imm, N, Val);
4348}
4349
4350static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
4351 unsigned Bits,
4352 const TargetInstrInfo *TII) {
4353 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
4354
4355 if (!MCOpcode)
4356 return false;
4357
4358 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
4359 const uint64_t TSFlags = MCID.TSFlags;
4360 if (!RISCVII::hasSEWOp(TSFlags))
4361 return false;
4362 assert(RISCVII::hasVLOp(TSFlags));
4363
4364 unsigned ChainOpIdx = User->getNumOperands() - 1;
4365 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
4366 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
4367 unsigned VLIdx = User->getNumOperands() - HasVecPolicyOp - HasChainOp - 2;
4368 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
4369
4370 if (UserOpNo == VLIdx)
4371 return false;
4372
4373 auto NumDemandedBits =
4374 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
4375 return NumDemandedBits && Bits >= *NumDemandedBits;
4376}
4377
4378// Return true if all users of this SDNode* only consume the lower \p Bits.
4379// This can be used to form W instructions for add/sub/mul/shl even when the
4380// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
4381// SimplifyDemandedBits has made it so some users see a sext_inreg and some
4382// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
4383// the add/sub/mul/shl to become non-W instructions. By checking the users we
4384// may be able to use a W instruction and CSE with the other instruction if
4385// this has happened. We could try to detect that the CSE opportunity exists
4386// before doing this, but that would be more complicated.
4387 bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
4388 const unsigned Depth) const {
4389 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
4390 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
4391 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
4392 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
4393 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
4394 isa<ConstantSDNode>(Node) || Depth != 0) &&
4395 "Unexpected opcode");
4396
4397 if (Depth >= SelectionDAG::MaxRecursionDepth)
4398 return false;
4399
4400 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
4401 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
4402 if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
4403 return false;
4404
4405 for (SDUse &Use : Node->uses()) {
4406 SDNode *User = Use.getUser();
4407 // Users of this node should have already been instruction selected
4408 if (!User->isMachineOpcode())
4409 return false;
4410
4411 // TODO: Add more opcodes?
4412 switch (User->getMachineOpcode()) {
4413 default:
4414 if (vectorPseudoHasAllNBitUsers(User, Use.getOperandNo(), Bits, TII))
4415 break;
4416 return false;
4417 case RISCV::ADDW:
4418 case RISCV::ADDIW:
4419 case RISCV::SUBW:
4420 case RISCV::MULW:
4421 case RISCV::SLLW:
4422 case RISCV::SLLIW:
4423 case RISCV::SRAW:
4424 case RISCV::SRAIW:
4425 case RISCV::SRLW:
4426 case RISCV::SRLIW:
4427 case RISCV::DIVW:
4428 case RISCV::DIVUW:
4429 case RISCV::REMW:
4430 case RISCV::REMUW:
4431 case RISCV::ROLW:
4432 case RISCV::RORW:
4433 case RISCV::RORIW:
4434 case RISCV::CLSW:
4435 case RISCV::CLZW:
4436 case RISCV::CTZW:
4437 case RISCV::CPOPW:
4438 case RISCV::SLLI_UW:
4439 case RISCV::ABSW:
4440 case RISCV::FMV_W_X:
4441 case RISCV::FCVT_H_W:
4442 case RISCV::FCVT_H_W_INX:
4443 case RISCV::FCVT_H_WU:
4444 case RISCV::FCVT_H_WU_INX:
4445 case RISCV::FCVT_S_W:
4446 case RISCV::FCVT_S_W_INX:
4447 case RISCV::FCVT_S_WU:
4448 case RISCV::FCVT_S_WU_INX:
4449 case RISCV::FCVT_D_W:
4450 case RISCV::FCVT_D_W_INX:
4451 case RISCV::FCVT_D_WU:
4452 case RISCV::FCVT_D_WU_INX:
4453 case RISCV::TH_REVW:
4454 case RISCV::TH_SRRIW:
4455 if (Bits >= 32)
4456 break;
4457 return false;
4458 case RISCV::SLL:
4459 case RISCV::SRA:
4460 case RISCV::SRL:
4461 case RISCV::ROL:
4462 case RISCV::ROR:
4463 case RISCV::BSET:
4464 case RISCV::BCLR:
4465 case RISCV::BINV:
4466 // Shift amount operands only use log2(Xlen) bits.
4467 if (Use.getOperandNo() == 1 && Bits >= Log2_32(Subtarget->getXLen()))
4468 break;
4469 return false;
4470 case RISCV::SLLI:
4471 // SLLI only uses the lower (XLen - ShAmt) bits.
4472 if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(1))
4473 break;
4474 return false;
4475 case RISCV::ANDI:
4476 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
4477 break;
4478 goto RecCheck;
4479 case RISCV::ORI: {
4480 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
4481 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
4482 break;
4483 [[fallthrough]];
4484 }
4485 case RISCV::AND:
4486 case RISCV::OR:
4487 case RISCV::XOR:
4488 case RISCV::XORI:
4489 case RISCV::ANDN:
4490 case RISCV::ORN:
4491 case RISCV::XNOR:
4492 case RISCV::SH1ADD:
4493 case RISCV::SH2ADD:
4494 case RISCV::SH3ADD:
4495 RecCheck:
4496 if (hasAllNBitUsers(User, Bits, Depth + 1))
4497 break;
4498 return false;
4499 case RISCV::SRLI: {
4500 unsigned ShAmt = User->getConstantOperandVal(1);
4501 // If we are shifting right by less than Bits, and users don't demand any
4502 // bits that were shifted into [Bits-1:0], then we can consider this as an
4503 // N-Bit user.
4504 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
4505 break;
4506 return false;
4507 }
4508 case RISCV::SEXT_B:
4509 case RISCV::PACKH:
4510 if (Bits >= 8)
4511 break;
4512 return false;
4513 case RISCV::SEXT_H:
4514 case RISCV::FMV_H_X:
4515 case RISCV::ZEXT_H_RV32:
4516 case RISCV::ZEXT_H_RV64:
4517 case RISCV::PACKW:
4518 if (Bits >= 16)
4519 break;
4520 return false;
4521 case RISCV::PACK:
4522 if (Bits >= (Subtarget->getXLen() / 2))
4523 break;
4524 return false;
4525 case RISCV::PPAIRE_H:
4526 // If only the lower 32 bits of the result are used, then only the
4527 // lower 16 bits of the inputs are used.
4528 if (Bits >= 16 && hasAllNBitUsers(User, 32, Depth + 1))
4529 break;
4530 return false;
4531 case RISCV::ADD_UW:
4532 case RISCV::SH1ADD_UW:
4533 case RISCV::SH2ADD_UW:
4534 case RISCV::SH3ADD_UW:
4535 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
4536 // 32 bits.
4537 if (Use.getOperandNo() == 0 && Bits >= 32)
4538 break;
4539 return false;
4540 case RISCV::SB:
4541 if (Use.getOperandNo() == 0 && Bits >= 8)
4542 break;
4543 return false;
4544 case RISCV::SH:
4545 if (Use.getOperandNo() == 0 && Bits >= 16)
4546 break;
4547 return false;
4548 case RISCV::SW:
4549 if (Use.getOperandNo() == 0 && Bits >= 32)
4550 break;
4551 return false;
4552 case RISCV::TH_EXT:
4553 case RISCV::TH_EXTU: {
4554 unsigned Msb = User->getConstantOperandVal(1);
4555 unsigned Lsb = User->getConstantOperandVal(2);
4556 // Behavior of Msb < Lsb is not well documented.
4557 if (Msb >= Lsb && Bits > Msb)
4558 break;
4559 return false;
4560 }
4561 }
4562 }
4563
4564 return true;
4565}
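// Illustrative example: an i64 (add x, y) whose only users are SW stores and
// an ADDW consumes just the low 32 bits, so hasAllNBitUsers(N, 32) succeeds
// and the add itself can be selected as ADDW, letting it CSE with the ADDW.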
4566
4567// Select a constant that can be represented as (sign_extend(imm5) << imm2).
4568 bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
4569 SDValue &Shl2) {
4570 auto *C = dyn_cast<ConstantSDNode>(N);
4571 if (!C)
4572 return false;
4573
4574 int64_t Offset = C->getSExtValue();
4575 for (unsigned Shift = 0; Shift < 4; Shift++) {
4576 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) {
4577 EVT VT = N->getValueType(0);
4578 Simm5 = CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(N), VT);
4579 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), VT);
4580 return true;
4581 }
4582 }
4583
4584 return false;
4585}
4586
4587// Select VL as a 5 bit immediate or a value that will become a register. This
4588// allows us to choose between VSETIVLI or VSETVLI later.
4589 bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
4590 auto *C = dyn_cast<ConstantSDNode>(N);
4591 if (C && isUInt<5>(C->getZExtValue())) {
4592 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
4593 N->getValueType(0));
4594 } else if (C && C->isAllOnes()) {
4595 // Treat all ones as VLMax.
4596 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
4597 N->getValueType(0));
4598 } else if (isa<RegisterSDNode>(N) &&
4599 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
4600 // All our VL operands use an operand that allows GPRNoX0 or an immediate
4601 // as the register class. Convert X0 to a special immediate to pass the
4602 // MachineVerifier. This is recognized specially by the vsetvli insertion
4603 // pass.
4604 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
4605 N->getValueType(0));
4606 } else {
4607 VL = N;
4608 }
4609
4610 return true;
4611}
4612
4613 static SDValue findVSplat(SDValue N) {
4614 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
4615 if (!N.getOperand(0).isUndef())
4616 return SDValue();
4617 N = N.getOperand(1);
4618 }
4619 SDValue Splat = N;
4620 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
4621 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
4622 !Splat.getOperand(0).isUndef())
4623 return SDValue();
4624 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
4625 return Splat;
4626}
4627
4628 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
4629 SDValue Splat = findVSplat(N);
4630 if (!Splat)
4631 return false;
4632
4633 SplatVal = Splat.getOperand(1);
4634 return true;
4635}
4636
4637 static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
4638 SelectionDAG &DAG,
4639 const RISCVSubtarget &Subtarget,
4640 std::function<bool(int64_t)> ValidateImm,
4641 bool Decrement = false) {
4642 SDValue Splat = findVSplat(N);
4643 if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
4644 return false;
4645
4646 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
4647 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
4648 "Unexpected splat operand type");
4649
4650 // The semantics of RISCVISD::VMV_V_X_VL are that when the operand
4651 // type is wider than the resulting vector element type: an implicit
4652 // truncation first takes place. Therefore, perform a manual
4653 // truncation/sign-extension in order to ignore any truncated bits and catch
4654 // any zero-extended immediate.
4655 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
4656 // sign-extending to (XLenVT -1).
4657 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
4658
4659 int64_t SplatImm = SplatConst.getSExtValue();
4660
4661 if (!ValidateImm(SplatImm))
4662 return false;
4663
4664 if (Decrement)
4665 SplatImm -= 1;
4666
4667 SplatVal =
4668 DAG.getSignedTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
4669 return true;
4670}
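// Worked example (illustrative): a splat of i8 -1 arrives as the XLenVT
// constant 255; sextOrTrunc to the 8-bit element width yields -1, which
// passes the simm5 check, so the splat can be selected as vmv.v.i v8, -1.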
4671
4672 bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
4673 return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
4674 [](int64_t Imm) { return isInt<5>(Imm); });
4675}
4676
4677 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
4678 return selectVSplatImmHelper(
4679 N, SplatVal, *CurDAG, *Subtarget,
4680 [](int64_t Imm) { return Imm >= -15 && Imm <= 16; },
4681 /*Decrement=*/true);
4682}
4683
4684 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal) {
4685 return selectVSplatImmHelper(
4686 N, SplatVal, *CurDAG, *Subtarget,
4687 [](int64_t Imm) { return Imm >= -15 && Imm <= 16; },
4688 /*Decrement=*/false);
4689}
4690
4691 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
4692 SDValue &SplatVal) {
4693 return selectVSplatImmHelper(
4694 N, SplatVal, *CurDAG, *Subtarget,
4695 [](int64_t Imm) { return Imm != 0 && Imm >= -15 && Imm <= 16; },
4696 /*Decrement=*/true);
4697}
4698
4699 bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
4700 SDValue &SplatVal) {
4701 return selectVSplatImmHelper(
4702 N, SplatVal, *CurDAG, *Subtarget,
4703 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
4704}
4705
4706 bool RISCVDAGToDAGISel::selectVSplatImm64Neg(SDValue N, SDValue &SplatVal) {
4707 SDValue Splat = findVSplat(N);
4708 return Splat && selectNegImm(Splat.getOperand(1), SplatVal);
4709}
4710
4711 bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
4712 auto IsExtOrTrunc = [](SDValue N) {
4713 switch (N->getOpcode()) {
4714 case ISD::SIGN_EXTEND:
4715 case ISD::ZERO_EXTEND:
4716 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
4717 // inactive elements will be undef.
4718 case RISCVISD::TRUNCATE_VECTOR_VL:
4719 case RISCVISD::VSEXT_VL:
4720 case RISCVISD::VZEXT_VL:
4721 return true;
4722 default:
4723 return false;
4724 }
4725 };
4726
4727 // We can have multiple nested nodes, so unravel them all if needed.
4728 while (IsExtOrTrunc(N)) {
4729 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
4730 return false;
4731 N = N->getOperand(0);
4732 }
4733
4734 return selectVSplat(N, SplatVal);
4735}
4736
4737 bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
4738 // Allow bitcasts from XLenVT -> FP.
4739 if (N.getOpcode() == ISD::BITCAST &&
4740 N.getOperand(0).getValueType() == Subtarget->getXLenVT()) {
4741 Imm = N.getOperand(0);
4742 return true;
4743 }
4744 // Allow moves from XLenVT to FP.
4745 if (N.getOpcode() == RISCVISD::FMV_H_X ||
4746 N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
4747 Imm = N.getOperand(0);
4748 return true;
4749 }
4750
4751 // Otherwise, look for FP constants that can be materialized with a scalar int.
4752 ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
4753 if (!CFP)
4754 return false;
4755 const APFloat &APF = CFP->getValueAPF();
4756 // td can handle +0.0 already.
4757 if (APF.isPosZero())
4758 return false;
4759
4760 MVT VT = CFP->getSimpleValueType(0);
4761
4762 MVT XLenVT = Subtarget->getXLenVT();
4763 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
4764 assert(APF.isNegZero() && "Unexpected constant.");
4765 return false;
4766 }
4767 SDLoc DL(N);
4768 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
4769 *Subtarget);
4770 return true;
4771}
4772
4773 bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
4774 SDValue &Imm) {
4775 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
4776 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
4777
4778 if (!isInt<5>(ImmVal))
4779 return false;
4780
4781 Imm = CurDAG->getSignedTargetConstant(ImmVal, SDLoc(N),
4782 Subtarget->getXLenVT());
4783 return true;
4784 }
4785
4786 return false;
4787}
4788
4789// Match XOR with a VMSET_VL operand. Return the other operand.
4791 if (N.getOpcode() != ISD::XOR)
4792 return false;
4793
4794 if (N.getOperand(0).getOpcode() == RISCVISD::VMSET_VL) {
4795 Res = N.getOperand(1);
4796 return true;
4797 }
4798
4799 if (N.getOperand(1).getOpcode() == RISCVISD::VMSET_VL) {
4800 Res = N.getOperand(0);
4801 return true;
4802 }
4803
4804 return false;
4805}
4806
4807 // Match VMXOR_VL with a VMSET_VL operand, making sure that the VL operand
4808// matches the parent's VL. Return the other operand of the VMXOR_VL.
4810 SDValue &Res) {
4811 if (N.getOpcode() != RISCVISD::VMXOR_VL)
4812 return false;
4813
4814 assert(Parent &&
4815 (Parent->getOpcode() == RISCVISD::VMAND_VL ||
4816 Parent->getOpcode() == RISCVISD::VMOR_VL ||
4817 Parent->getOpcode() == RISCVISD::VMXOR_VL) &&
4818 "Unexpected parent");
4819
4820 // The VL should match the parent.
4821 if (Parent->getOperand(2) != N->getOperand(2))
4822 return false;
4823
4824 if (N.getOperand(0).getOpcode() == RISCVISD::VMSET_VL) {
4825 Res = N.getOperand(1);
4826 return true;
4827 }
4828
4829 if (N.getOperand(1).getOpcode() == RISCVISD::VMSET_VL) {
4830 Res = N.getOperand(0);
4831 return true;
4832 }
4833
4834 return false;
4835}
4836
4837// Try to remove sext.w if the input is a W instruction or can be made into
4838// a W instruction cheaply.
4839bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
4840 // Look for the sext.w pattern, addiw rd, rs1, 0.
4841 if (N->getMachineOpcode() != RISCV::ADDIW ||
4842 !isNullConstant(N->getOperand(1)))
4843 return false;
4844
4845 SDValue N0 = N->getOperand(0);
4846 if (!N0.isMachineOpcode())
4847 return false;
4848
4849 switch (N0.getMachineOpcode()) {
4850 default:
4851 break;
4852 case RISCV::ADD:
4853 case RISCV::ADDI:
4854 case RISCV::SUB:
4855 case RISCV::MUL:
4856 case RISCV::SLLI: {
4857 // Convert sext.w+add/sub/mul to their W instructions. This will create
4858 // a new independent instruction. This improves latency.
4859 unsigned Opc;
4860 switch (N0.getMachineOpcode()) {
4861 default:
4862 llvm_unreachable("Unexpected opcode!");
4863 case RISCV::ADD: Opc = RISCV::ADDW; break;
4864 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
4865 case RISCV::SUB: Opc = RISCV::SUBW; break;
4866 case RISCV::MUL: Opc = RISCV::MULW; break;
4867 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
4868 }
4869
4870 SDValue N00 = N0.getOperand(0);
4871 SDValue N01 = N0.getOperand(1);
4872
4873 // Shift amount needs to be uimm5.
4874 if (N0.getMachineOpcode() == RISCV::SLLI &&
4875 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
4876 break;
4877
4878 SDNode *Result =
4879 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
4880 N00, N01);
4881 ReplaceUses(N, Result);
4882 return true;
4883 }
4884 case RISCV::ADDW:
4885 case RISCV::ADDIW:
4886 case RISCV::SUBW:
4887 case RISCV::MULW:
4888 case RISCV::SLLIW:
4889 case RISCV::PACKW:
4890 case RISCV::TH_MULAW:
4891 case RISCV::TH_MULAH:
4892 case RISCV::TH_MULSW:
4893 case RISCV::TH_MULSH:
4894 if (N0.getValueType() == MVT::i32)
4895 break;
4896
4897 // The result is already sign extended; just remove the sext.w.
4898 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
4899 ReplaceUses(N, N0.getNode());
4900 return true;
4901 }
4902
4903 return false;
4904}
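// Illustrative examples of the peephole above:
//   (ADDIW (ADD x, y), 0)  -> (ADDW x, y)   ; new W instruction
//   (ADDIW (ADDW x, y), 0) -> (ADDW x, y)   ; the sext.w is simply removed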
4905
4906static bool usesAllOnesMask(SDValue MaskOp) {
4907 const auto IsVMSet = [](unsigned Opc) {
4908 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
4909 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
4910 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
4911 Opc == RISCV::PseudoVMSET_M_B8;
4912 };
4913
4914 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
4915 // undefined behaviour if it's the wrong bitwidth, so we could choose to
4916 // assume that it's all-ones? Same applies to its VL.
4917 return MaskOp->isMachineOpcode() && IsVMSet(MaskOp.getMachineOpcode());
4918}
4919
4920static bool isImplicitDef(SDValue V) {
4921 if (!V.isMachineOpcode())
4922 return false;
4923 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
4924 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
4925 if (!isImplicitDef(V.getOperand(I)))
4926 return false;
4927 return true;
4928 }
4929 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
4930}
4931
4932// Optimize masked RVV pseudo instructions with a known all-ones mask to their
4933// corresponding "unmasked" pseudo versions.
4934bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
4935 const RISCV::RISCVMaskedPseudoInfo *I =
4936 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
4937 if (!I)
4938 return false;
4939
4940 unsigned MaskOpIdx = I->MaskOpIdx;
4941 if (!usesAllOnesMask(N->getOperand(MaskOpIdx)))
4942 return false;
4943
4944 // There are two classes of pseudos in the table - compares and
4945 // everything else. See the comment on RISCVMaskedPseudo for details.
4946 const unsigned Opc = I->UnmaskedPseudo;
4947 const MCInstrDesc &MCID = TII->get(Opc);
4948 const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MCID);
4949
4950 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
4951 const bool MaskedHasPassthru = RISCVII::isFirstDefTiedToFirstUse(MaskedMCID);
4952
4953 assert((RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ||
4954 !RISCVII::hasVecPolicyOp(MCID.TSFlags)) &&
4955 "Unmasked pseudo has policy but masked pseudo doesn't?");
4956 assert(RISCVII::hasVecPolicyOp(MCID.TSFlags) == HasPassthru &&
4957 "Unexpected pseudo structure");
4958 assert(!(HasPassthru && !MaskedHasPassthru) &&
4959 "Unmasked pseudo has passthru but masked pseudo doesn't?");
4960
4961 SmallVector<SDValue, 8> Ops;
4960
4962 // Skip the passthru operand at index 0 if the unmasked don't have one.
4963 bool ShouldSkip = !HasPassthru && MaskedHasPassthru;
4964 bool DropPolicy = !RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
4965 RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags);
4966 bool HasChainOp =
4967 N->getOperand(N->getNumOperands() - 1).getValueType() == MVT::Other;
4968 unsigned LastOpNum = N->getNumOperands() - 1 - HasChainOp;
4969 for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
4970 // Skip the mask
4971 SDValue Op = N->getOperand(I);
4972 if (I == MaskOpIdx)
4973 continue;
4974 if (DropPolicy && I == LastOpNum)
4975 continue;
4976 Ops.push_back(Op);
4977 }
4978
4979 MachineSDNode *Result =
4980 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4981
4982 if (!N->memoperands_empty())
4983 CurDAG->setNodeMemRefs(Result, N->memoperands());
4984
4985 Result->setFlags(N->getFlags());
4986 ReplaceUses(N, Result);
4987
4988 return true;
4989}
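// Illustrative example: PseudoVADD_VV_M1_MASK whose mask comes from
// PseudoVMSET_M_B* is rewritten to the unmasked PseudoVADD_VV_M1, dropping
// the mask operand (and the policy operand if the unmasked form lacks one).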
4990
4991 /// If our passthru is an implicit_def, use noreg instead. This sidesteps
4992 /// issues with MachineCSE not being able to CSE expressions with
4993/// IMPLICIT_DEF operands while preserving the semantic intent. See
4994/// pr64282 for context. Note that this transform is the last one
4995/// performed at ISEL DAG to DAG.
4996bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
4997 bool MadeChange = false;
4998 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4999
5000 while (Position != CurDAG->allnodes_begin()) {
5001 SDNode *N = &*--Position;
5002 if (N->use_empty() || !N->isMachineOpcode())
5003 continue;
5004
5005 const unsigned Opc = N->getMachineOpcode();
5006 if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
5007 !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
5008 !isImplicitDef(N->getOperand(0)))
5009 continue;
5010
5011 SmallVector<SDValue> Ops;
5012 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
5013 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
5014 SDValue Op = N->getOperand(I);
5015 Ops.push_back(Op);
5016 }
5017
5018 MachineSDNode *Result =
5019 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
5020 Result->setFlags(N->getFlags());
5021 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
5022 ReplaceUses(N, Result);
5023 MadeChange = true;
5024 }
5025 return MadeChange;
5026}
5027
5028
5029// This pass converts a legalized DAG into a RISCV-specific DAG, ready
5030// for instruction scheduling.
5031 FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
5032 CodeGenOptLevel OptLevel) {
5033 return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
5034}
5035
5036 char RISCVDAGToDAGISelLegacy::ID = 0;
5037
5038 RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM,
5039 CodeGenOptLevel OptLevel)
5040 : SelectionDAGISelLegacy(
5041 ID, std::make_unique<RISCVDAGToDAGISel>(TM, OptLevel)) {}
5042 INITIALIZE_PASS(RISCVDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
Definition InlineAsm.h:475
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Describe properties that are true of each instruction in the target description file.
Machine Value Type.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
MVT getVectorElementType() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
void setFlags(Flags f)
Bitwise OR the current flags with the given flags.
An SDNode that represents everything that will be needed to construct a MachineInstr.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TargetMachine, CodeGenOptLevel OptLevel)
bool selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal, SDValue &Val)
RISC-V doesn't have general instructions for integer setne/seteq, but we can check for equality with ...
bool selectSExtBits(SDValue N, unsigned Bits, SDValue &Val)
bool selectNegImm(SDValue N, SDValue &Val)
bool selectZExtBits(SDValue N, unsigned Bits, SDValue &Val)
bool selectSHXADD_UWOp(SDValue N, unsigned ShAmt, SDValue &Val)
Look for various patterns that can be done with a SHL that can be folded into a SHXADD_UW.
bool areOffsetsWithinAlignment(SDValue Addr, Align Alignment)
bool hasAllNBitUsers(SDNode *Node, unsigned Bits, const unsigned Depth=0) const
bool SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, SDValue &Offset)
Similar to SelectAddrRegImm, except that the least significant 5 bits of Offset should be all zeros.
bool selectZExtImm32(SDValue N, SDValue &Val)
bool SelectAddrRegZextRegScale(SDValue Addr, unsigned MaxShiftAmount, unsigned Bits, SDValue &Base, SDValue &Index, SDValue &Scale)
bool SelectAddrRegReg(SDValue Addr, SDValue &Base, SDValue &Offset)
bool selectVMNOT_VLOp(SDNode *Parent, SDValue N, SDValue &Res)
void selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsOrdered)
void selectVLSEGFF(SDNode *Node, unsigned NF, bool IsMasked)
bool selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal)
bool selectSimm5Shl2(SDValue N, SDValue &Simm5, SDValue &Shl2)
void selectSF_VC_X_SE(SDNode *Node)
bool orDisjoint(const SDNode *Node) const
bool tryWideningMulAcc(SDNode *Node, const SDLoc &DL)
bool selectLow8BitsVSplat(SDValue N, SDValue &SplatVal)
bool hasAllHUsers(SDNode *Node) const
bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
bool selectVSplatSimm5(SDValue N, SDValue &SplatVal)
bool selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm)
bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset)
bool tryUnsignedBitfieldInsertInZero(SDNode *Node, const SDLoc &DL, MVT VT, SDValue X, unsigned Msb, unsigned Lsb)
bool hasAllWUsers(SDNode *Node) const
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
bool selectInvLogicImm(SDValue N, SDValue &Val)
bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset)
void Select(SDNode *Node) override
Main hook for targets to transform nodes into machine nodes.
void selectXSfmmVSET(SDNode *Node)
bool trySignedBitfieldInsertInSign(SDNode *Node)
bool selectVSplat(SDValue N, SDValue &SplatVal)
void addVectorLoadStoreOperands(SDNode *Node, unsigned SEWImm, const SDLoc &DL, unsigned CurOp, bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl< SDValue > &Operands, bool IsLoad=false, MVT *IndexVT=nullptr)
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
bool SelectAddrRegImm9(SDValue Addr, SDValue &Base, SDValue &Offset)
Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
bool selectScalarFPAsInt(SDValue N, SDValue &Imm)
bool hasAllBUsers(SDNode *Node) const
void selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsStrided)
bool tryShrinkShlLogicImm(SDNode *Node)
void selectVSETVLI(SDNode *Node)
bool selectVLOp(SDValue N, SDValue &VL)
bool trySignedBitfieldExtract(SDNode *Node)
bool selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal)
bool selectVMNOTOp(SDValue N, SDValue &Res)
void selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsStrided)
bool selectVSplatImm64Neg(SDValue N, SDValue &SplatVal)
bool selectVSplatSimm5Plus1NonZero(SDValue N, SDValue &SplatVal)
bool tryUnsignedBitfieldExtract(SDNode *Node, const SDLoc &DL, MVT VT, SDValue X, unsigned Msb, unsigned Lsb)
void selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsOrdered)
bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt)
bool selectSHXADDOp(SDValue N, unsigned ShAmt, SDValue &Val)
Look for various patterns that can be done with a SHL that can be folded into a SHXADD.
bool tryIndexedLoad(SDNode *Node)
bool SelectAddrRegRegScale(SDValue Addr, unsigned MaxShiftAmount, SDValue &Base, SDValue &Index, SDValue &Scale)
bool selectVSplatUimm(SDValue N, unsigned Bits, SDValue &SplatVal)
bool hasShlAdd(int64_t ShAmt) const
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
static unsigned getRegClassIDForVecVT(MVT VT)
static RISCVVType::VLMUL getLMUL(MVT VT)
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
SelectionDAGISelLegacy(char &ID, std::unique_ptr< SelectionDAGISel > S)
const TargetLowering * TLI
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const
IsProfitableToFold - Returns true if it's profitable to fold the specific operand node N of U during ...
static bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, CodeGenOptLevel OptLevel, bool IgnoreChains=false)
IsLegalToFold - Returns true if the specific operand node N of U can be folded during instruction sel...
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
ilist< SDNode >::iterator allnodes_iterator
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
iterator_range< user_iterator > users()
Definition Value.h:426
#define INT64_MIN
Definition DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:823
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:857
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:997
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:848
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:854
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:892
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:982
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
static bool hasVLOp(uint64_t TSFlags)
static bool hasVecPolicyOp(uint64_t TSFlags)
static bool hasSEWOp(uint64_t TSFlags)
static bool isFirstDefTiedToFirstUse(const MCInstrDesc &Desc)
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
SmallVector< Inst, 8 > InstSeq
Definition RISCVMatInt.h:43
static unsigned decodeVSEW(unsigned VSEW)
LLVM_ABI unsigned encodeXSfmmVType(unsigned SEW, unsigned Widen, bool AltFmt)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
LLVM_ABI unsigned getSEWLMULRatio(unsigned SEW, VLMUL VLMul)
static unsigned decodeTWiden(unsigned TWiden)
LLVM_ABI unsigned encodeVTYPE(VLMUL VLMUL, unsigned SEW, bool TailAgnostic, bool MaskAgnostic, bool AltFmt=false)
unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode)
std::optional< unsigned > getVectorLowDemandedScalarBits(unsigned Opcode, unsigned Log2SEW)
static constexpr unsigned RVVBitsPerBlock
static constexpr int64_t VLMaxSentinel
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:557
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
static const MachineMemOperand::Flags MONontemporalBit1
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:315
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:325
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:204
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
unsigned M1(unsigned Val)
Definition VE.h:377
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:263
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
constexpr T maskTrailingZeros(unsigned N)
Create a bitmask with the N right-most bits set to 0, and all other bits set to 1.
Definition MathExtras.h:94
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition VE.h:376
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
FunctionPass * createRISCVISelDag(RISCVTargetMachine &TM, CodeGenOptLevel OptLevel)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:874
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.