1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
19#include "RISCVRegisterInfo.h"
21#include "RISCVSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
39#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/IntrinsicsRISCV.h"
46#include "llvm/Support/Debug.h"
52#include <optional>
53
54using namespace llvm;
55
56#define DEBUG_TYPE "riscv-lower"
57
58STATISTIC(NumTailCalls, "Number of tail calls");
59
60static cl::opt<unsigned> ExtensionMaxWebSize(
61 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
62 cl::desc("Give the maximum size (in number of nodes) of the web of "
63 "instructions that we will consider for VW expansion"),
64 cl::init(18));
65
66static cl::opt<bool>
67 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
68 cl::desc("Allow the formation of VW_W operations (e.g., "
69 "VWADD_W) with splat constants"),
70 cl::init(false));
71
72static cl::opt<unsigned> NumRepeatedDivisors(
73 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
74 cl::desc("Set the minimum number of repetitions of a divisor to allow "
75 "transformation to multiplications by the reciprocal"),
76 cl::init(2));
77
78static cl::opt<int>
79 FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
80 cl::desc("Give the maximum number of instructions that we will "
81 "use for creating a floating-point immediate value"),
82 cl::init(3));
83
84static cl::opt<bool>
85 ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden,
86 cl::desc("Swap add and addi in cases where the add may "
87 "be combined with a shift"),
88 cl::init(true));
89
90RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
91 const RISCVSubtarget &STI)
92 : TargetLowering(TM), Subtarget(STI) {
93
94 RISCVABI::ABI ABI = Subtarget.getTargetABI();
95 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
96
97 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
98 !Subtarget.hasStdExtF()) {
99 errs() << "Hard-float 'f' ABI can't be used for a target that "
100 "doesn't support the F instruction set extension (ignoring "
101 "target-abi)\n";
102 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
103 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
104 !Subtarget.hasStdExtD()) {
105 errs() << "Hard-float 'd' ABI can't be used for a target that "
106 "doesn't support the D instruction set extension (ignoring "
107 "target-abi)\n";
108 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
109 }
110
111 switch (ABI) {
112 default:
113 reportFatalUsageError("Don't know how to lower this ABI");
114 case RISCVABI::ABI_ILP32:
115 case RISCVABI::ABI_ILP32E:
116 case RISCVABI::ABI_ILP32F:
117 case RISCVABI::ABI_ILP32D:
118 case RISCVABI::ABI_LP64:
119 case RISCVABI::ABI_LP64E:
120 case RISCVABI::ABI_LP64F:
121 case RISCVABI::ABI_LP64D:
122 break;
123 }
124
125 MVT XLenVT = Subtarget.getXLenVT();
126
127 // Set up the register classes.
128 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
129
130 if (Subtarget.hasStdExtZfhmin())
131 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
132 if (Subtarget.hasStdExtZfbfmin() || Subtarget.hasVendorXAndesBFHCvt())
133 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
134 if (Subtarget.hasStdExtF())
135 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
136 if (Subtarget.hasStdExtD())
137 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
138 if (Subtarget.hasStdExtZhinxmin())
139 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
140 if (Subtarget.hasStdExtZfinx())
141 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
142 if (Subtarget.hasStdExtZdinx()) {
143 if (Subtarget.is64Bit())
144 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
145 else
146 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
147 }
148
149 static const MVT::SimpleValueType BoolVecVTs[] = {
150 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
151 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
152 static const MVT::SimpleValueType IntVecVTs[] = {
153 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
154 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
155 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
156 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
157 MVT::nxv4i64, MVT::nxv8i64};
158 static const MVT::SimpleValueType F16VecVTs[] = {
159 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
160 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
161 static const MVT::SimpleValueType BF16VecVTs[] = {
162 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
163 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
164 static const MVT::SimpleValueType F32VecVTs[] = {
165 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
166 static const MVT::SimpleValueType F64VecVTs[] = {
167 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
168 static const MVT::SimpleValueType VecTupleVTs[] = {
169 MVT::riscv_nxv1i8x2, MVT::riscv_nxv1i8x3, MVT::riscv_nxv1i8x4,
170 MVT::riscv_nxv1i8x5, MVT::riscv_nxv1i8x6, MVT::riscv_nxv1i8x7,
171 MVT::riscv_nxv1i8x8, MVT::riscv_nxv2i8x2, MVT::riscv_nxv2i8x3,
172 MVT::riscv_nxv2i8x4, MVT::riscv_nxv2i8x5, MVT::riscv_nxv2i8x6,
173 MVT::riscv_nxv2i8x7, MVT::riscv_nxv2i8x8, MVT::riscv_nxv4i8x2,
174 MVT::riscv_nxv4i8x3, MVT::riscv_nxv4i8x4, MVT::riscv_nxv4i8x5,
175 MVT::riscv_nxv4i8x6, MVT::riscv_nxv4i8x7, MVT::riscv_nxv4i8x8,
176 MVT::riscv_nxv8i8x2, MVT::riscv_nxv8i8x3, MVT::riscv_nxv8i8x4,
177 MVT::riscv_nxv8i8x5, MVT::riscv_nxv8i8x6, MVT::riscv_nxv8i8x7,
178 MVT::riscv_nxv8i8x8, MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
179 MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
180
181 if (Subtarget.hasVInstructions()) {
182 auto addRegClassForRVV = [this](MVT VT) {
183 // Disable the smallest fractional LMUL types if ELEN is less than
184 // RVVBitsPerBlock.
185 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
186 if (VT.getVectorMinNumElements() < MinElts)
187 return;
188
189 unsigned Size = VT.getSizeInBits().getKnownMinValue();
190 const TargetRegisterClass *RC;
191 if (Size <= RISCV::RVVBitsPerBlock)
192 RC = &RISCV::VRRegClass;
193 else if (Size == 2 * RISCV::RVVBitsPerBlock)
194 RC = &RISCV::VRM2RegClass;
195 else if (Size == 4 * RISCV::RVVBitsPerBlock)
196 RC = &RISCV::VRM4RegClass;
197 else if (Size == 8 * RISCV::RVVBitsPerBlock)
198 RC = &RISCV::VRM8RegClass;
199 else
200 llvm_unreachable("Unexpected size");
201
202 addRegisterClass(VT, RC);
203 };
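// Illustration of the mapping above, assuming the usual RVVBitsPerBlock of
// 64: nxv8i8 and nxv1i64 have a known minimum size of 64 bits and use VR
// (LMUL <= 1), nxv16i8 needs 2*64 bits and uses VRM2, nxv32i8 uses VRM4, and
// nxv64i8 uses VRM8. Fractional-LMUL types such as nxv1i8 also fit in VR,
// unless the ELEN check above disabled them.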
204
205 for (MVT VT : BoolVecVTs)
206 addRegClassForRVV(VT);
207 for (MVT VT : IntVecVTs) {
208 if (VT.getVectorElementType() == MVT::i64 &&
209 !Subtarget.hasVInstructionsI64())
210 continue;
211 addRegClassForRVV(VT);
212 }
213
214 if (Subtarget.hasVInstructionsF16Minimal() ||
215 Subtarget.hasVendorXAndesVPackFPH())
216 for (MVT VT : F16VecVTs)
217 addRegClassForRVV(VT);
218
219 if (Subtarget.hasVInstructionsBF16Minimal() ||
220 Subtarget.hasVendorXAndesVBFHCvt())
221 for (MVT VT : BF16VecVTs)
222 addRegClassForRVV(VT);
223
224 if (Subtarget.hasVInstructionsF32())
225 for (MVT VT : F32VecVTs)
226 addRegClassForRVV(VT);
227
228 if (Subtarget.hasVInstructionsF64())
229 for (MVT VT : F64VecVTs)
230 addRegClassForRVV(VT);
231
232 if (Subtarget.useRVVForFixedLengthVectors()) {
233 auto addRegClassForFixedVectors = [this](MVT VT) {
234 MVT ContainerVT = getContainerForFixedLengthVector(VT);
235 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
236 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
237 addRegisterClass(VT, TRI.getRegClass(RCID));
238 };
239 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
240 if (useRVVForFixedLengthVectorVT(VT))
241 addRegClassForFixedVectors(VT);
242
243 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
244 if (useRVVForFixedLengthVectorVT(VT))
245 addRegClassForFixedVectors(VT);
246 }
247
248 addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
249 addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
250 addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
251 addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
252 addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
253 addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
254 addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
255 addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
256 addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
257 addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
258 addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
259 addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
260 addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
261 addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
262 addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
263 addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
264 addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
265 addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
266 addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
267 addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
268 addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
269 addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
270 addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
271 addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
272 addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
273 addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
274 addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
275 addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
276 addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
277 addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
278 addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
279 addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
280 }
281
282 // Compute derived properties from the register classes.
283 computeRegisterProperties(STI.getRegisterInfo());
284
285 setStackPointerRegisterToSaveRestore(RISCV::X2);
286
287 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
288 MVT::i1, Promote);
289 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
290 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
291 MVT::i1, Promote);
292
293 // TODO: add all necessary setOperationAction calls.
294 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Custom);
295
296 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
297 setOperationAction(ISD::BR_CC, XLenVT, Expand);
298 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
300
305 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
308 }
309
310 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
311
312 setOperationAction(ISD::VASTART, MVT::Other, Custom);
313 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
314
315 if (!Subtarget.hasVendorXTHeadBb() && !Subtarget.hasVendorXqcibm() &&
316 !Subtarget.hasVendorXAndesPerf())
318
320
321 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasStdExtP() &&
322 !Subtarget.hasVendorXTHeadBb() && !Subtarget.hasVendorXqcibm() &&
323 !Subtarget.hasVendorXAndesPerf() &&
324 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
325 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
326
327 if (Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit()) {
328 setOperationAction(ISD::LOAD, MVT::i64, Custom);
329 setOperationAction(ISD::STORE, MVT::i64, Custom);
330 }
331
332 if (Subtarget.is64Bit()) {
334
335 setOperationAction(ISD::LOAD, MVT::i32, Custom);
337 MVT::i32, Custom);
339 if (!Subtarget.hasStdExtZbb())
342 Custom);
344 }
345 if (!Subtarget.hasStdExtZmmul()) {
347 } else if (Subtarget.is64Bit()) {
350 } else {
352 }
353
354 if (!Subtarget.hasStdExtM()) {
356 Expand);
357 } else if (Subtarget.is64Bit()) {
359 {MVT::i8, MVT::i16, MVT::i32}, Custom);
360 }
361
364 Expand);
365
367 Custom);
368
369 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
370 if (Subtarget.is64Bit())
372 } else if (Subtarget.hasVendorXTHeadBb()) {
373 if (Subtarget.is64Bit())
376 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
378 } else {
380 }
381
383 Subtarget.hasREV8Like() ? Legal : Expand);
384
385 if ((Subtarget.hasVendorXCVbitmanip() || Subtarget.hasVendorXqcibm()) &&
386 !Subtarget.is64Bit()) {
388 } else {
389 // Zbkb can use rev8+brev8 to implement bitreverse.
391 Subtarget.hasStdExtZbkb() ? Custom : Expand);
392 if (Subtarget.hasStdExtZbkb())
394 }
395
396 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtP() ||
397 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
399 Legal);
400 }
401
402 if (Subtarget.hasCTZLike()) {
403 if (Subtarget.is64Bit())
405 } else {
407 // If we have CLZW but not CTZW, custom promote i32.
408 if (Subtarget.hasStdExtP() && Subtarget.is64Bit())
410 }
411
412 if (!Subtarget.hasCPOPLike()) {
413 // TODO: These should be set to LibCall, but this currently breaks
414 // the Linux kernel build. See #101786. Lacks i128 tests, too.
415 if (Subtarget.is64Bit())
417 else
420 }
421
422 if (Subtarget.hasCLZLike()) {
423 // We need the custom lowering to make sure that the resulting sequence
424 // for the 32-bit case is efficient on 64-bit targets.
425 // Use default promotion for i32 without Zbb.
426 if (Subtarget.is64Bit() &&
427 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtP()))
429 } else {
431 }
432
433 if (Subtarget.hasStdExtP() ||
434 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
436 if (Subtarget.is64Bit())
438 } else if (Subtarget.hasShortForwardBranchOpt()) {
439 // We can use PseudoCCSUB to implement ABS.
441 } else if (Subtarget.is64Bit()) {
443 }
444
445 if (!Subtarget.useMIPSCCMovInsn() && !Subtarget.hasVendorXTHeadCondMov())
447
448 if (Subtarget.hasVendorXqcia() && !Subtarget.is64Bit()) {
455 }
456
457 static const unsigned FPLegalNodeTypes[] = {
458 ISD::FMINNUM, ISD::FMAXNUM, ISD::FMINIMUMNUM,
459 ISD::FMAXIMUMNUM, ISD::LRINT, ISD::LLRINT,
460 ISD::LROUND, ISD::LLROUND, ISD::STRICT_LRINT,
465
466 static const ISD::CondCode FPCCToExpand[] = {
470
471 static const unsigned FPOpToExpand[] = {
472 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
473 ISD::FREM};
474
475 static const unsigned FPRndMode[] = {
476 ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
477 ISD::FROUNDEVEN};
478
479 static const unsigned ZfhminZfbfminPromoteOps[] = {
480 ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM,
481 ISD::FMINIMUMNUM, ISD::FADD, ISD::FSUB,
486 ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,
487 ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
488 ISD::FROUNDEVEN, ISD::FCANONICALIZE};
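// For these ops, Promote means the f16/bf16 inputs are extended to f32, the
// operation is performed in f32, and the result is rounded back to the
// narrow type, since Zfhmin/Zfbfmin only provide moves and conversions.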
489
490 if (Subtarget.hasStdExtZfbfmin()) {
491 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
495 setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
496 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
498 setOperationAction(ISD::FABS, MVT::bf16, Custom);
499 setOperationAction(ISD::FNEG, MVT::bf16, Custom);
503 }
504
505 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
506 if (Subtarget.hasStdExtZfhOrZhinx()) {
507 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
508 setOperationAction(FPRndMode, MVT::f16,
509 Subtarget.hasStdExtZfa() ? Legal : Custom);
511 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16,
512 Subtarget.hasStdExtZfa() ? Legal : Custom);
513 if (Subtarget.hasStdExtZfa())
515 } else {
516 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
517 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Promote);
518 for (auto Op : {ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
521 setOperationAction(Op, MVT::f16, Custom);
522 setOperationAction(ISD::FABS, MVT::f16, Custom);
523 setOperationAction(ISD::FNEG, MVT::f16, Custom);
527 }
528
529 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
530
533 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
536 setOperationAction(ISD::BR_CC, MVT::f16, Expand);
537
539 ISD::FNEARBYINT, MVT::f16,
540 Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
541 setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
542 ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
543 ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
544 ISD::FLOG10, ISD::FLDEXP, ISD::FFREXP, ISD::FMODF},
545 MVT::f16, Promote);
546
547 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
548 // complete support for all operations in LegalizeDAG.
553 MVT::f16, Promote);
554
555 // We need to custom promote this.
556 if (Subtarget.is64Bit())
557 setOperationAction(ISD::FPOWI, MVT::i32, Custom);
558 }
559
560 if (Subtarget.hasStdExtFOrZfinx()) {
561 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
562 setOperationAction(FPRndMode, MVT::f32,
563 Subtarget.hasStdExtZfa() ? Legal : Custom);
564 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
567 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
568 setOperationAction(FPOpToExpand, MVT::f32, Expand);
569 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
570 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
571 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
572 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
574 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
575 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
576 Subtarget.isSoftFPABI() ? LibCall : Custom);
577 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
578 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
579 setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, Custom);
580 setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, Custom);
581
582 if (Subtarget.hasStdExtZfa()) {
584 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
585 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal);
586 } else {
587 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
588 }
589 }
590
591 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
592 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
593
594 if (Subtarget.hasStdExtDOrZdinx()) {
595 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
596
597 if (!Subtarget.is64Bit())
598 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
599
600 if (Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
601 !Subtarget.is64Bit()) {
602 setOperationAction(ISD::LOAD, MVT::f64, Custom);
603 setOperationAction(ISD::STORE, MVT::f64, Custom);
604 }
605
606 if (Subtarget.hasStdExtZfa()) {
608 setOperationAction(FPRndMode, MVT::f64, Legal);
609 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
610 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Legal);
611 } else {
612 if (Subtarget.is64Bit())
613 setOperationAction(FPRndMode, MVT::f64, Custom);
614
615 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
616 }
617
620 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
623 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
624 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
625 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
626 setOperationAction(FPOpToExpand, MVT::f64, Expand);
627 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
628 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
629 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
630 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
632 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
633 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
634 Subtarget.isSoftFPABI() ? LibCall : Custom);
635 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
636 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
637 setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, Custom);
638 setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, Expand);
639 }
640
641 if (Subtarget.is64Bit()) {
644 MVT::i32, Custom);
645 setOperationAction(ISD::LROUND, MVT::i32, Custom);
646 }
647
648 if (Subtarget.hasStdExtFOrZfinx()) {
650 Custom);
651
652 // f16/bf16 require custom handling.
654 Custom);
656 Custom);
657
659 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
660 setOperationAction(ISD::GET_FPENV, XLenVT, Custom);
661 setOperationAction(ISD::SET_FPENV, XLenVT, Custom);
662 setOperationAction(ISD::RESET_FPENV, MVT::Other, Custom);
663 setOperationAction(ISD::GET_FPMODE, XLenVT, Custom);
664 setOperationAction(ISD::SET_FPMODE, XLenVT, Custom);
665 setOperationAction(ISD::RESET_FPMODE, MVT::Other, Custom);
666 }
667
670 XLenVT, Custom);
671
673
674 if (Subtarget.is64Bit())
676
677 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
678 // Unfortunately this can't be determined just from the ISA naming string.
679 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
680 Subtarget.is64Bit() ? Legal : Custom);
681 setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64,
682 Subtarget.is64Bit() ? Legal : Custom);
683
684 if (Subtarget.is64Bit()) {
685 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
686 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
687 }
688
689 setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
691 if (Subtarget.is64Bit())
693
694 if (Subtarget.hasVendorXMIPSCBOP())
695 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
696 else if (Subtarget.hasStdExtZicbop())
697 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
698
699 if (Subtarget.hasStdExtZalrsc()) {
700 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
701 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
703 else
705 } else if (Subtarget.hasForcedAtomics()) {
706 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
707 } else {
709 }
710
711 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
712
714
715 if (getTargetMachine().getTargetTriple().isOSLinux()) {
716 // Custom lowering of llvm.clear_cache.
718 }
719
720 if (Subtarget.hasVInstructions()) {
722
723 setOperationAction(ISD::VSCALE, XLenVT, Custom);
724
725 // RVV intrinsics may have illegal operands.
726 // We also need to custom legalize vmv.x.s.
729 {MVT::i8, MVT::i16}, Custom);
730 if (Subtarget.is64Bit())
732 MVT::i32, Custom);
733 else
735 MVT::i64, Custom);
736
738 MVT::Other, Custom);
739
740 static const unsigned IntegerVPOps[] = {
741 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
742 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
743 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
744 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
745 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
746 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
747 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
748 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
749 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
750 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
751 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
752 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
753 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
754 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
755 ISD::EXPERIMENTAL_VP_SPLAT};
756
757 static const unsigned FloatingPointVPOps[] = {
758 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
759 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
760 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
761 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
762 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
763 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
764 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
765 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
766 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
767 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
768 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
769 ISD::VP_LLRINT, ISD::VP_REDUCE_FMINIMUM,
770 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
771
772 static const unsigned IntegerVecReduceOps[] = {
773 ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
774 ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
775 ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};
776
777 static const unsigned FloatingPointVecReduceOps[] = {
778 ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
779 ISD::VECREDUCE_FMAX, ISD::VECREDUCE_FMINIMUM, ISD::VECREDUCE_FMAXIMUM};
780
781 static const unsigned FloatingPointLibCallOps[] = {
782 ISD::FREM, ISD::FPOW, ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
783 ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2, ISD::FLOG10};
784
785 if (!Subtarget.is64Bit()) {
786 // We must custom-lower certain vXi64 operations on RV32 due to the vector
787 // element type being illegal.
789 MVT::i64, Custom);
790
791 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
792
793 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
794 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
795 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
796 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
797 MVT::i64, Custom);
798 }
799
800 for (MVT VT : BoolVecVTs) {
801 if (!isTypeLegal(VT))
802 continue;
803
805
806 // Mask VTs are custom-expanded into a series of standard nodes
810 VT, Custom);
811
813 Custom);
814
816 setOperationAction({ISD::SELECT_CC, ISD::VSELECT, ISD::VP_SELECT}, VT,
817 Expand);
818 setOperationAction(ISD::VP_MERGE, VT, Custom);
819
820 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
821 Custom);
822
823 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
824
826 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
827 Custom);
828
830 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
831 Custom);
832
833 // RVV has native int->float & float->int conversions where the
834 // element type sizes are within one power-of-two of each other. Any
835 // wider distances between type sizes have to be lowered as sequences
836 // which progressively narrow the gap in stages.
841 VT, Custom);
843 Custom);
844
845 // Expand all extending loads to types larger than this, and truncating
846 // stores from types larger than this.
847 for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
848 setTruncStoreAction(VT, OtherVT, Expand);
849 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
850 OtherVT, Expand);
851 }
852
853 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
854 ISD::VP_TRUNCATE, ISD::VP_SETCC},
855 VT, Custom);
856
859
861
862 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
863 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
864 setOperationAction(ISD::EXPERIMENTAL_VP_SPLAT, VT, Custom);
865
868 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
869 }
870
871 for (MVT VT : IntVecVTs) {
872 if (!isTypeLegal(VT))
873 continue;
874
877
878 // Vectors implement MULHS/MULHU.
880
881 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
882 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
884
886 Legal);
887
889
890 // Custom-lower extensions and truncations from/to mask types.
892 VT, Custom);
893
894 // RVV has native int->float & float->int conversions where the
895 // element type sizes are within one power-of-two of each other. Any
896 // wider distances between type sizes have to be lowered as sequences
897 // which progressively narrow the gap in stages.
902 VT, Custom);
904 Custom);
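// For example, an nxv2i8 -> nxv2f64 conversion is more than one power of
// two apart in element size, so it is emitted as a chain of conversions
// that each widen (or narrow) the element type by one power of two.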
908 VT, Legal);
909
910 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
911 // nodes which truncate by one power of two at a time.
914 Custom);
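// E.g. truncating nxv2i64 to nxv2i8 becomes three TRUNCATE_VECTOR_VL steps
// (i64 -> i32 -> i16 -> i8), each of which selects to a single narrowing
// shift (vnsrl) by zero.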
915
916 // Custom-lower insert/extract operations to simplify patterns.
918 Custom);
919
920 // Custom-lower reduction operations to set up the corresponding custom
921 // nodes' operands.
922 setOperationAction(IntegerVecReduceOps, VT, Custom);
923
924 setOperationAction(IntegerVPOps, VT, Custom);
925
926 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
927
928 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
929 VT, Custom);
930
932 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
933 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
934 VT, Custom);
935 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
936
939 VT, Custom);
940
943
945
946 for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
947 setTruncStoreAction(VT, OtherVT, Expand);
948 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
949 OtherVT, Expand);
950 }
951
954
955 // Splice
957
958 if (Subtarget.hasStdExtZvkb()) {
960 setOperationAction(ISD::VP_BSWAP, VT, Custom);
961 } else {
962 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
964 }
965
966 if (Subtarget.hasStdExtZvbb()) {
968 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
969 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
970 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
971 VT, Custom);
972 } else {
973 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
975 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
976 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
977 VT, Expand);
978
979 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
980 // range of f32.
981 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
982 if (isTypeLegal(FloatVT)) {
984 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
985 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
986 VT, Custom);
987 }
988 }
989
991 }
992
993 for (MVT VT : VecTupleVTs) {
994 if (!isTypeLegal(VT))
995 continue;
996
997 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
998 }
999
1000 // Expand various CCs to best match the RVV ISA, which natively supports UNE
1001 // but no other unordered comparisons, and supports all ordered comparisons
1002 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
1003 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
1004 // and we pattern-match those back to the "original", swapping operands once
1005 // more. This way we catch both operations and both "vf" and "fv" forms with
1006 // fewer patterns.
1007 static const ISD::CondCode VFPCCToExpand[] = {
1008 ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
1009 ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
1010 ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE,
1011 };
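// For example, a SETOGT comparison is expanded to SETOLT with the operands
// swapped, which maps directly onto vmflt.vv; for the scalar-operand forms
// the patterns swap back so that vmfgt.vf/vmflt.vf can be used as needed.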
1012
1013 // TODO: support more ops.
1014 static const unsigned ZvfhminZvfbfminPromoteOps[] = {
1015 ISD::FMINNUM,
1016 ISD::FMAXNUM,
1017 ISD::FMINIMUMNUM,
1018 ISD::FMAXIMUMNUM,
1019 ISD::FADD,
1020 ISD::FSUB,
1021 ISD::FMUL,
1022 ISD::FMA,
1023 ISD::FDIV,
1024 ISD::FSQRT,
1025 ISD::FCEIL,
1026 ISD::FTRUNC,
1027 ISD::FFLOOR,
1028 ISD::FROUND,
1029 ISD::FROUNDEVEN,
1030 ISD::FRINT,
1031 ISD::FNEARBYINT,
1033 ISD::SETCC,
1034 ISD::FMAXIMUM,
1035 ISD::FMINIMUM,
1042 ISD::VECREDUCE_FMIN,
1043 ISD::VECREDUCE_FMAX,
1044 ISD::VECREDUCE_FMINIMUM,
1045 ISD::VECREDUCE_FMAXIMUM};
1046
1047 // TODO: support more vp ops.
1048 static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
1049 ISD::VP_FADD,
1050 ISD::VP_FSUB,
1051 ISD::VP_FMUL,
1052 ISD::VP_FDIV,
1053 ISD::VP_FMA,
1054 ISD::VP_REDUCE_FMIN,
1055 ISD::VP_REDUCE_FMAX,
1056 ISD::VP_SQRT,
1057 ISD::VP_FMINNUM,
1058 ISD::VP_FMAXNUM,
1059 ISD::VP_FCEIL,
1060 ISD::VP_FFLOOR,
1061 ISD::VP_FROUND,
1062 ISD::VP_FROUNDEVEN,
1063 ISD::VP_FROUNDTOZERO,
1064 ISD::VP_FRINT,
1065 ISD::VP_FNEARBYINT,
1066 ISD::VP_SETCC,
1067 ISD::VP_FMINIMUM,
1068 ISD::VP_FMAXIMUM,
1069 ISD::VP_REDUCE_FMINIMUM,
1070 ISD::VP_REDUCE_FMAXIMUM};
1071
1072 // Sets common operation actions on RVV floating-point vector types.
1073 const auto SetCommonVFPActions = [&](MVT VT) {
1075 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
1076 // sizes are within one power-of-two of each other. Therefore conversions
1077 // between vXf16 and vXf64 must be lowered as sequences which convert via
1078 // vXf32.
1079 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
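// E.g. an nxv2f16 -> nxv2f64 FP_EXTEND is emitted as f16 -> f32 -> f64
// (two widening converts), and the corresponding FP_ROUND narrows through
// f32 in the same way.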
1080 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1081 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1082 // Custom-lower insert/extract operations to simplify patterns.
1084 Custom);
1085 // Expand various condition codes (explained above).
1086 setCondCodeAction(VFPCCToExpand, VT, Expand);
1087
1089 {ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM, ISD::FMINIMUMNUM}, VT,
1090 Legal);
1091 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);
1092
1093 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1094 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
1096 VT, Custom);
1097
1098 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1099
1100 // Expand FP operations that need libcalls.
1101 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1102
1104
1105 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1106
1107 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
1108 VT, Custom);
1109
1111 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1112 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1113 VT, Custom);
1114 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1115
1118
1121 VT, Custom);
1122
1125
1127 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1128 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1129
1130 setOperationAction(FloatingPointVPOps, VT, Custom);
1131
1133 Custom);
1136 VT, Legal);
1141 VT, Custom);
1142
1144 };
1145
1146 // Sets common extload/truncstore actions on RVV floating-point vector
1147 // types.
1148 const auto SetCommonVFPExtLoadTruncStoreActions =
1149 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1150 for (auto SmallVT : SmallerVTs) {
1151 setTruncStoreAction(VT, SmallVT, Expand);
1152 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1153 }
1154 };
1155
1156 // Sets common actions for f16 and bf16 for when there's only
1157 // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
1158 const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1159 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1161 Custom);
1162 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1163 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1164 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1165 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1166 Custom);
1168 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1174 VT, Custom);
1175 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1176 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1177 MVT EltVT = VT.getVectorElementType();
1178 if (isTypeLegal(EltVT))
1179 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
1181 VT, Custom);
1182 else
1183 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
1184 EltVT, Custom);
1185 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1186 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1187 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1188 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1189 ISD::VP_SCATTER},
1190 VT, Custom);
1191 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1192
1193 setOperationAction(ISD::FNEG, VT, Expand);
1194 setOperationAction(ISD::FABS, VT, Expand);
1196
1197 // Expand FP operations that need libcalls.
1198 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1199
1200 // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
1201 if (getLMUL(VT) == RISCVVType::LMUL_8) {
1202 setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1203 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1204 } else {
1205 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1206 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1207 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1208 }
1209 };
1210
1211 if (Subtarget.hasVInstructionsF16()) {
1212 for (MVT VT : F16VecVTs) {
1213 if (!isTypeLegal(VT))
1214 continue;
1215 SetCommonVFPActions(VT);
1216 }
1217 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1218 for (MVT VT : F16VecVTs) {
1219 if (!isTypeLegal(VT))
1220 continue;
1221 SetCommonPromoteToF32Actions(VT);
1222 }
1223 }
1224
1225 if (Subtarget.hasVInstructionsBF16Minimal()) {
1226 for (MVT VT : BF16VecVTs) {
1227 if (!isTypeLegal(VT))
1228 continue;
1229 SetCommonPromoteToF32Actions(VT);
1230 }
1231 }
1232
1233 if (Subtarget.hasVInstructionsF32()) {
1234 for (MVT VT : F32VecVTs) {
1235 if (!isTypeLegal(VT))
1236 continue;
1237 SetCommonVFPActions(VT);
1238 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1239 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1240 }
1241 }
1242
1243 if (Subtarget.hasVInstructionsF64()) {
1244 for (MVT VT : F64VecVTs) {
1245 if (!isTypeLegal(VT))
1246 continue;
1247 SetCommonVFPActions(VT);
1248 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1249 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1250 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1251 }
1252 }
1253
1254 if (Subtarget.useRVVForFixedLengthVectors()) {
1255 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
1256 if (!useRVVForFixedLengthVectorVT(VT))
1257 continue;
1258
1259 // By default everything must be expanded.
1260 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1261 setOperationAction(Op, VT, Expand);
1262 for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
1263 setTruncStoreAction(VT, OtherVT, Expand);
1264 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,
1265 OtherVT, Expand);
1266 }
1267
1268 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1269 // expansion to a build_vector of 0s.
1271
1272 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1274 Custom);
1275
1278 Custom);
1279
1281 VT, Custom);
1282
1284 VT, Custom);
1285
1287
1288 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1289
1291
1293
1296 Custom);
1297
1298 setOperationAction(ISD::BITCAST, VT, Custom);
1299
1301 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
1302 Custom);
1303
1305 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1306 Custom);
1307
1309 {
1318 },
1319 VT, Custom);
1321 Custom);
1322
1324
1325 // Operations below differ between mask vectors and other vectors.
1326 if (VT.getVectorElementType() == MVT::i1) {
1327 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1328 ISD::OR, ISD::XOR},
1329 VT, Custom);
1330
1331 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1332 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1333 VT, Custom);
1334
1335 setOperationAction(ISD::VP_MERGE, VT, Custom);
1336
1337 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1338 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1339 continue;
1340 }
1341
1342 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1343 // it before type legalization for i64 vectors on RV32. It will then be
1344 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1345 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1346 // improvements first.
1347 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1350
1351 // Lower BUILD_VECTOR with i64 type to VID on RV32 if possible.
1353 }
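// On RV32 the splat of an i64 value is type-legalized to
// SPLAT_VECTOR_PARTS carrying the two i32 halves, which the custom
// lowering reassembles into a single vector splat.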
1354
1356 {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
1357
1358 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1359 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1360 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1361 ISD::VP_SCATTER},
1362 VT, Custom);
1363 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1364
1368 VT, Custom);
1369
1372
1374
1375 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1376 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1378
1382 VT, Custom);
1383
1385
1388
1389 // Custom-lower reduction operations to set up the corresponding custom
1390 // nodes' operands.
1391 setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
1392 ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
1393 ISD::VECREDUCE_UMIN},
1394 VT, Custom);
1395
1396 setOperationAction(IntegerVPOps, VT, Custom);
1397
1398 if (Subtarget.hasStdExtZvkb())
1400
1401 if (Subtarget.hasStdExtZvbb()) {
1404 VT, Custom);
1405 } else {
1406 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
1407 // range of f32.
1408 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1409 if (isTypeLegal(FloatVT))
1412 Custom);
1413 }
1414
1416 }
1417
1418 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
1419 // There are no extending loads or truncating stores.
1420 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1421 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1422 setTruncStoreAction(VT, InnerVT, Expand);
1423 }
1424
1425 if (!useRVVForFixedLengthVectorVT(VT))
1426 continue;
1427
1428 // By default everything must be expanded.
1429 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1430 setOperationAction(Op, VT, Expand);
1431
1432 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1433 // expansion to a build_vector of 0s.
1435
1440 VT, Custom);
1441 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1442 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1443
1445 VT, Custom);
1446
1447 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1448 ISD::MGATHER, ISD::MSCATTER},
1449 VT, Custom);
1450 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1451 ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1452 ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1453 VT, Custom);
1454 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1455
1456 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1458 Custom);
1459
1460 if (VT.getVectorElementType() == MVT::f16 &&
1461 !Subtarget.hasVInstructionsF16()) {
1462 setOperationAction(ISD::BITCAST, VT, Custom);
1463 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1465 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1466 Custom);
1467 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1468 Custom);
1469 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1470 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1471 if (Subtarget.hasStdExtZfhmin()) {
1473 } else {
1474 // We need to custom legalize f16 build vectors if Zfhmin isn't
1475 // available.
1477 }
1478 setOperationAction(ISD::FNEG, VT, Expand);
1479 setOperationAction(ISD::FABS, VT, Expand);
1481 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1482 // Don't promote f16 vector operations to f32 if f32 vector type is
1483 // not legal.
1484 // TODO: could split the f16 vector into two vectors and do promotion.
1485 if (!isTypeLegal(F32VecVT))
1486 continue;
1487 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1488 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1489 continue;
1490 }
1491
1492 if (VT.getVectorElementType() == MVT::bf16) {
1493 setOperationAction(ISD::BITCAST, VT, Custom);
1494 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1495 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1496 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1497 if (Subtarget.hasStdExtZfbfmin()) {
1499 } else {
1500 // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1501 // available.
1503 }
1505 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1506 Custom);
1507 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1508 // Don't promote bf16 vector operations to f32 if the f32 vector type is
1509 // not legal.
1510 // TODO: could split the bf16 vector into two vectors and do promotion.
1511 if (!isTypeLegal(F32VecVT))
1512 continue;
1513 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1514 // TODO: Promote VP ops to fp32.
1515 continue;
1516 }
1517
1519 Custom);
1520
1522 ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
1523 ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
1524 ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM, ISD::IS_FPCLASS,
1525 ISD::FMAXIMUM, ISD::FMINIMUM},
1526 VT, Custom);
1527
1528 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1529 ISD::FROUNDEVEN, ISD::FRINT, ISD::LRINT,
1530 ISD::LLRINT, ISD::LROUND, ISD::LLROUND,
1531 ISD::FNEARBYINT},
1532 VT, Custom);
1533
1534 setCondCodeAction(VFPCCToExpand, VT, Expand);
1535
1538
1539 setOperationAction(ISD::BITCAST, VT, Custom);
1540
1541 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1542
1543 setOperationAction(FloatingPointVPOps, VT, Custom);
1544
1551 VT, Custom);
1552 }
1553
1554 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1555 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom);
1556 if (Subtarget.is64Bit())
1557 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
1558 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1559 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
1560 if (Subtarget.hasStdExtZfbfmin())
1561 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
1562 if (Subtarget.hasStdExtFOrZfinx())
1563 setOperationAction(ISD::BITCAST, MVT::f32, Custom);
1564 if (Subtarget.hasStdExtDOrZdinx())
1565 setOperationAction(ISD::BITCAST, MVT::f64, Custom);
1566 }
1567 }
1568
1569 if (Subtarget.hasStdExtZaamo())
1570 setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
1571
1572 if (Subtarget.hasForcedAtomics()) {
1573 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1575 {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
1576 ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
1577 ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
1578 ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
1579 XLenVT, LibCall);
1580 }
1581
1582 if (Subtarget.hasVendorXTHeadMemIdx()) {
1583 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1584 setIndexedLoadAction(im, MVT::i8, Legal);
1585 setIndexedStoreAction(im, MVT::i8, Legal);
1586 setIndexedLoadAction(im, MVT::i16, Legal);
1587 setIndexedStoreAction(im, MVT::i16, Legal);
1588 setIndexedLoadAction(im, MVT::i32, Legal);
1589 setIndexedStoreAction(im, MVT::i32, Legal);
1590
1591 if (Subtarget.is64Bit()) {
1592 setIndexedLoadAction(im, MVT::i64, Legal);
1593 setIndexedStoreAction(im, MVT::i64, Legal);
1594 }
1595 }
1596 }
1597
1598 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1602
1606 }
1607
1608 // zve32x is broken for partial_reduce_umla, but let's not make it worse.
1609 if (Subtarget.hasStdExtZvqdotq() && Subtarget.getELen() >= 64) {
1610 static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
1611 ISD::PARTIAL_REDUCE_UMLA,
1612 ISD::PARTIAL_REDUCE_SUMLA};
1613 setPartialReduceMLAAction(MLAOps, MVT::nxv1i32, MVT::nxv4i8, Custom);
1614 setPartialReduceMLAAction(MLAOps, MVT::nxv2i32, MVT::nxv8i8, Custom);
1615 setPartialReduceMLAAction(MLAOps, MVT::nxv4i32, MVT::nxv16i8, Custom);
1616 setPartialReduceMLAAction(MLAOps, MVT::nxv8i32, MVT::nxv32i8, Custom);
1617 setPartialReduceMLAAction(MLAOps, MVT::nxv16i32, MVT::nxv64i8, Custom);
1618
1619 if (Subtarget.useRVVForFixedLengthVectors()) {
1620 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
1621 if (VT.getVectorElementType() != MVT::i32 ||
1622 !useRVVForFixedLengthVectorVT(VT))
1623 continue;
1624 ElementCount EC = VT.getVectorElementCount();
1625 MVT ArgVT = MVT::getVectorVT(MVT::i8, EC.multiplyCoefficientBy(4));
1626 setPartialReduceMLAAction(MLAOps, VT, ArgVT, Custom);
1627 }
1628 }
1629 }
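// With Zvqdotq each PARTIAL_REDUCE_*MLA pairs one i32 accumulator element
// with four i8 elements from each input, e.g. an nxv4i32 accumulator with
// nxv16i8 inputs maps onto vqdot.vv / vqdotu.vv / vqdotsu.vv.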
1630
1631 // Customize load and store operations for bf16 if Zfh isn't enabled.
1632 if (Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh()) {
1633 setOperationAction(ISD::LOAD, MVT::bf16, Custom);
1634 setOperationAction(ISD::STORE, MVT::bf16, Custom);
1635 }
1636
1637 // Function alignments.
1638 const Align FunctionAlignment(Subtarget.hasStdExtZca() ? 2 : 4);
1639 setMinFunctionAlignment(FunctionAlignment);
1640 // Set preferred alignments.
1641 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
1642 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
1643
1649
1650 if (Subtarget.hasStdExtFOrZfinx())
1651 setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM, ISD::FMUL});
1652
1653 if (Subtarget.hasStdExtZbb())
1655
1656 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1657 Subtarget.hasVInstructions())
1659
1660 if (Subtarget.hasStdExtZbkb())
1662
1663 if (Subtarget.hasStdExtFOrZfinx())
1666 if (Subtarget.hasVInstructions())
1668 {ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
1669 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA,
1670 ISD::SRL, ISD::SHL, ISD::STORE,
1672 ISD::VP_STORE, ISD::VP_TRUNCATE, ISD::EXPERIMENTAL_VP_REVERSE,
1676 ISD::VSELECT, ISD::VECREDUCE_ADD});
1677
1678 if (Subtarget.hasVendorXTHeadMemPair())
1679 setTargetDAGCombine({ISD::LOAD, ISD::STORE});
1680 if (Subtarget.useRVVForFixedLengthVectors())
1681 setTargetDAGCombine(ISD::BITCAST);
1682
1683 setMaxDivRemBitWidthSupported(Subtarget.is64Bit() ? 128 : 64);
1684
1685 // Disable strict node mutation.
1686 IsStrictFPEnabled = true;
1687 EnableExtLdPromotion = true;
1688
1689 // Let the subtarget decide if a predictable select is more expensive than the
1690 // corresponding branch. This information is used in CGP/SelectOpt to decide
1691 // when to convert selects into branches.
1692 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1693
1694 MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1695 MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1696
1697 MaxGluedStoresPerMemcpy = Subtarget.getMaxGluedStoresPerMemcpy();
1698 MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1699 MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1700
1702 Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1703 MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1704
1705 MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1706 MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1707}
1708
1710 LLVMContext &Context,
1711 EVT VT) const {
1712 if (!VT.isVector())
1713 return getPointerTy(DL);
1714 if (Subtarget.hasVInstructions() &&
1715 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1716 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1718}
1719
1721 return Subtarget.getXLenVT();
1722}
1723
1724// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1725bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1726 unsigned VF,
1727 bool IsScalable) const {
1728 if (!Subtarget.hasVInstructions())
1729 return true;
1730
1731 if (!IsScalable)
1732 return true;
1733
1734 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1735 return true;
1736
1737 // Don't allow VF=1 if those types aren't legal.
1738 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1739 return true;
1740
1741 // VLEN=32 support is incomplete.
1742 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1743 return true;
1744
1745 // The maximum VF is for the smallest element width with LMUL=8.
1746 // VF must be a power of 2.
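// With RVVBytesPerBlock = 8 this evaluates to MaxVF = 64, i.e. an LMUL=8
// vector of i8 elements (nxv64i8).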
1747 unsigned MaxVF = RISCV::RVVBytesPerBlock * 8;
1748 return VF > MaxVF || !isPowerOf2_32(VF);
1749}
1750
1752 return !Subtarget.hasVInstructions() ||
1753 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1754}
1755
1757 const CallInst &I,
1758 MachineFunction &MF,
1759 unsigned Intrinsic) const {
1760 auto &DL = I.getDataLayout();
1761
1762 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1763 bool IsUnitStrided, bool UsePtrVal = false) {
1764 Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
1765 // We can't use ptrVal if the intrinsic can access memory before the
1766 // pointer. This means we can't use it for strided or indexed intrinsics.
1767 if (UsePtrVal)
1768 Info.ptrVal = I.getArgOperand(PtrOp);
1769 else
1770 Info.fallbackAddressSpace =
1771 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1772 Type *MemTy;
1773 if (IsStore) {
1774 // Store value is the first operand.
1775 MemTy = I.getArgOperand(0)->getType();
1776 } else {
1777 // Use the return type. If it's a segment load, the return type is a struct.
1778 MemTy = I.getType();
1779 if (MemTy->isStructTy())
1780 MemTy = MemTy->getStructElementType(0);
1781 }
1782 if (!IsUnitStrided)
1783 MemTy = MemTy->getScalarType();
1784
1785 Info.memVT = getValueType(DL, MemTy);
1786 if (MemTy->isTargetExtTy()) {
1787 // RISC-V vector tuple type's alignment type should be its element type.
1788 if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
1789 MemTy = Type::getIntNTy(
1790 MemTy->getContext(),
1791 1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
1792 ->getZExtValue());
1793 Info.align = DL.getABITypeAlign(MemTy);
1794 } else {
1795 Info.align = Align(DL.getTypeStoreSize(MemTy->getScalarType()));
1796 }
1797 Info.size = MemoryLocation::UnknownSize;
1798 Info.flags |=
1800 return true;
1801 };
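// Note how the helper is used below: unit-strided accesses record the full
// vector (or segment field) type as memVT, while strided/indexed accesses
// record only the scalar element type, since the bytes they touch are not
// contiguous and ptrVal cannot be used safely either.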
1802
1803 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1805
1807 switch (Intrinsic) {
1808 default:
1809 return false;
1810 case Intrinsic::riscv_masked_atomicrmw_xchg:
1811 case Intrinsic::riscv_masked_atomicrmw_add:
1812 case Intrinsic::riscv_masked_atomicrmw_sub:
1813 case Intrinsic::riscv_masked_atomicrmw_nand:
1814 case Intrinsic::riscv_masked_atomicrmw_max:
1815 case Intrinsic::riscv_masked_atomicrmw_min:
1816 case Intrinsic::riscv_masked_atomicrmw_umax:
1817 case Intrinsic::riscv_masked_atomicrmw_umin:
1818 case Intrinsic::riscv_masked_cmpxchg:
1819 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
1820 // narrow atomic operation. These will be expanded to an LR/SC loop that
1821 // reads/writes to/from an aligned 4 byte location. And, or, shift, etc.
1822 // will be used to modify the appropriate part of the 4 byte data and
1823 // preserve the rest.
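// For example, an i8 atomicrmw is performed on the aligned i32 word that
// contains the byte: the LR/SC loop loads the whole word, modifies only the
// masked byte, and stores the whole word back.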
1824 Info.opc = ISD::INTRINSIC_W_CHAIN;
1825 Info.memVT = MVT::i32;
1826 Info.ptrVal = I.getArgOperand(0);
1827 Info.offset = 0;
1828 Info.align = Align(4);
1831 return true;
1832 case Intrinsic::riscv_seg2_load_mask:
1833 case Intrinsic::riscv_seg3_load_mask:
1834 case Intrinsic::riscv_seg4_load_mask:
1835 case Intrinsic::riscv_seg5_load_mask:
1836 case Intrinsic::riscv_seg6_load_mask:
1837 case Intrinsic::riscv_seg7_load_mask:
1838 case Intrinsic::riscv_seg8_load_mask:
1839 case Intrinsic::riscv_sseg2_load_mask:
1840 case Intrinsic::riscv_sseg3_load_mask:
1841 case Intrinsic::riscv_sseg4_load_mask:
1842 case Intrinsic::riscv_sseg5_load_mask:
1843 case Intrinsic::riscv_sseg6_load_mask:
1844 case Intrinsic::riscv_sseg7_load_mask:
1845 case Intrinsic::riscv_sseg8_load_mask:
1846 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1847 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1848 case Intrinsic::riscv_seg2_store_mask:
1849 case Intrinsic::riscv_seg3_store_mask:
1850 case Intrinsic::riscv_seg4_store_mask:
1851 case Intrinsic::riscv_seg5_store_mask:
1852 case Intrinsic::riscv_seg6_store_mask:
1853 case Intrinsic::riscv_seg7_store_mask:
1854 case Intrinsic::riscv_seg8_store_mask:
1855 // Operands are (vec, ..., vec, ptr, mask, vl)
1856 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1857 /*IsStore*/ true,
1858 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1859 case Intrinsic::riscv_sseg2_store_mask:
1860 case Intrinsic::riscv_sseg3_store_mask:
1861 case Intrinsic::riscv_sseg4_store_mask:
1862 case Intrinsic::riscv_sseg5_store_mask:
1863 case Intrinsic::riscv_sseg6_store_mask:
1864 case Intrinsic::riscv_sseg7_store_mask:
1865 case Intrinsic::riscv_sseg8_store_mask:
1866 // Operands are (vec, ..., vec, ptr, offset, mask, vl)
1867 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1868 /*IsStore*/ true,
1869 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1870 case Intrinsic::riscv_vlm:
1871 return SetRVVLoadStoreInfo(/*PtrOp*/ 0,
1872 /*IsStore*/ false,
1873 /*IsUnitStrided*/ true,
1874 /*UsePtrVal*/ true);
1875 case Intrinsic::riscv_vle:
1876 case Intrinsic::riscv_vle_mask:
1877 case Intrinsic::riscv_vleff:
1878 case Intrinsic::riscv_vleff_mask:
1879 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1880 /*IsStore*/ false,
1881 /*IsUnitStrided*/ true,
1882 /*UsePtrVal*/ true);
1883 case Intrinsic::riscv_vsm:
1884 case Intrinsic::riscv_vse:
1885 case Intrinsic::riscv_vse_mask:
1886 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1887 /*IsStore*/ true,
1888 /*IsUnitStrided*/ true,
1889 /*UsePtrVal*/ true);
1890 case Intrinsic::riscv_vlse:
1891 case Intrinsic::riscv_vlse_mask:
1892 case Intrinsic::riscv_vloxei:
1893 case Intrinsic::riscv_vloxei_mask:
1894 case Intrinsic::riscv_vluxei:
1895 case Intrinsic::riscv_vluxei_mask:
1896 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1897 /*IsStore*/ false,
1898 /*IsUnitStrided*/ false);
1899 case Intrinsic::riscv_vsse:
1900 case Intrinsic::riscv_vsse_mask:
1901 case Intrinsic::riscv_vsoxei:
1902 case Intrinsic::riscv_vsoxei_mask:
1903 case Intrinsic::riscv_vsuxei:
1904 case Intrinsic::riscv_vsuxei_mask:
1905 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1906 /*IsStore*/ true,
1907 /*IsUnitStrided*/ false);
1908 case Intrinsic::riscv_vlseg2:
1909 case Intrinsic::riscv_vlseg3:
1910 case Intrinsic::riscv_vlseg4:
1911 case Intrinsic::riscv_vlseg5:
1912 case Intrinsic::riscv_vlseg6:
1913 case Intrinsic::riscv_vlseg7:
1914 case Intrinsic::riscv_vlseg8:
1915 case Intrinsic::riscv_vlseg2ff:
1916 case Intrinsic::riscv_vlseg3ff:
1917 case Intrinsic::riscv_vlseg4ff:
1918 case Intrinsic::riscv_vlseg5ff:
1919 case Intrinsic::riscv_vlseg6ff:
1920 case Intrinsic::riscv_vlseg7ff:
1921 case Intrinsic::riscv_vlseg8ff:
1922 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1923 /*IsStore*/ false,
1924 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1925 case Intrinsic::riscv_vlseg2_mask:
1926 case Intrinsic::riscv_vlseg3_mask:
1927 case Intrinsic::riscv_vlseg4_mask:
1928 case Intrinsic::riscv_vlseg5_mask:
1929 case Intrinsic::riscv_vlseg6_mask:
1930 case Intrinsic::riscv_vlseg7_mask:
1931 case Intrinsic::riscv_vlseg8_mask:
1932 case Intrinsic::riscv_vlseg2ff_mask:
1933 case Intrinsic::riscv_vlseg3ff_mask:
1934 case Intrinsic::riscv_vlseg4ff_mask:
1935 case Intrinsic::riscv_vlseg5ff_mask:
1936 case Intrinsic::riscv_vlseg6ff_mask:
1937 case Intrinsic::riscv_vlseg7ff_mask:
1938 case Intrinsic::riscv_vlseg8ff_mask:
1939 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1940 /*IsStore*/ false,
1941 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1942 case Intrinsic::riscv_vlsseg2:
1943 case Intrinsic::riscv_vlsseg3:
1944 case Intrinsic::riscv_vlsseg4:
1945 case Intrinsic::riscv_vlsseg5:
1946 case Intrinsic::riscv_vlsseg6:
1947 case Intrinsic::riscv_vlsseg7:
1948 case Intrinsic::riscv_vlsseg8:
1949 case Intrinsic::riscv_vloxseg2:
1950 case Intrinsic::riscv_vloxseg3:
1951 case Intrinsic::riscv_vloxseg4:
1952 case Intrinsic::riscv_vloxseg5:
1953 case Intrinsic::riscv_vloxseg6:
1954 case Intrinsic::riscv_vloxseg7:
1955 case Intrinsic::riscv_vloxseg8:
1956 case Intrinsic::riscv_vluxseg2:
1957 case Intrinsic::riscv_vluxseg3:
1958 case Intrinsic::riscv_vluxseg4:
1959 case Intrinsic::riscv_vluxseg5:
1960 case Intrinsic::riscv_vluxseg6:
1961 case Intrinsic::riscv_vluxseg7:
1962 case Intrinsic::riscv_vluxseg8:
1963 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1964 /*IsStore*/ false,
1965 /*IsUnitStrided*/ false);
1966 case Intrinsic::riscv_vlsseg2_mask:
1967 case Intrinsic::riscv_vlsseg3_mask:
1968 case Intrinsic::riscv_vlsseg4_mask:
1969 case Intrinsic::riscv_vlsseg5_mask:
1970 case Intrinsic::riscv_vlsseg6_mask:
1971 case Intrinsic::riscv_vlsseg7_mask:
1972 case Intrinsic::riscv_vlsseg8_mask:
1973 case Intrinsic::riscv_vloxseg2_mask:
1974 case Intrinsic::riscv_vloxseg3_mask:
1975 case Intrinsic::riscv_vloxseg4_mask:
1976 case Intrinsic::riscv_vloxseg5_mask:
1977 case Intrinsic::riscv_vloxseg6_mask:
1978 case Intrinsic::riscv_vloxseg7_mask:
1979 case Intrinsic::riscv_vloxseg8_mask:
1980 case Intrinsic::riscv_vluxseg2_mask:
1981 case Intrinsic::riscv_vluxseg3_mask:
1982 case Intrinsic::riscv_vluxseg4_mask:
1983 case Intrinsic::riscv_vluxseg5_mask:
1984 case Intrinsic::riscv_vluxseg6_mask:
1985 case Intrinsic::riscv_vluxseg7_mask:
1986 case Intrinsic::riscv_vluxseg8_mask:
1987 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
1988 /*IsStore*/ false,
1989 /*IsUnitStrided*/ false);
1990 case Intrinsic::riscv_vsseg2:
1991 case Intrinsic::riscv_vsseg3:
1992 case Intrinsic::riscv_vsseg4:
1993 case Intrinsic::riscv_vsseg5:
1994 case Intrinsic::riscv_vsseg6:
1995 case Intrinsic::riscv_vsseg7:
1996 case Intrinsic::riscv_vsseg8:
1997 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1998 /*IsStore*/ true,
1999 /*IsUnitStrided*/ false);
2000 case Intrinsic::riscv_vsseg2_mask:
2001 case Intrinsic::riscv_vsseg3_mask:
2002 case Intrinsic::riscv_vsseg4_mask:
2003 case Intrinsic::riscv_vsseg5_mask:
2004 case Intrinsic::riscv_vsseg6_mask:
2005 case Intrinsic::riscv_vsseg7_mask:
2006 case Intrinsic::riscv_vsseg8_mask:
2007 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2008 /*IsStore*/ true,
2009 /*IsUnitStrided*/ false);
2010 case Intrinsic::riscv_vssseg2:
2011 case Intrinsic::riscv_vssseg3:
2012 case Intrinsic::riscv_vssseg4:
2013 case Intrinsic::riscv_vssseg5:
2014 case Intrinsic::riscv_vssseg6:
2015 case Intrinsic::riscv_vssseg7:
2016 case Intrinsic::riscv_vssseg8:
2017 case Intrinsic::riscv_vsoxseg2:
2018 case Intrinsic::riscv_vsoxseg3:
2019 case Intrinsic::riscv_vsoxseg4:
2020 case Intrinsic::riscv_vsoxseg5:
2021 case Intrinsic::riscv_vsoxseg6:
2022 case Intrinsic::riscv_vsoxseg7:
2023 case Intrinsic::riscv_vsoxseg8:
2024 case Intrinsic::riscv_vsuxseg2:
2025 case Intrinsic::riscv_vsuxseg3:
2026 case Intrinsic::riscv_vsuxseg4:
2027 case Intrinsic::riscv_vsuxseg5:
2028 case Intrinsic::riscv_vsuxseg6:
2029 case Intrinsic::riscv_vsuxseg7:
2030 case Intrinsic::riscv_vsuxseg8:
2031 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2032 /*IsStore*/ true,
2033 /*IsUnitStrided*/ false);
2034 case Intrinsic::riscv_vssseg2_mask:
2035 case Intrinsic::riscv_vssseg3_mask:
2036 case Intrinsic::riscv_vssseg4_mask:
2037 case Intrinsic::riscv_vssseg5_mask:
2038 case Intrinsic::riscv_vssseg6_mask:
2039 case Intrinsic::riscv_vssseg7_mask:
2040 case Intrinsic::riscv_vssseg8_mask:
2041 case Intrinsic::riscv_vsoxseg2_mask:
2042 case Intrinsic::riscv_vsoxseg3_mask:
2043 case Intrinsic::riscv_vsoxseg4_mask:
2044 case Intrinsic::riscv_vsoxseg5_mask:
2045 case Intrinsic::riscv_vsoxseg6_mask:
2046 case Intrinsic::riscv_vsoxseg7_mask:
2047 case Intrinsic::riscv_vsoxseg8_mask:
2048 case Intrinsic::riscv_vsuxseg2_mask:
2049 case Intrinsic::riscv_vsuxseg3_mask:
2050 case Intrinsic::riscv_vsuxseg4_mask:
2051 case Intrinsic::riscv_vsuxseg5_mask:
2052 case Intrinsic::riscv_vsuxseg6_mask:
2053 case Intrinsic::riscv_vsuxseg7_mask:
2054 case Intrinsic::riscv_vsuxseg8_mask:
2055 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
2056 /*IsStore*/ true,
2057 /*IsUnitStrided*/ false);
2058 }
2059}
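// Note on the PtrOp indices above: the pointer operand is located relative to
// the end of the argument list, so one case covers any number of segment
// vectors. For example, the masked segment stores have operands
// (vec, ..., vec, ptr, mask, vl), so the pointer is always the third operand
// from the end, hence /*PtrOp*/ I.arg_size() - 3; the strided variants carry
// an extra offset operand and use I.arg_size() - 4.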
2060
2062 const AddrMode &AM, Type *Ty,
2063 unsigned AS,
2064 Instruction *I) const {
2065 // No global is ever allowed as a base.
2066 if (AM.BaseGV)
2067 return false;
2068
2069 // None of our addressing modes allows a scalable offset
2070 if (AM.ScalableOffset)
2071 return false;
2072
2073 // RVV instructions only support register addressing.
2074 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
2075 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
2076
2077 // Require a 12-bit signed offset.
2078 if (!isInt<12>(AM.BaseOffs))
2079 return false;
2080
2081 switch (AM.Scale) {
2082 case 0: // "r+i" or just "i", depending on HasBaseReg.
2083 break;
2084 case 1:
2085 if (!AM.HasBaseReg) // allow "r+i".
2086 break;
2087 return false; // disallow "r+r" or "r+r+i".
2088 default:
2089 return false;
2090 }
2091
2092 return true;
2093}
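// For illustration, the only addressing modes accepted above for scalar
// accesses are a bare register, a bare 12-bit signed immediate, or
// reg + 12-bit immediate (e.g. 8(a0) with the offset in [-2048, 2047]).
// A global base, a scaled index, reg + reg, or any scalable offset is
// rejected, and RVV vector accesses additionally require a plain register
// base with no offset at all.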
2094
2096 return isInt<12>(Imm);
2097}
2098
2100 return isInt<12>(Imm);
2101}
2102
2103// On RV32, 64-bit integers are split into their high and low parts and held
2104// in two different registers, so the trunc is free since the low register can
2105// just be used.
2106// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
2107// isTruncateFree?
2109 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
2110 return false;
2111 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
2112 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
2113 return (SrcBits == 64 && DestBits == 32);
2114}
2115
2117 // We consider i64->i32 free on RV64 since we have good selection of W
2118 // instructions that make promoting operations back to i64 free in many cases.
2119 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
2120 !DstVT.isInteger())
2121 return false;
2122 unsigned SrcBits = SrcVT.getSizeInBits();
2123 unsigned DestBits = DstVT.getSizeInBits();
2124 return (SrcBits == 64 && DestBits == 32);
2125}
2126
2128 EVT SrcVT = Val.getValueType();
2129 // free truncate from vnsrl and vnsra
2130 if (Subtarget.hasVInstructions() &&
2131 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
2132 SrcVT.isVector() && VT2.isVector()) {
2133 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
2134 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
2135 if (SrcBits == DestBits * 2) {
2136 return true;
2137 }
2138 }
2139 return TargetLowering::isTruncateFree(Val, VT2);
2140}
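// For example, truncating the i32 elements of a vector SRL result down to i16
// elements is considered free here because the narrowing shifts
// (vnsrl.w*/vnsra.w*) perform the shift and the truncate in one instruction.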
2141
2143 // Zexts are free if they can be combined with a load.
2144 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
2145 // poorly with type legalization of compares preferring sext.
2146 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
2147 EVT MemVT = LD->getMemoryVT();
2148 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
2149 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
2150 LD->getExtensionType() == ISD::ZEXTLOAD))
2151 return true;
2152 }
2153
2154 return TargetLowering::isZExtFree(Val, VT2);
2155}
2156
2158 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
2159}
2160
2162 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
2163}
2164
2166 return Subtarget.hasCTZLike();
2167}
2168
2170 return Subtarget.hasCLZLike();
2171}
2172
2174 const Instruction &AndI) const {
2175 // We expect to be able to match a bit extraction instruction if the Zbs
2176 // extension is supported and the mask is a power of two. However, we
2177 // conservatively return false if the mask would fit in an ANDI instruction,
2178 // on the basis that it's possible the sinking+duplication of the AND in
2179 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2180 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
2181 if (!Subtarget.hasBEXTILike())
2182 return false;
2184 if (!Mask)
2185 return false;
2186 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
2187}
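// For example, a mask of (1 << 20) is a power of two that does not fit in a
// 12-bit signed immediate, so sinking the AND is worthwhile (it can be matched
// as a bit extract). A mask of 0x400 also selects a single bit but fits in an
// ANDI immediate, so we return false and keep the cheaper ANDI form.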
2188
2190 EVT VT = Y.getValueType();
2191
2192 if (VT.isVector())
2193 return false;
2194
2195 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2196 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
2197}
2198
2200 EVT VT = Y.getValueType();
2201
2202 if (!VT.isVector())
2203 return hasAndNotCompare(Y);
2204
2205 return Subtarget.hasStdExtZvkb();
2206}
2207
2209 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
2210 if (Subtarget.hasStdExtZbs())
2211 return X.getValueType().isScalarInteger();
2212 auto *C = dyn_cast<ConstantSDNode>(Y);
2213 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
2214 if (Subtarget.hasVendorXTHeadBs())
2215 return C != nullptr;
2216 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
2217 return C && C->getAPIntValue().ule(10);
2218}
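// Summary of the cases above: with Zbs any scalar integer bit test is fine
// (BEXT/BEXTI take the bit position in a register or an immediate); with
// XTheadBs only a constant bit position is handled (th.tst); otherwise we only
// claim bit positions 0..10, since (1 << Y) for Y <= 10 still fits in the
// 12-bit signed immediate of ANDI.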
2219
2221 unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X,
2222 SDValue Y) const {
2223 if (SelectOpcode != ISD::VSELECT)
2224 return false;
2225
2226 // Only enable for rvv.
2227 if (!VT.isVector() || !Subtarget.hasVInstructions())
2228 return false;
2229
2230 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2231 return false;
2232
2233 return true;
2234}
2235
2237 Type *Ty) const {
2238 assert(Ty->isIntegerTy());
2239
2240 unsigned BitSize = Ty->getIntegerBitWidth();
2241 if (BitSize > Subtarget.getXLen())
2242 return false;
2243
2244 // Fast path, assume 32-bit immediates are cheap.
2245 int64_t Val = Imm.getSExtValue();
2246 if (isInt<32>(Val))
2247 return true;
2248
2249 // A constant pool entry may be more aligned than the load we're trying to
2250 // replace. If we don't support unaligned scalar mem, prefer the constant
2251 // pool.
2252 // TODO: Can the caller pass down the alignment?
2253 if (!Subtarget.enableUnalignedScalarMem())
2254 return true;
2255
2256 // Prefer to keep the load if it would require many instructions.
2257 // This uses the same threshold we use for constant pools but doesn't
2258 // check useConstantPoolForLargeInts.
2259 // TODO: Should we keep the load only when we're definitely going to emit a
2260 // constant pool?
2261
2263 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2264}
2265
2269 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2270 SelectionDAG &DAG) const {
2271 // One interesting pattern that we'd want to form is 'bit extract':
2272 // ((1 >> Y) & 1) ==/!= 0
2273 // But we also need to be careful not to try to reverse that fold.
2274
2275 // Is this '((1 >> Y) & 1)'?
2276 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2277 return false; // Keep the 'bit extract' pattern.
2278
2279 // Will this be '((1 >> Y) & 1)' after the transform?
2280 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2281 return true; // Do form the 'bit extract' pattern.
2282
2283 // If 'X' is a constant, and we transform, then we will immediately
2284 // try to undo the fold, thus causing endless combine loop.
2285 // So only do the transform if X is not a constant. This matches the default
2286 // implementation of this function.
2287 return !XC;
2288}
2289
2291 unsigned Opc = VecOp.getOpcode();
2292
2293 // Assume target opcodes can't be scalarized.
2294 // TODO - do we have any exceptions?
2295 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
2296 return false;
2297
2298 // If the vector op is not supported, try to convert to scalar.
2299 EVT VecVT = VecOp.getValueType();
2301 return true;
2302
2303 // If the vector op is supported, but the scalar op is not, the transform may
2304 // not be worthwhile.
2305  // Permit the transform when the vector binary operation can be converted to a
2306  // scalar binary operation that is custom lowered, even with an illegal type.
2307 EVT ScalarVT = VecVT.getScalarType();
2308 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2309 isOperationCustom(Opc, ScalarVT);
2310}
2311
2313 const GlobalAddressSDNode *GA) const {
2314 // In order to maximise the opportunity for common subexpression elimination,
2315 // keep a separate ADD node for the global address offset instead of folding
2316 // it in the global address node. Later peephole optimisations may choose to
2317 // fold it back in when profitable.
2318 return false;
2319}
2320
2321// Returns 0-31 if the fli instruction is available for the type and this is
2322// legal FP immediate for the type. Returns -1 otherwise.
2324 if (!Subtarget.hasStdExtZfa())
2325 return -1;
2326
2327 bool IsSupportedVT = false;
2328 if (VT == MVT::f16) {
2329 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2330 } else if (VT == MVT::f32) {
2331 IsSupportedVT = true;
2332 } else if (VT == MVT::f64) {
2333 assert(Subtarget.hasStdExtD() && "Expect D extension");
2334 IsSupportedVT = true;
2335 }
2336
2337 if (!IsSupportedVT)
2338 return -1;
2339
2340 return RISCVLoadFPImm::getLoadFPImm(Imm);
2341}
2342
2344 bool ForCodeSize) const {
2345 bool IsLegalVT = false;
2346 if (VT == MVT::f16)
2347 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2348 else if (VT == MVT::f32)
2349 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2350 else if (VT == MVT::f64)
2351 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2352 else if (VT == MVT::bf16)
2353 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2354
2355 if (!IsLegalVT)
2356 return false;
2357
2358 if (getLegalZfaFPImm(Imm, VT) >= 0)
2359 return true;
2360
2361 // Some constants can be produced by fli+fneg.
2362 if (Imm.isNegative() && getLegalZfaFPImm(-Imm, VT) >= 0)
2363 return true;
2364
2365 // Cannot create a 64 bit floating-point immediate value for rv32.
2366 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2367 // td can handle +0.0 or -0.0 already.
2368 // -0.0 can be created by fmv + fneg.
2369 return Imm.isZero();
2370 }
2371
2372 // Special case: fmv + fneg
2373 if (Imm.isNegZero())
2374 return true;
2375
2376 // Building an integer and then converting requires a fmv at the end of
2377 // the integer sequence. The fmv is not required for Zfinx.
2378 const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2379 const int Cost =
2380 FmvCost + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2381 Subtarget.getXLen(), Subtarget);
2382 return Cost <= FPImmCost;
2383}
2384
2385// TODO: This is very conservative.
2387 unsigned Index) const {
2389 return false;
2390
2391 // Extracts from index 0 are just subreg extracts.
2392 if (Index == 0)
2393 return true;
2394
2395 // Only support extracting a fixed from a fixed vector for now.
2396 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2397 return false;
2398
2399 EVT EltVT = ResVT.getVectorElementType();
2400 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2401
2402 // The smallest type we can slide is i8.
2403 // TODO: We can extract index 0 from a mask vector without a slide.
2404 if (EltVT == MVT::i1)
2405 return false;
2406
2407 unsigned ResElts = ResVT.getVectorNumElements();
2408 unsigned SrcElts = SrcVT.getVectorNumElements();
2409
2410 unsigned MinVLen = Subtarget.getRealMinVLen();
2411 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2412
2413 // If we're extracting only data from the first VLEN bits of the source
2414 // then we can always do this with an m1 vslidedown.vx. Restricting the
2415 // Index ensures we can use a vslidedown.vi.
2416 // TODO: We can generalize this when the exact VLEN is known.
2417 if (Index + ResElts <= MinVLMAX && Index < 31)
2418 return true;
2419
2420  // Conservatively only handle extracting half of a vector.
2421 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2422 // the upper half of a vector until we have more test coverage.
2423 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2424 // a cheap extract. However, this case is important in practice for
2425  // shuffled extracts of longer vectors. How should this be resolved?
2426 return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
2427}
2428
2430 CallingConv::ID CC,
2431 EVT VT) const {
2432 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2433 // We might still end up using a GPR but that will be decided based on ABI.
2434 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2435 !Subtarget.hasStdExtZfhminOrZhinxmin())
2436 return MVT::f32;
2437
2438 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2439
2440 return PartVT;
2441}
2442
2443unsigned
2445 std::optional<MVT> RegisterVT) const {
2446 // Pair inline assembly operand
2447 if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
2448 *RegisterVT == MVT::Untyped)
2449 return 1;
2450
2451 return TargetLowering::getNumRegisters(Context, VT, RegisterVT);
2452}
2453
2455 CallingConv::ID CC,
2456 EVT VT) const {
2457 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2458 // We might still end up using a GPR but that will be decided based on ABI.
2459 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2460 !Subtarget.hasStdExtZfhminOrZhinxmin())
2461 return 1;
2462
2463 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2464}
2465
2467 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2468 unsigned &NumIntermediates, MVT &RegisterVT) const {
2470 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2471
2472 return NumRegs;
2473}
2474
2475// Changes the condition code and swaps operands if necessary, so the SetCC
2476// operation matches one of the comparisons supported directly by branches
2477// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2478// with 1/-1.
2480 ISD::CondCode &CC, SelectionDAG &DAG,
2481 const RISCVSubtarget &Subtarget) {
2482 // If this is a single bit test that can't be handled by ANDI, shift the
2483 // bit to be tested to the MSB and perform a signed compare with 0.
2484 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2485 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2486 isa<ConstantSDNode>(LHS.getOperand(1)) &&
2487 // XAndesPerf supports branch on test bit.
2488 !Subtarget.hasVendorXAndesPerf()) {
2489 uint64_t Mask = LHS.getConstantOperandVal(1);
2490 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2491 unsigned ShAmt = 0;
2492 if (isPowerOf2_64(Mask)) {
2493 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2494 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2495 } else {
2496 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2497 }
2498
2499 LHS = LHS.getOperand(0);
2500 if (ShAmt != 0)
2501 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2502 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2503 return;
2504 }
2505 }
2506
2507 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2508 int64_t C = RHSC->getSExtValue();
2509 switch (CC) {
2510 default: break;
2511 case ISD::SETGT:
2512 // Convert X > -1 to X >= 0.
2513 if (C == -1) {
2514 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2515 CC = ISD::SETGE;
2516 return;
2517 }
2518 if ((Subtarget.hasVendorXqcicm() || Subtarget.hasVendorXqcicli()) &&
2519 C != INT64_MAX && isInt<5>(C + 1)) {
2520 // We have a conditional move instruction for SETGE but not SETGT.
2521 // Convert X > C to X >= C + 1, if (C + 1) is a 5-bit signed immediate.
2522 RHS = DAG.getSignedConstant(C + 1, DL, RHS.getValueType());
2523 CC = ISD::SETGE;
2524 return;
2525 }
2526 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isInt<16>(C + 1)) {
2527 // We have a branch immediate instruction for SETGE but not SETGT.
2528 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit signed immediate.
2529 RHS = DAG.getSignedConstant(C + 1, DL, RHS.getValueType());
2530 CC = ISD::SETGE;
2531 return;
2532 }
2533 break;
2534 case ISD::SETLT:
2535 // Convert X < 1 to 0 >= X.
2536 if (C == 1) {
2537 RHS = LHS;
2538 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2539 CC = ISD::SETGE;
2540 return;
2541 }
2542 break;
2543 case ISD::SETUGT:
2544 if ((Subtarget.hasVendorXqcicm() || Subtarget.hasVendorXqcicli()) &&
2545 C != INT64_MAX && isUInt<5>(C + 1)) {
2546 // We have a conditional move instruction for SETUGE but not SETUGT.
2547      // Convert X > C to X >= C + 1, if (C + 1) is a 5-bit unsigned immediate.
2548 RHS = DAG.getConstant(C + 1, DL, RHS.getValueType());
2549 CC = ISD::SETUGE;
2550 return;
2551 }
2552 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isUInt<16>(C + 1)) {
2553 // We have a branch immediate instruction for SETUGE but not SETUGT.
2554 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit unsigned
2555 // immediate.
2556 RHS = DAG.getConstant(C + 1, DL, RHS.getValueType());
2557 CC = ISD::SETUGE;
2558 return;
2559 }
2560 break;
2561 }
2562 }
2563
2564 switch (CC) {
2565 default:
2566 break;
2567 case ISD::SETGT:
2568 case ISD::SETLE:
2569 case ISD::SETUGT:
2570 case ISD::SETULE:
2572 std::swap(LHS, RHS);
2573 break;
2574 }
2575}
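// A few examples of the rewrites performed above:
//   (setgt X, -1)  -> (setge X, 0), so the branch compares against zero.
//   (setlt X, 1)   -> (setge 0, X), for the same reason.
//   (setule X, Y)  -> (setuge Y, X) by swapping operands, since only
//   EQ/NE/LT/GE/ULT/UGE have direct branch forms (BEQ/BNE/BLT/BGE/BLTU/BGEU).
//   On RV32, (seteq (and X, 0x80000000), 0) -> (setge X, 0): a single-bit test
//   of a bit that cannot be reached by ANDI becomes a signed compare with zero
//   after the tested bit is shifted into the MSB.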
2576
2578 if (VT.isRISCVVectorTuple()) {
2579 if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2580 VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2581 return RISCVVType::LMUL_F8;
2582 if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2583 VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2584 return RISCVVType::LMUL_F4;
2585 if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2586 VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2587 return RISCVVType::LMUL_F2;
2588 if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2589 VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2590 return RISCVVType::LMUL_1;
2591 if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2592 VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2593 return RISCVVType::LMUL_2;
2594 if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2595 return RISCVVType::LMUL_4;
2596 llvm_unreachable("Invalid vector tuple type LMUL.");
2597 }
2598
2599 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2600 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2601 if (VT.getVectorElementType() == MVT::i1)
2602 KnownSize *= 8;
2603
2604 switch (KnownSize) {
2605 default:
2606 llvm_unreachable("Invalid LMUL.");
2607 case 8:
2608 return RISCVVType::LMUL_F8;
2609 case 16:
2610 return RISCVVType::LMUL_F4;
2611 case 32:
2612 return RISCVVType::LMUL_F2;
2613 case 64:
2614 return RISCVVType::LMUL_1;
2615 case 128:
2616 return RISCVVType::LMUL_2;
2617 case 256:
2618 return RISCVVType::LMUL_4;
2619 case 512:
2620 return RISCVVType::LMUL_8;
2621 }
2622}
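// Worked examples for the table above (RVVBitsPerBlock is 64 bits):
// nxv1i32 has a known minimum size of 32 bits -> LMUL_F2 (half a register).
// nxv2i32 is 64 bits -> LMUL_1, and nxv8i32 is 256 bits -> LMUL_4.
// i1 mask types are scaled by 8 first, so nxv8i1 (8 bits * 8 = 64) also maps
// to LMUL_1, matching the LMUL of the i8 vector with the same element count.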
2623
2625 switch (LMul) {
2626 default:
2627 llvm_unreachable("Invalid LMUL.");
2631 case RISCVVType::LMUL_1:
2632 return RISCV::VRRegClassID;
2633 case RISCVVType::LMUL_2:
2634 return RISCV::VRM2RegClassID;
2635 case RISCVVType::LMUL_4:
2636 return RISCV::VRM4RegClassID;
2637 case RISCVVType::LMUL_8:
2638 return RISCV::VRM8RegClassID;
2639 }
2640}
2641
2642unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2643 RISCVVType::VLMUL LMUL = getLMUL(VT);
2644 if (LMUL == RISCVVType::LMUL_F8 || LMUL == RISCVVType::LMUL_F4 ||
2645 LMUL == RISCVVType::LMUL_F2 || LMUL == RISCVVType::LMUL_1) {
2646 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2647 "Unexpected subreg numbering");
2648 return RISCV::sub_vrm1_0 + Index;
2649 }
2650 if (LMUL == RISCVVType::LMUL_2) {
2651 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2652 "Unexpected subreg numbering");
2653 return RISCV::sub_vrm2_0 + Index;
2654 }
2655 if (LMUL == RISCVVType::LMUL_4) {
2656 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2657 "Unexpected subreg numbering");
2658 return RISCV::sub_vrm4_0 + Index;
2659 }
2660 llvm_unreachable("Invalid vector type.");
2661}
2662
2664 if (VT.isRISCVVectorTuple()) {
2665 unsigned NF = VT.getRISCVVectorTupleNumFields();
2666 unsigned RegsPerField =
2667 std::max(1U, (unsigned)VT.getSizeInBits().getKnownMinValue() /
2668 (NF * RISCV::RVVBitsPerBlock));
2669 switch (RegsPerField) {
2670 case 1:
2671 if (NF == 2)
2672 return RISCV::VRN2M1RegClassID;
2673 if (NF == 3)
2674 return RISCV::VRN3M1RegClassID;
2675 if (NF == 4)
2676 return RISCV::VRN4M1RegClassID;
2677 if (NF == 5)
2678 return RISCV::VRN5M1RegClassID;
2679 if (NF == 6)
2680 return RISCV::VRN6M1RegClassID;
2681 if (NF == 7)
2682 return RISCV::VRN7M1RegClassID;
2683 if (NF == 8)
2684 return RISCV::VRN8M1RegClassID;
2685 break;
2686 case 2:
2687 if (NF == 2)
2688 return RISCV::VRN2M2RegClassID;
2689 if (NF == 3)
2690 return RISCV::VRN3M2RegClassID;
2691 if (NF == 4)
2692 return RISCV::VRN4M2RegClassID;
2693 break;
2694 case 4:
2695 assert(NF == 2);
2696 return RISCV::VRN2M4RegClassID;
2697 default:
2698 break;
2699 }
2700 llvm_unreachable("Invalid vector tuple type RegClass.");
2701 }
2702
2703 if (VT.getVectorElementType() == MVT::i1)
2704 return RISCV::VRRegClassID;
2705 return getRegClassIDForLMUL(getLMUL(VT));
2706}
2707
2708// Attempt to decompose a subvector insert/extract between VecVT and
2709// SubVecVT via subregister indices. Returns the subregister index that
2710// can perform the subvector insert/extract with the given element index, as
2711// well as the index corresponding to any leftover subvectors that must be
2712// further inserted/extracted within the register class for SubVecVT.
2713std::pair<unsigned, unsigned>
2715 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2716 const RISCVRegisterInfo *TRI) {
2717 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2718 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2719 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2720 "Register classes not ordered");
2721 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2722 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2723
2724  // If VecVT is a vector tuple type, either it is the tuple type with the same
2725  // RegClass as SubVecVT, or SubVecVT is actually a subvector of VecVT.
2726 if (VecVT.isRISCVVectorTuple()) {
2727 if (VecRegClassID == SubRegClassID)
2728 return {RISCV::NoSubRegister, 0};
2729
2730 assert(SubVecVT.isScalableVector() &&
2731 "Only allow scalable vector subvector.");
2732 assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
2733 "Invalid vector tuple insert/extract for vector and subvector with "
2734 "different LMUL.");
2735 return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
2736 }
2737
2738 // Try to compose a subregister index that takes us from the incoming
2739  // LMUL>1 register class down to the outgoing one. At each step we halve
2740 // the LMUL:
2741 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2742 // Note that this is not guaranteed to find a subregister index, such as
2743 // when we are extracting from one VR type to another.
2744 unsigned SubRegIdx = RISCV::NoSubRegister;
2745 for (const unsigned RCID :
2746 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2747 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2748 VecVT = VecVT.getHalfNumVectorElementsVT();
2749 bool IsHi =
2750 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2751 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2752 getSubregIndexByMVT(VecVT, IsHi));
2753 if (IsHi)
2754 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2755 }
2756 return {SubRegIdx, InsertExtractIdx};
2757}
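// Tracing the example from the comment above: extracting nxv2i32 at element
// index 12 from nxv16i32 (an LMUL=8 type). Halve to nxv8i32: 12 >= 8, so take
// sub_vrm4_1 and the index becomes 4. Halve to nxv4i32: 4 >= 4, so compose
// with sub_vrm2_1 and the index becomes 0. Halve to nxv2i32: 0 < 2, so compose
// with sub_vrm1_0. The result is sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
// with a leftover element index of 0.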
2758
2759// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2760// stores for those types.
2761bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2762 return !Subtarget.useRVVForFixedLengthVectors() ||
2763 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2764}
2765
2767 if (!ScalarTy.isSimple())
2768 return false;
2769 switch (ScalarTy.getSimpleVT().SimpleTy) {
2770 case MVT::iPTR:
2771 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2772 case MVT::i8:
2773 case MVT::i16:
2774 case MVT::i32:
2775 return Subtarget.hasVInstructions();
2776 case MVT::i64:
2777 return Subtarget.hasVInstructionsI64();
2778 case MVT::f16:
2779 return Subtarget.hasVInstructionsF16Minimal();
2780 case MVT::bf16:
2781 return Subtarget.hasVInstructionsBF16Minimal();
2782 case MVT::f32:
2783 return Subtarget.hasVInstructionsF32();
2784 case MVT::f64:
2785 return Subtarget.hasVInstructionsF64();
2786 default:
2787 return false;
2788 }
2789}
2790
2791
2793 return NumRepeatedDivisors;
2794}
2795
2797 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2798 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2799 "Unexpected opcode");
2800 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2801 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2803 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2804 if (!II)
2805 return SDValue();
2806 return Op.getOperand(II->VLOperand + 1 + HasChain);
2807}
2808
2810 const RISCVSubtarget &Subtarget) {
2811 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2812 if (!Subtarget.useRVVForFixedLengthVectors())
2813 return false;
2814
2815 // We only support a set of vector types with a consistent maximum fixed size
2816 // across all supported vector element types to avoid legalization issues.
2817 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2818 // fixed-length vector type we support is 1024 bytes.
2819 if (VT.getVectorNumElements() > 1024 || VT.getFixedSizeInBits() > 1024 * 8)
2820 return false;
2821
2822 unsigned MinVLen = Subtarget.getRealMinVLen();
2823
2824 MVT EltVT = VT.getVectorElementType();
2825
2826 // Don't use RVV for vectors we cannot scalarize if required.
2827 switch (EltVT.SimpleTy) {
2828 // i1 is supported but has different rules.
2829 default:
2830 return false;
2831 case MVT::i1:
2832 // Masks can only use a single register.
2833 if (VT.getVectorNumElements() > MinVLen)
2834 return false;
2835 MinVLen /= 8;
2836 break;
2837 case MVT::i8:
2838 case MVT::i16:
2839 case MVT::i32:
2840 break;
2841 case MVT::i64:
2842 if (!Subtarget.hasVInstructionsI64())
2843 return false;
2844 break;
2845 case MVT::f16:
2846 if (!Subtarget.hasVInstructionsF16Minimal())
2847 return false;
2848 break;
2849 case MVT::bf16:
2850 if (!Subtarget.hasVInstructionsBF16Minimal())
2851 return false;
2852 break;
2853 case MVT::f32:
2854 if (!Subtarget.hasVInstructionsF32())
2855 return false;
2856 break;
2857 case MVT::f64:
2858 if (!Subtarget.hasVInstructionsF64())
2859 return false;
2860 break;
2861 }
2862
2863 // Reject elements larger than ELEN.
2864 if (EltVT.getSizeInBits() > Subtarget.getELen())
2865 return false;
2866
2867 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2868 // Don't use RVV for types that don't fit.
2869 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2870 return false;
2871
2872 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2873 // the base fixed length RVV support in place.
2874 if (!VT.isPow2VectorType())
2875 return false;
2876
2877 return true;
2878}
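// For illustration, with VLEN >= 128 and assuming the default LMUL cap of 8
// for fixed-length vectors: v8i32 (256 bits) needs LMUL 2 and is accepted;
// v256i32 (8192 bits) would need LMUL 64 and is rejected; v3i32 is rejected
// because it is not a power-of-2 vector type.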
2879
2880bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2881 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2882}
2883
2884// Return the largest legal scalable vector type that matches VT's element type.
2886 const RISCVSubtarget &Subtarget) {
2887 // This may be called before legal types are setup.
2888 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2889 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2890 "Expected legal fixed length vector!");
2891
2892 unsigned MinVLen = Subtarget.getRealMinVLen();
2893 unsigned MaxELen = Subtarget.getELen();
2894
2895 MVT EltVT = VT.getVectorElementType();
2896 switch (EltVT.SimpleTy) {
2897 default:
2898 llvm_unreachable("unexpected element type for RVV container");
2899 case MVT::i1:
2900 case MVT::i8:
2901 case MVT::i16:
2902 case MVT::i32:
2903 case MVT::i64:
2904 case MVT::bf16:
2905 case MVT::f16:
2906 case MVT::f32:
2907 case MVT::f64: {
2908    // We prefer to use LMUL=1 for VLEN-sized types. Use fractional LMULs for
2909 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2910 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2911 unsigned NumElts =
2913 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2914 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2915 return MVT::getScalableVectorVT(EltVT, NumElts);
2916 }
2917 }
2918}
2919
2921 const RISCVSubtarget &Subtarget) {
2923 Subtarget);
2924}
2925
2927 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2928}
2929
2930// Grow V to consume an entire RVV register.
2932 const RISCVSubtarget &Subtarget) {
2933 assert(VT.isScalableVector() &&
2934 "Expected to convert into a scalable vector!");
2935 assert(V.getValueType().isFixedLengthVector() &&
2936 "Expected a fixed length vector operand!");
2937 SDLoc DL(V);
2938 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), V, 0);
2939}
2940
2941// Shrink V so it's just big enough to maintain a VT's worth of data.
2943 const RISCVSubtarget &Subtarget) {
2945 "Expected to convert into a fixed length vector!");
2946 assert(V.getValueType().isScalableVector() &&
2947 "Expected a scalable vector operand!");
2948 SDLoc DL(V);
2949 return DAG.getExtractSubvector(DL, VT, V, 0);
2950}
2951
2952 /// Return the mask type suitable for masking the provided
2953/// vector type. This is simply an i1 element type vector of the same
2954/// (possibly scalable) length.
2955static MVT getMaskTypeFor(MVT VecVT) {
2956 assert(VecVT.isVector());
2958 return MVT::getVectorVT(MVT::i1, EC);
2959}
2960
2961/// Creates an all ones mask suitable for masking a vector of type VecTy with
2962 /// vector length VL.
2963static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2964 SelectionDAG &DAG) {
2965 MVT MaskVT = getMaskTypeFor(VecVT);
2966 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2967}
2968
2969static std::pair<SDValue, SDValue>
2971 const RISCVSubtarget &Subtarget) {
2972 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2973 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2974 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2975 return {Mask, VL};
2976}
2977
2978static std::pair<SDValue, SDValue>
2979getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2980 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2981 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2982 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2983 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2984 return {Mask, VL};
2985}
2986
2987// Gets the two common "VL" operands: an all-ones mask and the vector length.
2988// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2989// the vector type that the fixed-length vector is contained in. Otherwise if
2990// VecVT is scalable, then ContainerVT should be the same as VecVT.
2991static std::pair<SDValue, SDValue>
2992getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2993 const RISCVSubtarget &Subtarget) {
2994 if (VecVT.isFixedLengthVector())
2995 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2996 Subtarget);
2997 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2998 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2999}
3000
3002 SelectionDAG &DAG) const {
3003 assert(VecVT.isScalableVector() && "Expected scalable vector");
3004 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
3005 VecVT.getVectorElementCount());
3006}
3007
3008std::pair<unsigned, unsigned>
3010 const RISCVSubtarget &Subtarget) {
3011 assert(VecVT.isScalableVector() && "Expected scalable vector");
3012
3013 unsigned EltSize = VecVT.getScalarSizeInBits();
3014 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
3015
3016 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
3017 unsigned MaxVLMAX =
3018 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
3019
3020 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
3021 unsigned MinVLMAX =
3022 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
3023
3024 return std::make_pair(MinVLMAX, MaxVLMAX);
3025}
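// In RVV terms this computes VLMAX = (VLEN / SEW) * LMUL for the smallest and
// largest VLEN the subtarget may have. For example, for nxv4i32 (SEW=32,
// LMUL=2) on a core with VLEN fixed at 128, both bounds are (128 / 32) * 2 = 8.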
3026
3027// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
3028// of either is (currently) supported. This can get us into an infinite loop
3029// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
3030// as a ..., etc.
3031// Until either (or both) of these can reliably lower any node, reporting that
3032// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
3033// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
3034// which is not desirable.
3036 EVT VT, unsigned DefinedValues) const {
3037 return false;
3038}
3039
3041  // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
3042  // actually implementation-defined.
3043 if (!VT.isVector())
3045 unsigned DLenFactor = Subtarget.getDLenFactor();
3046 unsigned Cost;
3047 if (VT.isScalableVector()) {
3048 unsigned LMul;
3049 bool Fractional;
3050 std::tie(LMul, Fractional) =
3052 if (Fractional)
3053 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
3054 else
3055 Cost = (LMul * DLenFactor);
3056 } else {
3057 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
3058 }
3059 return Cost;
3060}
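// For example, on a core where DLEN is half of VLEN (a DLenFactor of 2), an
// LMUL=4 type gets a cost of 4 * 2 = 8, while a fractional LMUL=1/2 type gets
// a cost of 2 / 2 = 1.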
3061
3062
3063/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
3064 /// may be quadratic in the number of vregs implied by LMUL, and is assumed to
3065 /// be so by default. VRGatherCostModel reflects the available options. Note that
3066 /// the operands (index and possibly mask) are handled separately.
3068 auto LMULCost = getLMULCost(VT);
3069 bool Log2CostModel =
3070 Subtarget.getVRGatherCostModel() == llvm::RISCVSubtarget::NLog2N;
3071 if (Log2CostModel && LMULCost.isValid()) {
3072 unsigned Log = Log2_64(LMULCost.getValue());
3073 if (Log > 0)
3074 return LMULCost * Log;
3075 }
3076 return LMULCost * LMULCost;
3077}
3078
3079/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
3080/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
3081/// or may track the vrgather.vv cost. It is implementation-dependent.
3085
3086/// Return the cost of a vslidedown.vx or vslideup.vx instruction
3087/// for the type VT. (This does not cover the vslide1up or vslide1down
3088/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3089/// or may track the vrgather.vv cost. It is implementation-dependent.
3093
3094/// Return the cost of a vslidedown.vi or vslideup.vi instruction
3095/// for the type VT. (This does not cover the vslide1up or vslide1down
3096/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3097/// or may track the vrgather.vv cost. It is implementation-dependent.
3101
3103 const RISCVSubtarget &Subtarget) {
3104 // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
3105 // bf16 conversions are always promoted to f32.
3106 if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3107 Op.getValueType() == MVT::bf16) {
3108 bool IsStrict = Op->isStrictFPOpcode();
3109
3110 SDLoc DL(Op);
3111 if (IsStrict) {
3112 SDValue Val = DAG.getNode(Op.getOpcode(), DL, {MVT::f32, MVT::Other},
3113 {Op.getOperand(0), Op.getOperand(1)});
3114 return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
3115 {Op.getValueType(), MVT::Other},
3116 {Val.getValue(1), Val.getValue(0),
3117 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
3118 }
3119 return DAG.getNode(
3120 ISD::FP_ROUND, DL, Op.getValueType(),
3121 DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0)),
3122 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
3123 }
3124
3125 // Other operations are legal.
3126 return Op;
3127}
3128
3130 const RISCVSubtarget &Subtarget) {
3131 // RISC-V FP-to-int conversions saturate to the destination register size, but
3132 // don't produce 0 for nan. We can use a conversion instruction and fix the
3133 // nan case with a compare and a select.
3134 SDValue Src = Op.getOperand(0);
3135
3136 MVT DstVT = Op.getSimpleValueType();
3137 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3138
3139 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
3140
3141 if (!DstVT.isVector()) {
3142    // For bf16, or for f16 in the absence of Zfh, promote to f32, then saturate
3143 // the result.
3144 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3145 Src.getValueType() == MVT::bf16) {
3146 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
3147 }
3148
3149 unsigned Opc;
3150 if (SatVT == DstVT)
3151 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
3152 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
3153 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
3154 else
3155 return SDValue();
3156 // FIXME: Support other SatVTs by clamping before or after the conversion.
3157
3158 SDLoc DL(Op);
3159 SDValue FpToInt = DAG.getNode(
3160 Opc, DL, DstVT, Src,
3162
3163 if (Opc == RISCVISD::FCVT_WU_RV64)
3164 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
3165
3166 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
3167 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
3169 }
3170
3171 // Vectors.
3172
3173 MVT DstEltVT = DstVT.getVectorElementType();
3174 MVT SrcVT = Src.getSimpleValueType();
3175 MVT SrcEltVT = SrcVT.getVectorElementType();
3176 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
3177 unsigned DstEltSize = DstEltVT.getSizeInBits();
3178
3179 // Only handle saturating to the destination type.
3180 if (SatVT != DstEltVT)
3181 return SDValue();
3182
3183 MVT DstContainerVT = DstVT;
3184 MVT SrcContainerVT = SrcVT;
3185 if (DstVT.isFixedLengthVector()) {
3186 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3187 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3188 assert(DstContainerVT.getVectorElementCount() ==
3189 SrcContainerVT.getVectorElementCount() &&
3190 "Expected same element count");
3191 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3192 }
3193
3194 SDLoc DL(Op);
3195
3196 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
3197
3198 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
3199 {Src, Src, DAG.getCondCode(ISD::SETNE),
3200 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
3201
3202  // If we need to widen by more than one step, promote the FP type first, then
3203  // do a widening convert.
3204 if (DstEltSize > (2 * SrcEltSize)) {
3205 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
3206 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
3207 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
3208 }
3209
3210 MVT CvtContainerVT = DstContainerVT;
3211 MVT CvtEltVT = DstEltVT;
3212 if (SrcEltSize > (2 * DstEltSize)) {
3213 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
3214 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3215 }
3216
3217 unsigned RVVOpc =
3218 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
3219 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
3220
3221 while (CvtContainerVT != DstContainerVT) {
3222 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
3223 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3224 // Rounding mode here is arbitrary since we aren't shifting out any bits.
3225 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
3226 : RISCVISD::TRUNCATE_VECTOR_VL_USAT;
3227 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
3228 }
3229
3230 SDValue SplatZero = DAG.getNode(
3231 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3232 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3233 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3234 Res, DAG.getUNDEF(DstContainerVT), VL);
3235
3236 if (DstVT.isFixedLengthVector())
3237 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3238
3239 return Res;
3240}
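// For the scalar path above, e.g. (fp_to_sint_sat f32 -> i32) becomes an
// FCVT_X node with the RTZ rounding mode; the hardware FCVT already saturates
// out-of-range inputs, so the only fixup needed is the select that forces the
// result to 0 when the source compares unordered with itself (i.e. is NaN),
// because FCVT returns the maximum value for NaN rather than 0.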
3241
3243 const RISCVSubtarget &Subtarget) {
3244 bool IsStrict = Op->isStrictFPOpcode();
3245 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3246
3247 // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3248 // bf16 conversions are always promoted to f32.
3249 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3250 SrcVal.getValueType() == MVT::bf16) {
3251 SDLoc DL(Op);
3252 if (IsStrict) {
3253 SDValue Ext =
3254 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
3255 {Op.getOperand(0), SrcVal});
3256 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
3257 {Ext.getValue(1), Ext.getValue(0)});
3258 }
3259 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
3260 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
3261 }
3262
3263 // Other operations are legal.
3264 return Op;
3265}
3266
3268 switch (Opc) {
3269 case ISD::FROUNDEVEN:
3271 case ISD::VP_FROUNDEVEN:
3272 return RISCVFPRndMode::RNE;
3273 case ISD::FTRUNC:
3274 case ISD::STRICT_FTRUNC:
3275 case ISD::VP_FROUNDTOZERO:
3276 return RISCVFPRndMode::RTZ;
3277 case ISD::FFLOOR:
3278 case ISD::STRICT_FFLOOR:
3279 case ISD::VP_FFLOOR:
3280 return RISCVFPRndMode::RDN;
3281 case ISD::FCEIL:
3282 case ISD::STRICT_FCEIL:
3283 case ISD::VP_FCEIL:
3284 return RISCVFPRndMode::RUP;
3285 case ISD::FROUND:
3286 case ISD::LROUND:
3287 case ISD::LLROUND:
3288 case ISD::STRICT_FROUND:
3289 case ISD::STRICT_LROUND:
3291 case ISD::VP_FROUND:
3292 return RISCVFPRndMode::RMM;
3293 case ISD::FRINT:
3294 case ISD::LRINT:
3295 case ISD::LLRINT:
3296 case ISD::STRICT_FRINT:
3297 case ISD::STRICT_LRINT:
3298 case ISD::STRICT_LLRINT:
3299 case ISD::VP_FRINT:
3300 case ISD::VP_LRINT:
3301 case ISD::VP_LLRINT:
3302 return RISCVFPRndMode::DYN;
3303 }
3304
3306}
3307
3308 // Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND,
3309 // VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3310 // the integer domain and back, taking care to avoid converting values that are
3311 // nan or already correct.
3312static SDValue
3314 const RISCVSubtarget &Subtarget) {
3315 MVT VT = Op.getSimpleValueType();
3316 assert(VT.isVector() && "Unexpected type");
3317
3318 SDLoc DL(Op);
3319
3320 SDValue Src = Op.getOperand(0);
3321
3322 // Freeze the source since we are increasing the number of uses.
3323 Src = DAG.getFreeze(Src);
3324
3325 MVT ContainerVT = VT;
3326 if (VT.isFixedLengthVector()) {
3327 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3328 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3329 }
3330
3331 SDValue Mask, VL;
3332 if (Op->isVPOpcode()) {
3333 Mask = Op.getOperand(1);
3334 if (VT.isFixedLengthVector())
3335 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3336 Subtarget);
3337 VL = Op.getOperand(2);
3338 } else {
3339 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3340 }
3341
3342 // We do the conversion on the absolute value and fix the sign at the end.
3343 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3344
3345 // Determine the largest integer that can be represented exactly. This and
3346 // values larger than it don't have any fractional bits so don't need to
3347 // be converted.
3348 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3349 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3350 APFloat MaxVal = APFloat(FltSem);
3351 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3352 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3353 SDValue MaxValNode =
3354 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3355 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3356 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3357
3358 // If abs(Src) was larger than MaxVal or nan, keep it.
3359 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3360 Mask =
3361 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3362 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3363 Mask, Mask, VL});
3364
3365 // Truncate to integer and convert back to FP.
3366 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3367 MVT XLenVT = Subtarget.getXLenVT();
3368 SDValue Truncated;
3369
3370 switch (Op.getOpcode()) {
3371 default:
3372 llvm_unreachable("Unexpected opcode");
3373 case ISD::FRINT:
3374 case ISD::VP_FRINT:
3375 case ISD::FCEIL:
3376 case ISD::VP_FCEIL:
3377 case ISD::FFLOOR:
3378 case ISD::VP_FFLOOR:
3379 case ISD::FROUND:
3380 case ISD::FROUNDEVEN:
3381 case ISD::VP_FROUND:
3382 case ISD::VP_FROUNDEVEN:
3383 case ISD::VP_FROUNDTOZERO: {
3386 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3387 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3388 break;
3389 }
3390 case ISD::FTRUNC:
3391 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3392 Mask, VL);
3393 break;
3394 case ISD::FNEARBYINT:
3395 case ISD::VP_FNEARBYINT:
3396 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3397 Mask, VL);
3398 break;
3399 }
3400
3401 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3402 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3403 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3404 Mask, VL);
3405
3406 // Restore the original sign so that -0.0 is preserved.
3407 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3408 Src, Src, Mask, VL);
3409
3410 if (!VT.isFixedLengthVector())
3411 return Truncated;
3412
3413 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3414}
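// Sketch of the sequence built above for, e.g., vector FCEIL on f32 elements:
// take |x|, build a mask of the lanes where |x| < 2^23 (f32 has 24 bits of
// precision, so values at or above 2^23 already have no fractional bits),
// convert those lanes to integer with the RUP static rounding mode, convert
// back with SINT_TO_FP, and finally copy the original sign back so that -0.0
// and the sign of negative results are preserved.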
3415
3416 // Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND,
3417 // STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting any sNaN in the source
3418 // to a qNaN, then converting the new source to integer and back to FP.
3419static SDValue
3421 const RISCVSubtarget &Subtarget) {
3422 SDLoc DL(Op);
3423 MVT VT = Op.getSimpleValueType();
3424 SDValue Chain = Op.getOperand(0);
3425 SDValue Src = Op.getOperand(1);
3426
3427 MVT ContainerVT = VT;
3428 if (VT.isFixedLengthVector()) {
3429 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3430 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3431 }
3432
3433 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3434
3435 // Freeze the source since we are increasing the number of uses.
3436 Src = DAG.getFreeze(Src);
3437
3438 // Convert sNan to qNan by executing x + x for all unordered element x in Src.
3439 MVT MaskVT = Mask.getSimpleValueType();
3440 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
3441 DAG.getVTList(MaskVT, MVT::Other),
3442 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3443 DAG.getUNDEF(MaskVT), Mask, VL});
3444 Chain = Unorder.getValue(1);
3445 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
3446 DAG.getVTList(ContainerVT, MVT::Other),
3447 {Chain, Src, Src, Src, Unorder, VL});
3448 Chain = Src.getValue(1);
3449
3450 // We do the conversion on the absolute value and fix the sign at the end.
3451 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3452
3453 // Determine the largest integer that can be represented exactly. This and
3454 // values larger than it don't have any fractional bits so don't need to
3455 // be converted.
3456 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3457 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3458 APFloat MaxVal = APFloat(FltSem);
3459 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3460 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3461 SDValue MaxValNode =
3462 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3463 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3464 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3465
3466 // If abs(Src) was larger than MaxVal or nan, keep it.
3467 Mask = DAG.getNode(
3468 RISCVISD::SETCC_VL, DL, MaskVT,
3469 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3470
3471 // Truncate to integer and convert back to FP.
3472 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3473 MVT XLenVT = Subtarget.getXLenVT();
3474 SDValue Truncated;
3475
3476 switch (Op.getOpcode()) {
3477 default:
3478 llvm_unreachable("Unexpected opcode");
3479 case ISD::STRICT_FCEIL:
3480 case ISD::STRICT_FFLOOR:
3481 case ISD::STRICT_FROUND:
3485 Truncated = DAG.getNode(
3486 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3487 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3488 break;
3489 }
3490 case ISD::STRICT_FTRUNC:
3491 Truncated =
3492 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3493 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3494 break;
3496 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3497 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3498 Mask, VL);
3499 break;
3500 }
3501 Chain = Truncated.getValue(1);
3502
3503 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3504 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3505 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3506 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3507 Truncated, Mask, VL);
3508 Chain = Truncated.getValue(1);
3509 }
3510
3511 // Restore the original sign so that -0.0 is preserved.
3512 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3513 Src, Src, Mask, VL);
3514
3515 if (VT.isFixedLengthVector())
3516 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3517 return DAG.getMergeValues({Truncated, Chain}, DL);
3518}
3519
3520static SDValue
3522 const RISCVSubtarget &Subtarget) {
3523 MVT VT = Op.getSimpleValueType();
3524 if (VT.isVector())
3525 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3526
3527 if (DAG.shouldOptForSize())
3528 return SDValue();
3529
3530 SDLoc DL(Op);
3531 SDValue Src = Op.getOperand(0);
3532
3533 // Create an integer the size of the mantissa with the MSB set. This and all
3534 // values larger than it don't have any fractional bits so don't need to be
3535 // converted.
3536 const fltSemantics &FltSem = VT.getFltSemantics();
3537 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3538 APFloat MaxVal = APFloat(FltSem);
3539 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3540 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3541 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3542
3544 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3545 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3546}
3547
3548// Expand vector [L]LRINT and [L]LROUND by converting to the integer domain.
3550 const RISCVSubtarget &Subtarget) {
3551 SDLoc DL(Op);
3552 MVT DstVT = Op.getSimpleValueType();
3553 SDValue Src = Op.getOperand(0);
3554 MVT SrcVT = Src.getSimpleValueType();
3555 assert(SrcVT.isVector() && DstVT.isVector() &&
3556 !(SrcVT.isFixedLengthVector() ^ DstVT.isFixedLengthVector()) &&
3557 "Unexpected type");
3558
3559 MVT DstContainerVT = DstVT;
3560 MVT SrcContainerVT = SrcVT;
3561
3562 if (DstVT.isFixedLengthVector()) {
3563 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3564 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3565 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3566 }
3567
3568 auto [Mask, VL] = getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
3569
3570 // [b]f16 -> f32
3571 MVT SrcElemType = SrcVT.getVectorElementType();
3572 if (SrcElemType == MVT::f16 || SrcElemType == MVT::bf16) {
3573 MVT F32VT = SrcContainerVT.changeVectorElementType(MVT::f32);
3574 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, F32VT, Src, Mask, VL);
3575 }
3576
3577 SDValue Res =
3578 DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, DstContainerVT, Src, Mask,
3579 DAG.getTargetConstant(matchRoundingOp(Op.getOpcode()), DL,
3580 Subtarget.getXLenVT()),
3581 VL);
3582
3583 if (!DstVT.isFixedLengthVector())
3584 return Res;
3585
3586 return convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3587}
3588
3589 static SDValue
3590 getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
3591 const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3592 SDValue Offset, SDValue Mask, SDValue VL,
3593 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3594 if (Passthru.isUndef())
3595 Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
3596 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3597 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3598 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3599}
3600
3601static SDValue
3602getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3603 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3604 SDValue VL,
3605 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3606 if (Passthru.isUndef())
3607 Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
3608 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3609 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3610 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3611}
3612
3613 struct VIDSequence {
3614 int64_t StepNumerator;
3615 unsigned StepDenominator;
3616 int64_t Addend;
3617};
3618
3619 static std::optional<APInt> getExactInteger(const APFloat &APF,
3620 unsigned BitWidth) {
3621 // We will use a SINT_TO_FP to materialize this constant so we should use a
3622 // signed APSInt here.
3623 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3624 // We use an arbitrary rounding mode here. If a floating-point value is an exact
3625 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3626 // the rounding mode changes the output value, then it is not an exact
3627 // integer.
3628 RoundingMode ArbitraryRM = RoundingMode::TowardZero;
3629 bool IsExact;
3630 // If it is out of signed integer range, it will return an invalid operation.
3631 // If it is not an exact integer, IsExact is false.
3632 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3633 APFloatBase::opInvalidOp) ||
3634 !IsExact)
3635 return std::nullopt;
3636 return ValInt.extractBits(BitWidth, 0);
3637}
3638
3639// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3640// to the (non-zero) step S and start value X. This can be then lowered as the
3641// RVV sequence (VID * S) + X, for example.
3642// The step S is represented as an integer numerator divided by a positive
3643// denominator. Note that the implementation currently only identifies
3644// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3645// cannot detect 2/3, for example.
3646// Note that this method will also match potentially unappealing index
3647 // sequences, like <i32 0, i32 50939494>; however, it is left to the caller to
3648// determine whether this is worth generating code for.
3649//
3650// EltSizeInBits is the size of the type that the sequence will be calculated
3651// in, i.e. SEW for build_vectors or XLEN for address calculations.
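// Illustrative examples: <i32 1, i32 3, i32 5, i32 7> is matched with
// StepNumerator=2, StepDenominator=1, Addend=1, while <i32 0, i32 0, i32 1,
// i32 1> is matched with StepNumerator=1, StepDenominator=2, Addend=0.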
3652static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3653 unsigned EltSizeInBits) {
3654 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3655 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3656 return std::nullopt;
3657 bool IsInteger = Op.getValueType().isInteger();
3658
3659 std::optional<unsigned> SeqStepDenom;
3660 std::optional<APInt> SeqStepNum;
3661 std::optional<APInt> SeqAddend;
3662 std::optional<std::pair<APInt, unsigned>> PrevElt;
3663 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3664
3665 // First extract the ops into a list of constant integer values. This may not
3666 // be possible for floats if they're not all representable as integers.
3667 SmallVector<std::optional<APInt>> Elts(Op.getNumOperands());
3668 const unsigned OpSize = Op.getScalarValueSizeInBits();
3669 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3670 if (Elt.isUndef()) {
3671 Elts[Idx] = std::nullopt;
3672 continue;
3673 }
3674 if (IsInteger) {
3675 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3676 } else {
3677 auto ExactInteger =
3678 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3679 if (!ExactInteger)
3680 return std::nullopt;
3681 Elts[Idx] = *ExactInteger;
3682 }
3683 }
3684
3685 for (auto [Idx, Elt] : enumerate(Elts)) {
3686 // Assume undef elements match the sequence; we just have to be careful
3687 // when interpolating across them.
3688 if (!Elt)
3689 continue;
3690
3691 if (PrevElt) {
3692 // Calculate the step since the last non-undef element, and ensure
3693 // it's consistent across the entire sequence.
3694 unsigned IdxDiff = Idx - PrevElt->second;
3695 APInt ValDiff = *Elt - PrevElt->first;
3696
3697 // A zero value difference means that we're somewhere in the middle
3698 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3699 // step change before evaluating the sequence.
3700 if (ValDiff == 0)
3701 continue;
3702
3703 int64_t Remainder = ValDiff.srem(IdxDiff);
3704 // Normalize the step if it's greater than 1.
3705 if (Remainder != ValDiff.getSExtValue()) {
3706 // The difference must cleanly divide the element span.
3707 if (Remainder != 0)
3708 return std::nullopt;
3709 ValDiff = ValDiff.sdiv(IdxDiff);
3710 IdxDiff = 1;
3711 }
3712
3713 if (!SeqStepNum)
3714 SeqStepNum = ValDiff;
3715 else if (ValDiff != SeqStepNum)
3716 return std::nullopt;
3717
3718 if (!SeqStepDenom)
3719 SeqStepDenom = IdxDiff;
3720 else if (IdxDiff != *SeqStepDenom)
3721 return std::nullopt;
3722 }
3723
3724 // Record this non-undef element for later.
3725 if (!PrevElt || PrevElt->first != *Elt)
3726 PrevElt = std::make_pair(*Elt, Idx);
3727 }
3728
3729 // We need to have logged a step for this to count as a legal index sequence.
3730 if (!SeqStepNum || !SeqStepDenom)
3731 return std::nullopt;
3732
3733 // Loop back through the sequence and validate elements we might have skipped
3734 // while waiting for a valid step. While doing this, log any sequence addend.
3735 for (auto [Idx, Elt] : enumerate(Elts)) {
3736 if (!Elt)
3737 continue;
3738 APInt ExpectedVal =
3739 (APInt(EltSizeInBits, Idx, /*isSigned=*/false, /*implicitTrunc=*/true) *
3740 *SeqStepNum)
3741 .sdiv(*SeqStepDenom);
3742
3743 APInt Addend = *Elt - ExpectedVal;
3744 if (!SeqAddend)
3745 SeqAddend = Addend;
3746 else if (Addend != SeqAddend)
3747 return std::nullopt;
3748 }
3749
3750 assert(SeqAddend && "Must have an addend if we have a step");
3751
3752 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3753 SeqAddend->getSExtValue()};
3754}
3755
3756// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3757// and lower it as a VRGATHER_VX_VL from the source vector.
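// For example, (splat (extract_vector_elt %src, %idx)) can be emitted as a
// single vrgather.vx of %src with scalar index %idx, provided the index is
// known to lie within the destination type.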
3758static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3759 SelectionDAG &DAG,
3760 const RISCVSubtarget &Subtarget) {
3761 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3762 return SDValue();
3763 SDValue Src = SplatVal.getOperand(0);
3764 // Don't perform this optimization for i1 vectors, or if the element types are
3765 // different
3766 // FIXME: Support i1 vectors, maybe by promoting to i8?
3767 MVT EltTy = VT.getVectorElementType();
3768 if (EltTy == MVT::i1 ||
3769 !DAG.getTargetLoweringInfo().isTypeLegal(Src.getValueType()))
3770 return SDValue();
3771 MVT SrcVT = Src.getSimpleValueType();
3772 if (EltTy != SrcVT.getVectorElementType())
3773 return SDValue();
3774 SDValue Idx = SplatVal.getOperand(1);
3775 // The index must be a legal type.
3776 if (Idx.getValueType() != Subtarget.getXLenVT())
3777 return SDValue();
3778
3779 // Check that we know Idx lies within VT
3780 if (!TypeSize::isKnownLE(SrcVT.getSizeInBits(), VT.getSizeInBits())) {
3781 auto *CIdx = dyn_cast<ConstantSDNode>(Idx);
3782 if (!CIdx || CIdx->getZExtValue() >= VT.getVectorMinNumElements())
3783 return SDValue();
3784 }
3785
3786 // Convert fixed length vectors to scalable
3787 MVT ContainerVT = VT;
3788 if (VT.isFixedLengthVector())
3789 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3790
3791 MVT SrcContainerVT = SrcVT;
3792 if (SrcVT.isFixedLengthVector()) {
3793 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3794 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3795 }
3796
3797 // Put Vec in a VT sized vector
3798 if (SrcContainerVT.getVectorMinNumElements() <
3799 ContainerVT.getVectorMinNumElements())
3800 Src = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Src, 0);
3801 else
3802 Src = DAG.getExtractSubvector(DL, ContainerVT, Src, 0);
3803
3804 // We checked that Idx fits inside VT earlier
3805 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3806 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Src,
3807 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3808 if (VT.isFixedLengthVector())
3809 Gather = convertFromScalableVector(VT, Gather, DAG, Subtarget);
3810 return Gather;
3811}
3812
3813 static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG,
3814 const RISCVSubtarget &Subtarget) {
3815 MVT VT = Op.getSimpleValueType();
3816 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3817
3818 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3819
3820 SDLoc DL(Op);
3821 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3822
3823 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3824 int64_t StepNumerator = SimpleVID->StepNumerator;
3825 unsigned StepDenominator = SimpleVID->StepDenominator;
3826 int64_t Addend = SimpleVID->Addend;
3827
3828 assert(StepNumerator != 0 && "Invalid step");
3829 bool Negate = false;
3830 int64_t SplatStepVal = StepNumerator;
3831 unsigned StepOpcode = ISD::MUL;
3832 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3833 // anyway as the shift of 63 won't fit in uimm5.
3834 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3835 isPowerOf2_64(std::abs(StepNumerator))) {
3836 Negate = StepNumerator < 0;
3837 StepOpcode = ISD::SHL;
3838 SplatStepVal = Log2_64(std::abs(StepNumerator));
3839 }
3840
3841 // Only emit VIDs with suitably-small steps. We use imm5 as a threshold
3842 // since it's the immediate value many RVV instructions accept. There is
3843 // no vmul.vi instruction, so ensure the multiply constant can fit in a
3844 // single addi instruction. For the addend, we allow up to 32 bits.
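// For example, the sequence <0, 2, 4, 6> has StepNumerator=2, so it can be
// emitted as a vid.v followed by a vsll.vi with shift amount 1.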
3845 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3846 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3847 isPowerOf2_32(StepDenominator) &&
3848 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<32>(Addend)) {
3849 MVT VIDVT =
3850 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3851 MVT VIDContainerVT =
3852 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3853 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3854 // Convert right out of the scalable type so we can use standard ISD
3855 // nodes for the rest of the computation. If we used scalable types with
3856 // these, we'd lose the fixed-length vector info and generate worse
3857 // vsetvli code.
3858 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3859 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3860 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3861 SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
3862 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3863 }
3864 if (StepDenominator != 1) {
3865 SDValue SplatStep =
3866 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3867 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3868 }
3869 if (Addend != 0 || Negate) {
3870 SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
3871 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3872 VID);
3873 }
3874 if (VT.isFloatingPoint()) {
3875 // TODO: Use vfwcvt to reduce register pressure.
3876 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3877 }
3878 return VID;
3879 }
3880 }
3881
3882 return SDValue();
3883}
3884
3885/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3886/// which constitute a large proportion of the elements. In such cases we can
3887/// splat a vector with the dominant element and make up the shortfall with
3888/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
3889/// Note that this includes vectors of 2 elements by association. The
3890/// upper-most element is the "dominant" one, allowing us to use a splat to
3891/// "insert" the upper element, and an insert of the lower element at position
3892/// 0, which improves codegen.
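/// For example, <a, a, b, a> can be lowered as a splat of a followed by a
/// single insert of b at index 2, rather than building every element
/// individually.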
3893 static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
3894 const RISCVSubtarget &Subtarget) {
3895 MVT VT = Op.getSimpleValueType();
3896 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3897
3898 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3899
3900 SDLoc DL(Op);
3901 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3902
3903 MVT XLenVT = Subtarget.getXLenVT();
3904 unsigned NumElts = Op.getNumOperands();
3905
3906 SDValue DominantValue;
3907 unsigned MostCommonCount = 0;
3908 DenseMap<SDValue, unsigned> ValueCounts;
3909 unsigned NumUndefElts =
3910 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3911
3912 // Track the number of scalar loads we know we'd be inserting, estimated as
3913 // any non-zero floating-point constant. Other kinds of element are either
3914 // already in registers or are materialized on demand. The threshold at which
3915 // a vector load is more desirable than several scalar materialization and
3916 // vector-insertion instructions is not known.
3917 unsigned NumScalarLoads = 0;
3918
3919 for (SDValue V : Op->op_values()) {
3920 if (V.isUndef())
3921 continue;
3922
3923 unsigned &Count = ValueCounts[V];
3924 if (0 == Count)
3925 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3926 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3927
3928 // Is this value dominant? In case of a tie, prefer the highest element as
3929 // it's cheaper to insert near the beginning of a vector than it is at the
3930 // end.
3931 if (++Count >= MostCommonCount) {
3932 DominantValue = V;
3933 MostCommonCount = Count;
3934 }
3935 }
3936
3937 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3938 unsigned NumDefElts = NumElts - NumUndefElts;
3939 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3940
3941 // Don't perform this optimization when optimizing for size, since
3942 // materializing elements and inserting them tends to cause code bloat.
3943 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3944 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3945 ((MostCommonCount > DominantValueCountThreshold) ||
3946 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3947 // Start by splatting the most common element.
3948 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3949
3950 DenseSet<SDValue> Processed{DominantValue};
3951
3952 // We can handle an insert into the last element (of a splat) via
3953 // v(f)slide1down. This is slightly better than the vslideup insert
3954 // lowering as it avoids the need for a vector group temporary. It
3955 // is also better than using vmerge.vx as it avoids the need to
3956 // materialize the mask in a vector register.
3957 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3958 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3959 LastOp != DominantValue) {
3960 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3961 auto OpCode =
3962 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3963 if (!VT.isFloatingPoint())
3964 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3965 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3966 LastOp, Mask, VL);
3967 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3968 Processed.insert(LastOp);
3969 }
3970
3971 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3972 for (const auto &OpIdx : enumerate(Op->ops())) {
3973 const SDValue &V = OpIdx.value();
3974 if (V.isUndef() || !Processed.insert(V).second)
3975 continue;
3976 if (ValueCounts[V] == 1) {
3977 Vec = DAG.getInsertVectorElt(DL, Vec, V, OpIdx.index());
3978 } else {
3979 // Blend in all instances of this value using a VSELECT, using a
3980 // mask where each bit signals whether that element is the one
3981 // we're after.
3982 SmallVector<SDValue> Ops;
3983 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3984 return DAG.getConstant(V == V1, DL, XLenVT);
3985 });
3986 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3987 DAG.getBuildVector(SelMaskTy, DL, Ops),
3988 DAG.getSplatBuildVector(VT, DL, V), Vec);
3989 }
3990 }
3991
3992 return Vec;
3993 }
3994
3995 return SDValue();
3996}
3997
3998 static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
3999 const RISCVSubtarget &Subtarget) {
4000 MVT VT = Op.getSimpleValueType();
4001 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4002
4003 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4004
4005 SDLoc DL(Op);
4006 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4007
4008 MVT XLenVT = Subtarget.getXLenVT();
4009 unsigned NumElts = Op.getNumOperands();
4010
4011 if (VT.getVectorElementType() == MVT::i1) {
4012 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
4013 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
4014 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
4015 }
4016
4017 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
4018 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
4019 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
4020 }
4021
4022 // Lower constant mask BUILD_VECTORs via an integer vector type, in
4023 // scalar integer chunks whose bit-width depends on the number of mask
4024 // bits and XLEN.
4025 // First, determine the most appropriate scalar integer type to use. This
4026 // is at most XLenVT, but may be shrunk to a smaller vector element type
4027 // according to the size of the final vector - use i8 chunks rather than
4028 // XLenVT if we're producing a v8i1. This results in more consistent
4029 // codegen across RV32 and RV64.
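// For example, a v8i1 constant mask <1,0,1,1,0,0,0,0> is packed into the
// single i8 value 0b00001101, materialized as a v1i8 build_vector, and then
// bitcast back to v8i1.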
4030 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
4031 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
4032 // If we have to use more than one INSERT_VECTOR_ELT then this
4033 // optimization is likely to increase code size; avoid performing it in
4034 // such a case. We can use a load from a constant pool in this case.
4035 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
4036 return SDValue();
4037 // Now we can create our integer vector type. Note that it may be larger
4038 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
4039 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
4040 MVT IntegerViaVecVT =
4041 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
4042 IntegerViaVecElts);
4043
4044 uint64_t Bits = 0;
4045 unsigned BitPos = 0, IntegerEltIdx = 0;
4046 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
4047
4048 for (unsigned I = 0; I < NumElts;) {
4049 SDValue V = Op.getOperand(I);
4050 bool BitValue = !V.isUndef() && V->getAsZExtVal();
4051 Bits |= ((uint64_t)BitValue << BitPos);
4052 ++BitPos;
4053 ++I;
4054
4055 // Once we accumulate enough bits to fill our scalar type or process the
4056 // last element, insert into our vector and clear our accumulated data.
4057 if (I % NumViaIntegerBits == 0 || I == NumElts) {
4058 if (NumViaIntegerBits <= 32)
4059 Bits = SignExtend64<32>(Bits);
4060 SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
4061 Elts[IntegerEltIdx] = Elt;
4062 Bits = 0;
4063 BitPos = 0;
4064 IntegerEltIdx++;
4065 }
4066 }
4067
4068 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
4069
4070 if (NumElts < NumViaIntegerBits) {
4071 // If we're producing a smaller vector than our minimum legal integer
4072 // type, bitcast to the equivalent (known-legal) mask type, and extract
4073 // our final mask.
4074 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
4075 Vec = DAG.getBitcast(MVT::v8i1, Vec);
4076 Vec = DAG.getExtractSubvector(DL, VT, Vec, 0);
4077 } else {
4078 // Else we must have produced an integer type with the same size as the
4079 // mask type; bitcast for the final result.
4080 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
4081 Vec = DAG.getBitcast(VT, Vec);
4082 }
4083
4084 return Vec;
4085 }
4086
4087 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4088 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4089 : RISCVISD::VMV_V_X_VL;
4090 if (!VT.isFloatingPoint())
4091 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4092 Splat =
4093 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4094 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4095 }
4096
4097 // Try and match index sequences, which we can lower to the vid instruction
4098 // with optional modifications. An all-undef vector is matched by
4099 // getSplatValue, above.
4100 if (SDValue Res = lowerBuildVectorViaVID(Op, DAG, Subtarget))
4101 return Res;
4102
4103 // For very small build_vectors, use a single scalar insert of a constant.
4104 // TODO: Base this on constant rematerialization cost, not size.
4105 const unsigned EltBitSize = VT.getScalarSizeInBits();
4106 if (VT.getSizeInBits() <= 32 &&
4107 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
4108 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
4109 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
4110 "Unexpected sequence type");
4111 // If we can use the original VL with the modified element type, this
4112 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4113 // be moved into InsertVSETVLI?
4114 unsigned ViaVecLen =
4115 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
4116 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4117
4118 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4119 uint64_t SplatValue = 0;
4120 // Construct the amalgamated value at this larger vector type.
4121 for (const auto &OpIdx : enumerate(Op->op_values())) {
4122 const auto &SeqV = OpIdx.value();
4123 if (!SeqV.isUndef())
4124 SplatValue |=
4125 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
4126 }
4127
4128 // On RV64, sign-extend from 32 to 64 bits where possible in order to
4129 // achieve better constant materialization.
4130 // On RV32, we need to sign-extend to use getSignedConstant.
4131 if (ViaIntVT == MVT::i32)
4132 SplatValue = SignExtend64<32>(SplatValue);
4133
4134 SDValue Vec = DAG.getInsertVectorElt(
4135 DL, DAG.getUNDEF(ViaVecVT),
4136 DAG.getSignedConstant(SplatValue, DL, XLenVT), 0);
4137 if (ViaVecLen != 1)
4138 Vec = DAG.getExtractSubvector(DL, MVT::getVectorVT(ViaIntVT, 1), Vec, 0);
4139 return DAG.getBitcast(VT, Vec);
4140 }
4141
4142
4143 // Attempt to detect "hidden" splats, which only reveal themselves as splats
4144 // when re-interpreted as a vector with a larger element type. For example,
4145 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
4146 // could be instead splat as
4147 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
4148 // TODO: This optimization could also work on non-constant splats, but it
4149 // would require bit-manipulation instructions to construct the splat value.
4150 SmallVector<SDValue> Sequence;
4151 const auto *BV = cast<BuildVectorSDNode>(Op);
4152 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
4153 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
4154 BV->getRepeatedSequence(Sequence) &&
4155 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
4156 unsigned SeqLen = Sequence.size();
4157 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
4158 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
4159 ViaIntVT == MVT::i64) &&
4160 "Unexpected sequence type");
4161
4162 // If we can use the original VL with the modified element type, this
4163 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4164 // be moved into InsertVSETVLI?
4165 const unsigned RequiredVL = NumElts / SeqLen;
4166 const unsigned ViaVecLen =
4167 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
4168 NumElts : RequiredVL;
4169 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4170
4171 unsigned EltIdx = 0;
4172 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4173 uint64_t SplatValue = 0;
4174 // Construct the amalgamated value which can be splatted as this larger
4175 // vector type.
4176 for (const auto &SeqV : Sequence) {
4177 if (!SeqV.isUndef())
4178 SplatValue |=
4179 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
4180 EltIdx++;
4181 }
4182
4183 // On RV64, sign-extend from 32 to 64 bits where possible in order to
4184 // achieve better constant materialization.
4185 // On RV32, we need to sign-extend to use getSignedConstant.
4186 if (ViaIntVT == MVT::i32)
4187 SplatValue = SignExtend64<32>(SplatValue);
4188
4189 // Since we can't introduce illegal i64 types at this stage, we can only
4190 // perform an i64 splat on RV32 if it is its own sign-extended value. That
4191 // way we can use RVV instructions to splat.
4192 assert((ViaIntVT.bitsLE(XLenVT) ||
4193 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
4194 "Unexpected bitcast sequence");
4195 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
4196 SDValue ViaVL =
4197 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
4198 MVT ViaContainerVT =
4199 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
4200 SDValue Splat =
4201 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
4202 DAG.getUNDEF(ViaContainerVT),
4203 DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
4204 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
4205 if (ViaVecLen != RequiredVL)
4206 Splat = DAG.getExtractSubvector(
4207 DL, MVT::getVectorVT(ViaIntVT, RequiredVL), Splat, 0);
4208 return DAG.getBitcast(VT, Splat);
4209 }
4210 }
4211
4212 // If the number of signbits allows, see if we can lower as a <N x i8>.
4213 // Our main goal here is to reduce LMUL (and thus work) required to
4214 // build the constant, but we will also narrow if the resulting
4215 // narrow vector is known to materialize cheaply.
4216 // TODO: We really should be costing the smaller vector. There are
4217 // profitable cases this misses.
4218 if (EltBitSize > 8 && VT.isInteger() &&
4219 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
4220 DAG.ComputeMaxSignificantBits(Op) <= 8) {
4221 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
4222 DL, Op->ops());
4223 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
4224 Source, DAG, Subtarget);
4225 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
4226 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4227 }
4228
4229 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4230 return Res;
4231
4232 // For constant vectors, use generic constant pool lowering. Otherwise,
4233 // we'd have to materialize constants in GPRs just to move them into the
4234 // vector.
4235 return SDValue();
4236}
4237
4238static unsigned getPACKOpcode(unsigned DestBW,
4239 const RISCVSubtarget &Subtarget) {
4240 switch (DestBW) {
4241 default:
4242 llvm_unreachable("Unsupported pack size");
4243 case 16:
4244 return RISCV::PACKH;
4245 case 32:
4246 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
4247 case 64:
4248 assert(Subtarget.is64Bit());
4249 return RISCV::PACK;
4250 }
4251}
4252
4253/// Double the element size of the build vector to reduce the number
4254/// of vslide1down in the build vector chain. In the worst case, this
4255/// trades three scalar operations for 1 vector operation. Scalar
4256/// operations are generally lower latency, and for out-of-order cores
4257/// we also benefit from additional parallelism.
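/// For example, with 16-bit elements on RV64, adjacent scalars A and B are
/// combined into the 32-bit value (B << 16) | A (a single pack/packw when
/// Zbkb is available), halving the number of vslide1down steps.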
4258 static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
4259 const RISCVSubtarget &Subtarget) {
4260 SDLoc DL(Op);
4261 MVT VT = Op.getSimpleValueType();
4262 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4263 MVT ElemVT = VT.getVectorElementType();
4264 if (!ElemVT.isInteger())
4265 return SDValue();
4266
4267 // TODO: Relax these architectural restrictions, possibly with costing
4268 // of the actual instructions required.
4269 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
4270 return SDValue();
4271
4272 unsigned NumElts = VT.getVectorNumElements();
4273 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
4274 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4275 NumElts % 2 != 0)
4276 return SDValue();
4277
4278 // Produce [B,A] packed into a type twice as wide. Note that all
4279 // scalars are XLenVT, possibly masked (see below).
4280 MVT XLenVT = Subtarget.getXLenVT();
4281 SDValue Mask = DAG.getConstant(
4282 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
4283 auto pack = [&](SDValue A, SDValue B) {
4284 // Bias the scheduling of the inserted operations to near the
4285 // definition of the element - this tends to reduce register
4286 // pressure overall.
4287 SDLoc ElemDL(B);
4288 if (Subtarget.hasStdExtZbkb())
4289 // Note that we're relying on the high bits of the result being
4290 // don't care. For PACKW, the result is *sign* extended.
4291 return SDValue(
4292 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4293 ElemDL, XLenVT, A, B),
4294 0);
4295
4296 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
4297 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
4298 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
4299 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
4300 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt),
4301 SDNodeFlags::Disjoint);
4302 };
4303
4304 SmallVector<SDValue> NewOperands;
4305 NewOperands.reserve(NumElts / 2);
4306 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4307 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
4308 assert(NumElts == NewOperands.size() * 2);
4309 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
4310 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
4311 return DAG.getNode(ISD::BITCAST, DL, VT,
4312 DAG.getBuildVector(WideVecVT, DL, NewOperands));
4313}
4314
4315 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
4316 const RISCVSubtarget &Subtarget) {
4317 MVT VT = Op.getSimpleValueType();
4318 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4319
4320 MVT EltVT = VT.getVectorElementType();
4321 MVT XLenVT = Subtarget.getXLenVT();
4322
4323 SDLoc DL(Op);
4324
4325 // Proper support for f16 requires Zvfh. bf16 always requires special
4326 // handling. We need to cast the scalar to integer and create an integer
4327 // build_vector.
4328 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4329 MVT IVT = VT.changeVectorElementType(MVT::i16);
4330 SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
4331 for (const auto &[I, U] : enumerate(Op->ops())) {
4332 SDValue Elem = U.get();
4333 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4334 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4335 // Called by LegalizeDAG, we need to use XLenVT operations since we
4336 // can't create illegal types.
4337 if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4338 // Manually constant fold so the integer build_vector can be lowered
4339 // better. Waiting for DAGCombine will be too late.
4340 APInt V =
4341 C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4342 NewOps[I] = DAG.getConstant(V, DL, XLenVT);
4343 } else {
4344 NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
4345 }
4346 } else {
4347 // Called by scalar type legalizer, we can use i16.
4348 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4349 }
4350 }
4351 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
4352 return DAG.getBitcast(VT, Res);
4353 }
4354
4355 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4356 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
4357 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4358
4359 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4360
4361 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4362
4363 if (VT.getVectorElementType() == MVT::i1) {
4364 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4365 // vector type, we have a legal equivalently-sized i8 type, so we can use
4366 // that.
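// For example, a non-constant v4i1 build_vector is lowered by building a
// v4i8 vector from the scalar operands, masking it with a splat of 1, and
// comparing it setne against zero.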
4367 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4368 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4369
4370 SDValue WideVec;
4371 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4372 // For a splat, perform a scalar truncate before creating the wider
4373 // vector.
4374 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4375 DAG.getConstant(1, DL, Splat.getValueType()));
4376 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4377 } else {
4378 SmallVector<SDValue, 8> Ops(Op->op_values());
4379 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4380 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4381 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4382 }
4383
4384 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4385 }
4386
4387 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4388 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4389 return Gather;
4390
4391 // Prefer vmv.s.x/vfmv.s.f if legal to reduce work and register
4392 // pressure at high LMUL.
4393 if (all_of(Op->ops().drop_front(),
4394 [](const SDUse &U) { return U.get().isUndef(); })) {
4395 unsigned Opc =
4396 VT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
4397 if (!VT.isFloatingPoint())
4398 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4399 Splat = DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4400 Splat, VL);
4401 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4402 }
4403
4404 unsigned Opc =
4405 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4406 if (!VT.isFloatingPoint())
4407 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4408 Splat =
4409 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4410 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4411 }
4412
4413 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4414 return Res;
4415
4416 // If we're compiling for an exact VLEN value, we can split our work per
4417 // register in the register group.
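// For example, with an exact VLEN of 128, a v8i64 build_vector (an m4 value)
// is assembled from four v2i64 build_vectors, each inserted into its own
// register-sized slot of the result.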
4418 if (const auto VLen = Subtarget.getRealVLen();
4419 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4420 MVT ElemVT = VT.getVectorElementType();
4421 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4422 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4423 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4424 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4425 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
4426
4427 // The following semantically builds up a fixed length concat_vector
4428 // of the component build_vectors. We eagerly lower to scalable and
4429 // insert_subvector here to avoid DAG combining it back to a large
4430 // build_vector.
4431 SmallVector<SDValue> BuildVectorOps(Op->ops());
4432 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4433 SDValue Vec = DAG.getUNDEF(ContainerVT);
4434 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4435 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4436 SDValue SubBV =
4437 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4438 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4439 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4440 Vec = DAG.getInsertSubvector(DL, Vec, SubBV, InsertIdx);
4441 }
4442 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4443 }
4444
4445 // If we're about to resort to vslide1down (or stack usage), pack our
4446 // elements into the widest scalar type we can. This will force a VL/VTYPE
4447 // toggle, but reduces the critical path, the number of vslide1down ops
4448 // required, and possibly enables scalar folds of the values.
4449 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4450 return Res;
4451
4452 // For m1 vectors, if we have non-undef values in both halves of our vector,
4453 // split the vector into low and high halves, build them separately, then
4454 // use a vselect to combine them. For long vectors, this cuts the critical
4455 // path of the vslide1down sequence in half, and gives us an opportunity
4456 // to special case each half independently. Note that we don't change the
4457 // length of the sub-vectors here, so if both fallback to the generic
4458 // vslide1down path, we should be able to fold the vselect into the final
4459 // vslidedown (for the undef tail) for the first half w/ masking.
4460 unsigned NumElts = VT.getVectorNumElements();
4461 unsigned NumUndefElts =
4462 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4463 unsigned NumDefElts = NumElts - NumUndefElts;
4464 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4465 ContainerVT.bitsLE(RISCVTargetLowering::getM1VT(ContainerVT))) {
4466 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4467 SmallVector<SDValue> MaskVals;
4468 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4469 SubVecAOps.reserve(NumElts);
4470 SubVecBOps.reserve(NumElts);
4471 for (const auto &[Idx, U] : enumerate(Op->ops())) {
4472 SDValue Elem = U.get();
4473 if (Idx < NumElts / 2) {
4474 SubVecAOps.push_back(Elem);
4475 SubVecBOps.push_back(UndefElem);
4476 } else {
4477 SubVecAOps.push_back(UndefElem);
4478 SubVecBOps.push_back(Elem);
4479 }
4480 bool SelectMaskVal = (Idx < NumElts / 2);
4481 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4482 }
4483 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4484 MaskVals.size() == NumElts);
4485
4486 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4487 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4488 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4489 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4490 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4491 }
4492
4493 // Cap the cost at a value linear to the number of elements in the vector.
4494 // The default lowering is to use the stack. The vector store + scalar loads
4495 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4496 // being (at least) linear in LMUL. As a result, using the vslidedown
4497 // lowering for every element ends up being VL*LMUL.
4498 // TODO: Should we be directly costing the stack alternative? Doing so might
4499 // give us a more accurate upper bound.
4500 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4501
4502 // TODO: unify with TTI getSlideCost.
4503 InstructionCost PerSlideCost = 1;
4504 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4505 default: break;
4506 case RISCVVType::LMUL_2:
4507 PerSlideCost = 2;
4508 break;
4509 case RISCVVType::LMUL_4:
4510 PerSlideCost = 4;
4511 break;
4512 case RISCVVType::LMUL_8:
4513 PerSlideCost = 8;
4514 break;
4515 }
4516
4517 // TODO: Should we be using the build instseq then cost + evaluate scheme
4518 // we use for integer constants here?
4519 unsigned UndefCount = 0;
4520 for (const SDValue &V : Op->ops()) {
4521 if (V.isUndef()) {
4522 UndefCount++;
4523 continue;
4524 }
4525 if (UndefCount) {
4526 LinearBudget -= PerSlideCost;
4527 UndefCount = 0;
4528 }
4529 LinearBudget -= PerSlideCost;
4530 }
4531 if (UndefCount) {
4532 LinearBudget -= PerSlideCost;
4533 }
4534
4535 if (LinearBudget < 0)
4536 return SDValue();
4537
4538 assert((!VT.isFloatingPoint() ||
4539 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4540 "Illegal type which will result in reserved encoding");
4541
4542 const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
4543
4544 // General case: splat the first operand and slide other operands down one
4545 // by one to form a vector. Alternatively, if every operand is an
4546 // extraction from element 0 of a vector, we use that vector from the last
4547 // extraction as the start value and slide up instead of sliding down. That
4548 // way (1) we can avoid the initial splat, and (2) we can later turn those
4549 // vslide1up ops into a vslideup of 1 and eliminate the vector-to-scalar
4550 // movement, which is something we cannot do with vslide1down/vslidedown.
4551 // Of course, using vslide1up/vslideup might increase the register pressure,
4552 // and that's why we conservatively limit to cases where every operand is an
4553 // extraction from the first element.
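// For example, build_vector (extractelt %v, 0), (extractelt %w, 0) can start
// from %w's source vector and use a single vslide1up/vfslide1up to bring in
// element 0 of %v, with no initial splat required.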
4554 SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end());
4555 SDValue EVec;
4556 bool SlideUp = false;
4557 auto getVSlide = [&](EVT ContainerVT, SDValue Passthru, SDValue Vec,
4558 SDValue Offset, SDValue Mask, SDValue VL) -> SDValue {
4559 if (SlideUp)
4560 return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
4561 Mask, VL, Policy);
4562 return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
4563 Mask, VL, Policy);
4564 };
4565
4566 // The reason we don't use all_of here is that we're also capturing EVec
4567 // from the last non-undef operand. If the std::execution_policy of the
4568 // underlying std::all_of is anything but std::sequenced_policy we might
4569 // capture the wrong EVec.
4570 for (SDValue V : Operands) {
4571 using namespace SDPatternMatch;
4572 SlideUp = V.isUndef() || sd_match(V, m_ExtractElt(m_Value(EVec), m_Zero()));
4573 if (!SlideUp)
4574 break;
4575 }
4576
4577 // Do not slideup if the element type of EVec is different.
4578 if (SlideUp) {
4579 MVT EVecEltVT = EVec.getSimpleValueType().getVectorElementType();
4580 MVT ContainerEltVT = ContainerVT.getVectorElementType();
4581 if (EVecEltVT != ContainerEltVT)
4582 SlideUp = false;
4583 }
4584
4585 if (SlideUp) {
4586 MVT EVecContainerVT = EVec.getSimpleValueType();
4587 // Make sure the original vector has scalable vector type.
4588 if (EVecContainerVT.isFixedLengthVector()) {
4589 EVecContainerVT =
4590 getContainerForFixedLengthVector(DAG, EVecContainerVT, Subtarget);
4591 EVec = convertToScalableVector(EVecContainerVT, EVec, DAG, Subtarget);
4592 }
4593
4594 // Adapt EVec's type into ContainerVT.
4595 if (EVecContainerVT.getVectorMinNumElements() <
4596 ContainerVT.getVectorMinNumElements())
4597 EVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), EVec, 0);
4598 else
4599 EVec = DAG.getExtractSubvector(DL, ContainerVT, EVec, 0);
4600
4601 // Reverse the elements as we're going to slide up from the last element.
4602 std::reverse(Operands.begin(), Operands.end());
4603 }
4604
4605 SDValue Vec;
4606 UndefCount = 0;
4607 for (SDValue V : Operands) {
4608 if (V.isUndef()) {
4609 UndefCount++;
4610 continue;
4611 }
4612
4613 // Start our sequence with either a TA splat or extract source in the
4614 // hopes that hardware is able to recognize there's no dependency on the
4615 // prior value of our temporary register.
4616 if (!Vec) {
4617 if (SlideUp) {
4618 Vec = EVec;
4619 } else {
4620 Vec = DAG.getSplatVector(VT, DL, V);
4621 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4622 }
4623
4624 UndefCount = 0;
4625 continue;
4626 }
4627
4628 if (UndefCount) {
4629 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4630 Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
4631 VL);
4632 UndefCount = 0;
4633 }
4634
4635 unsigned Opcode;
4636 if (VT.isFloatingPoint())
4637 Opcode = SlideUp ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VFSLIDE1DOWN_VL;
4638 else
4639 Opcode = SlideUp ? RISCVISD::VSLIDE1UP_VL : RISCVISD::VSLIDE1DOWN_VL;
4640
4641 if (!VT.isFloatingPoint())
4642 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4643 Vec = DAG.getNode(Opcode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4644 V, Mask, VL);
4645 }
4646 if (UndefCount) {
4647 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4648 Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
4649 VL);
4650 }
4651 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4652}
4653
4654static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4655 SDValue Lo, SDValue Hi, SDValue VL,
4656 SelectionDAG &DAG) {
4657 if (!Passthru)
4658 Passthru = DAG.getUNDEF(VT);
4659 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4660 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4661 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4662 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4663 // node in order to try and match RVV vector/scalar instructions.
4664 if ((LoC >> 31) == HiC)
4665 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4666
4667 // Use vmv.v.x with EEW=32. Use either a vsetivli or vsetvli to change
4668 // VL. This can temporarily increase VL if VL is less than VLMAX.
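// For example, splatting the i64 constant 0x0000000500000005 on RV32 can be
// done as a vmv.v.x of 5 into an i32 vector with twice the element count,
// followed by a bitcast back to the original i64 vector type.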
4669 if (LoC == HiC) {
4670 SDValue NewVL;
4671 if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4672 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4673 else
4674 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4675 MVT InterVT =
4676 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4677 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4678 DAG.getUNDEF(InterVT), Lo, NewVL);
4679 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4680 }
4681 }
4682
4683 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4684 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4685 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4686 Hi.getConstantOperandVal(1) == 31)
4687 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4688
4689 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4690 // even if it might be sign extended.
4691 if (Hi.isUndef())
4692 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4693
4694 // Fall back to a stack store and stride x0 vector load.
4695 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4696 Hi, VL);
4697}
4698
4699// Called by type legalization to handle splat of i64 on RV32.
4700// FIXME: We can optimize this when the type has sign or zero bits in one
4701// of the halves.
4702static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4703 SDValue Scalar, SDValue VL,
4704 SelectionDAG &DAG) {
4705 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4706 SDValue Lo, Hi;
4707 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4708 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4709}
4710
4711 // This function lowers a splat of a scalar operand Scalar with the vector
4712// length VL. It ensures the final sequence is type legal, which is useful when
4713// lowering a splat after type legalization.
4714static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4715 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4716 const RISCVSubtarget &Subtarget) {
4717 bool HasPassthru = Passthru && !Passthru.isUndef();
4718 if (!HasPassthru && !Passthru)
4719 Passthru = DAG.getUNDEF(VT);
4720
4721 MVT EltVT = VT.getVectorElementType();
4722 MVT XLenVT = Subtarget.getXLenVT();
4723
4724 if (VT.isFloatingPoint()) {
4725 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4726 EltVT == MVT::bf16) {
4727 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4728 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
4729 Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
4730 else
4731 Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar);
4732 MVT IVT = VT.changeVectorElementType(MVT::i16);
4733 Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru);
4734 SDValue Splat =
4735 lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget);
4736 return DAG.getNode(ISD::BITCAST, DL, VT, Splat);
4737 }
4738 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4739 }
4740
4741 // Simplest case is that the operand needs to be promoted to XLenVT.
4742 if (Scalar.getValueType().bitsLE(XLenVT)) {
4743 // If the operand is a constant, sign extend to increase our chances
4744 // of being able to use a .vi instruction. ANY_EXTEND would become a
4745 // zero extend and the simm5 check in isel would fail.
4746 // FIXME: Should we ignore the upper bits in isel instead?
4747 unsigned ExtOpc =
4748 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4749 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4750 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4751 }
4752
4753 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4754 "Unexpected scalar for splat lowering!");
4755
4756 if (isOneConstant(VL) && isNullConstant(Scalar))
4757 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4758 DAG.getConstant(0, DL, XLenVT), VL);
4759
4760 // Otherwise use the more complicated splatting algorithm.
4761 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4762}
4763
4764// This function lowers an insert of a scalar operand Scalar into lane
4765// 0 of the vector regardless of the value of VL. The contents of the
4766// remaining lanes of the result vector are unspecified. VL is assumed
4767// to be non-zero.
4768 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4769 const SDLoc &DL, SelectionDAG &DAG,
4770 const RISCVSubtarget &Subtarget) {
4771 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4772
4773 const MVT XLenVT = Subtarget.getXLenVT();
4774 SDValue Passthru = DAG.getUNDEF(VT);
4775
4776 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4777 isNullConstant(Scalar.getOperand(1))) {
4778 SDValue ExtractedVal = Scalar.getOperand(0);
4779 // The element types must be the same.
4780 if (ExtractedVal.getValueType().getVectorElementType() ==
4781 VT.getVectorElementType()) {
4782 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4783 MVT ExtractedContainerVT = ExtractedVT;
4784 if (ExtractedContainerVT.isFixedLengthVector()) {
4785 ExtractedContainerVT = getContainerForFixedLengthVector(
4786 DAG, ExtractedContainerVT, Subtarget);
4787 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4788 ExtractedVal, DAG, Subtarget);
4789 }
4790 if (ExtractedContainerVT.bitsLE(VT))
4791 return DAG.getInsertSubvector(DL, Passthru, ExtractedVal, 0);
4792 return DAG.getExtractSubvector(DL, VT, ExtractedVal, 0);
4793 }
4794 }
4795
4796 if (VT.isFloatingPoint())
4797 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4798 VL);
4799
4800 // Avoid the tricky legalization cases by falling back to using the
4801 // splat code which already handles it gracefully.
4802 if (!Scalar.getValueType().bitsLE(XLenVT))
4803 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4804 DAG.getConstant(1, DL, XLenVT),
4805 VT, DL, DAG, Subtarget);
4806
4807 // If the operand is a constant, sign extend to increase our chances
4808 // of being able to use a .vi instruction. ANY_EXTEND would become a
4809 // zero extend and the simm5 check in isel would fail.
4810 // FIXME: Should we ignore the upper bits in isel instead?
4811 unsigned ExtOpc =
4812 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4813 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4814 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4815 VL);
4816}
4817
4818/// If concat_vector(V1,V2) could be folded away to some existing
4819/// vector source, return it. Note that the source may be larger
4820 /// than the requested concat_vector (i.e. an extract_subvector
4821 /// might be required).
4822 static SDValue foldConcatVector(SDValue V1, SDValue V2) {
4823 EVT VT = V1.getValueType();
4824 assert(VT == V2.getValueType() && "argument types must match");
4825 // Both inputs must be extracts.
4826 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4827 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4828 return SDValue();
4829
4830 // Extracting from the same source.
4831 SDValue Src = V1.getOperand(0);
4832 if (Src != V2.getOperand(0) ||
4833 VT.isScalableVector() != Src.getValueType().isScalableVector())
4834 return SDValue();
4835
4836 // The extracts must extract the two halves of the source.
4837 if (V1.getConstantOperandVal(1) != 0 ||
4838 V2.getConstantOperandVal(1) != VT.getVectorMinNumElements())
4839 return SDValue();
4840
4841 return Src;
4842}
4843
4844// Can this shuffle be performed on exactly one (possibly larger) input?
4845 static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2) {
4846
4847 if (V2.isUndef())
4848 return V1;
4849
4850 unsigned NumElts = VT.getVectorNumElements();
4851 // Src needs to have twice the number of elements.
4852 // TODO: Update shuffle lowering to add the extract subvector
4853 if (SDValue Src = foldConcatVector(V1, V2);
4854 Src && Src.getValueType().getVectorNumElements() == (NumElts * 2))
4855 return Src;
4856
4857 return SDValue();
4858}
4859
4860/// Is this shuffle interleaving contiguous elements from one vector into the
4861/// even elements and contiguous elements from another vector into the odd
4862/// elements. \p EvenSrc will contain the element that should be in the first
4863/// even element. \p OddSrc will contain the element that should be in the first
4864/// odd element. These can be the first element in a source or the element half
4865/// way through the source.
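/// For example, for v8i32 the mask <0,8,1,9,2,10,3,11> interleaves the low
/// half of the first source with the low half of the second, giving
/// EvenSrc=0 and OddSrc=8.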
4866static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4867 int &OddSrc, const RISCVSubtarget &Subtarget) {
4868 // We need to be able to widen elements to the next larger integer type or
4869 // use the zip2a instruction at e64.
4870 if (VT.getScalarSizeInBits() >= Subtarget.getELen() &&
4871 !Subtarget.hasVendorXRivosVizip())
4872 return false;
4873
4874 int Size = Mask.size();
4875 int NumElts = VT.getVectorNumElements();
4876 assert(Size == (int)NumElts && "Unexpected mask size");
4877
4878 SmallVector<unsigned, 2> StartIndexes;
4879 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4880 return false;
4881
4882 EvenSrc = StartIndexes[0];
4883 OddSrc = StartIndexes[1];
4884
4885 // One source should be low half of first vector.
4886 if (EvenSrc != 0 && OddSrc != 0)
4887 return false;
4888
4889 // Subvectors will be extracted either at the start of the two input
4890 // vectors, or at the start and middle of the first vector if it's a unary
4891 // interleave.
4892 // In both cases, HalfNumElts will be extracted.
4893 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4894 // we'll create an illegal extract_subvector.
4895 // FIXME: We could support other values using a slidedown first.
4896 int HalfNumElts = NumElts / 2;
4897 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4898}
4899
4900/// Is this mask representing a masked combination of two slides?
4901 static bool isMaskedSlidePair(ArrayRef<int> Mask,
4902 std::array<std::pair<int, int>, 2> &SrcInfo) {
4903 if (!llvm::isMaskedSlidePair(Mask, Mask.size(), SrcInfo))
4904 return false;
4905
4906 // Avoid matching vselect idioms
4907 if (SrcInfo[0].second == 0 && SrcInfo[1].second == 0)
4908 return false;
4909 // Prefer vslideup as the second instruction, and identity
4910 // only as the initial instruction.
4911 if ((SrcInfo[0].second > 0 && SrcInfo[1].second < 0) ||
4912 SrcInfo[1].second == 0)
4913 std::swap(SrcInfo[0], SrcInfo[1]);
4914 assert(SrcInfo[0].first != -1 && "Must find one slide");
4915 return true;
4916}
4917
4918// Exactly matches the semantics of a previously existing custom matcher
4919 // to allow migration to the new matcher without changing output.
4920static bool isElementRotate(const std::array<std::pair<int, int>, 2> &SrcInfo,
4921 unsigned NumElts) {
4922 if (SrcInfo[1].first == -1)
4923 return true;
4924 return SrcInfo[0].second < 0 && SrcInfo[1].second > 0 &&
4925 SrcInfo[1].second - SrcInfo[0].second == (int)NumElts;
4926}
4927
4928static bool isAlternating(const std::array<std::pair<int, int>, 2> &SrcInfo,
4929 ArrayRef<int> Mask, unsigned Factor,
4930 bool RequiredPolarity) {
4931 int NumElts = Mask.size();
4932 for (const auto &[Idx, M] : enumerate(Mask)) {
4933 if (M < 0)
4934 continue;
4935 int Src = M >= NumElts;
4936 int Diff = (int)Idx - (M % NumElts);
4937 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
4938 assert(C != (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
4939 "Must match exactly one of the two slides");
4940 if (RequiredPolarity != (C == (Idx / Factor) % 2))
4941 return false;
4942 }
4943 return true;
4944}
4945
4946/// Given a shuffle which can be represented as a pair of two slides,
4947/// see if it is a zipeven idiom. Zipeven is:
4948/// vs2: a0 a1 a2 a3
4949/// vs1: b0 b1 b2 b3
4950/// vd: a0 b0 a2 b2
4951static bool isZipEven(const std::array<std::pair<int, int>, 2> &SrcInfo,
4952 ArrayRef<int> Mask, unsigned &Factor) {
4953 Factor = SrcInfo[1].second;
4954 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
4955 Mask.size() % Factor == 0 &&
4956 isAlternating(SrcInfo, Mask, Factor, true);
4957}
4958
4959/// Given a shuffle which can be represented as a pair of two slides,
4960/// see if it is a zipodd idiom. Zipodd is:
4961/// vs2: a0 a1 a2 a3
4962/// vs1: b0 b1 b2 b3
4963/// vd: a1 b1 a3 b3
4964/// Note that the operand order is swapped due to the way we canonicalize
4965/// the slides, so SrcInfo[0] is vs1, and SrcInfo[1] is vs2.
4966static bool isZipOdd(const std::array<std::pair<int, int>, 2> &SrcInfo,
4967 ArrayRef<int> Mask, unsigned &Factor) {
4968 Factor = -SrcInfo[1].second;
4969 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
4970 Mask.size() % Factor == 0 &&
4971 isAlternating(SrcInfo, Mask, Factor, false);
4972}
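// For example, Mask = <1, 5, 3, 7> on two v4 sources (vd = a1 b1 a3 b3)
// decomposes into an identity plus a slide-down by 1, giving Factor = 1 with
// the odd polarity, so it matches the zipodd idiom.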
4973
4974// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
4975// 2, 4, 8 and the integer type Factor-times larger than VT's
4976// element type must be a legal element type.
4977// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
4978// -> [p, q, r, s] (Factor=2, Index=1)
4979static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT,
4980 SDValue Src, unsigned Factor,
4981 unsigned Index, SelectionDAG &DAG) {
4982 unsigned EltBits = VT.getScalarSizeInBits();
4983 ElementCount SrcEC = Src.getValueType().getVectorElementCount();
4984 MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
4985 SrcEC.divideCoefficientBy(Factor));
4986 MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
4987 SrcEC.divideCoefficientBy(Factor));
4988 Src = DAG.getBitcast(WideSrcVT, Src);
4989
4990 unsigned Shift = Index * EltBits;
4991 SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
4992 DAG.getConstant(Shift, DL, WideSrcVT));
4993  Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
4994  MVT CastVT = ResVT.changeVectorElementType(VT.getVectorElementType());
4995 Res = DAG.getBitcast(CastVT, Res);
4996 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
4997}
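// As an illustration: for VT = v4i8, Src = v8i8, Factor = 2, Index = 1, the
// source is bitcast to v4i16, logically shifted right by 8 bits, and
// truncated back to v4i8, leaving the odd elements [p, q, r, s].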
4998
4999/// Match a single source shuffle which is an identity except that some
5000/// particular element is repeated. This can be lowered as a masked
5001/// vrgather.vi/vx. Note that the two source form of this is handled
5002/// by the recursive splitting logic and doesn't need special handling.
5003static SDValue lowerVECTOR_SHUFFLEAsVRGatherVX(ShuffleVectorSDNode *SVN,
5004 const RISCVSubtarget &Subtarget,
5005 SelectionDAG &DAG) {
5006
5007 SDLoc DL(SVN);
5008 MVT VT = SVN->getSimpleValueType(0);
5009 SDValue V1 = SVN->getOperand(0);
5010 assert(SVN->getOperand(1).isUndef());
5011 ArrayRef<int> Mask = SVN->getMask();
5012 const unsigned NumElts = VT.getVectorNumElements();
5013 MVT XLenVT = Subtarget.getXLenVT();
5014
5015 std::optional<int> SplatIdx;
5016 for (auto [I, M] : enumerate(Mask)) {
5017 if (M == -1 || I == (unsigned)M)
5018 continue;
5019 if (SplatIdx && *SplatIdx != M)
5020 return SDValue();
5021 SplatIdx = M;
5022 }
5023
5024 if (!SplatIdx)
5025 return SDValue();
5026
5027 SmallVector<SDValue> MaskVals;
5028 for (int MaskIndex : Mask) {
5029 bool SelectMaskVal = MaskIndex == *SplatIdx;
5030 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5031 }
5032 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5033 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5034 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5035 SDValue Splat = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT),
5036 SmallVector<int>(NumElts, *SplatIdx));
5037 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, Splat, V1);
5038}
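// For example, a v4i8 shuffle with Mask = <0, 2, 2, 3> is an identity except
// that lane 1 repeats element 2, so SplatIdx = 2. It becomes
// vselect(<0,1,1,0>, splat-of-element-2, V1), which can be lowered as a
// masked vrgather.vi.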
5039
5040// Lower the following shuffle to vslidedown.
5041// a)
5042// t49: v8i8 = extract_subvector t13, Constant:i64<0>
5043// t109: v8i8 = extract_subvector t13, Constant:i64<8>
5044// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
5045// b)
5046// t69: v16i16 = extract_subvector t68, Constant:i64<0>
5047// t23: v8i16 = extract_subvector t69, Constant:i64<0>
5048// t29: v4i16 = extract_subvector t23, Constant:i64<4>
5049// t26: v8i16 = extract_subvector t69, Constant:i64<8>
5050// t30: v4i16 = extract_subvector t26, Constant:i64<0>
5051// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
5052static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
5053 SDValue V1, SDValue V2,
5054 ArrayRef<int> Mask,
5055 const RISCVSubtarget &Subtarget,
5056 SelectionDAG &DAG) {
5057 auto findNonEXTRACT_SUBVECTORParent =
5058 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
5059 uint64_t Offset = 0;
5060 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
5061 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
5062 // a scalable vector. But we don't want to match the case.
5063 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
5064 Offset += Parent.getConstantOperandVal(1);
5065 Parent = Parent.getOperand(0);
5066 }
5067 return std::make_pair(Parent, Offset);
5068 };
5069
5070 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
5071 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
5072
5073 // Extracting from the same source.
5074 SDValue Src = V1Src;
5075 if (Src != V2Src)
5076 return SDValue();
5077
5078 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
5079 SmallVector<int, 16> NewMask(Mask);
5080 for (size_t i = 0; i != NewMask.size(); ++i) {
5081 if (NewMask[i] == -1)
5082 continue;
5083
5084 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
5085 NewMask[i] = NewMask[i] + V1IndexOffset;
5086 } else {
5087 // Minus NewMask.size() is needed. Otherwise, the b case would be
5088 // <5,6,7,12> instead of <5,6,7,8>.
5089 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
5090 }
5091 }
5092
5093 // First index must be known and non-zero. It will be used as the slidedown
5094 // amount.
5095 if (NewMask[0] <= 0)
5096 return SDValue();
5097
5098  // NewMask must also be contiguous, i.e. each index is one greater than the
5099  // previous one.
5099 for (unsigned i = 1; i != NewMask.size(); ++i)
5100 if (NewMask[i - 1] + 1 != NewMask[i])
5101 return SDValue();
5102
5103 MVT XLenVT = Subtarget.getXLenVT();
5104 MVT SrcVT = Src.getSimpleValueType();
5105 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
5106 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
5107 SDValue Slidedown =
5108 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
5109 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
5110 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
5111 return DAG.getExtractSubvector(
5112 DL, VT, convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget), 0);
5113}
5114
5115// Because vslideup leaves the destination elements at the start intact, we can
5116// use it to perform shuffles that insert subvectors:
5117//
5118// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
5119// ->
5120// vsetvli zero, 8, e8, mf2, ta, ma
5121// vslideup.vi v8, v9, 4
5122//
5123// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
5124// ->
5125// vsetvli zero, 5, e8, mf2, tu, ma
5126// vslideup.vi v8, v9, 2
5127static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
5128 SDValue V1, SDValue V2,
5129 ArrayRef<int> Mask,
5130 const RISCVSubtarget &Subtarget,
5131 SelectionDAG &DAG) {
5132 unsigned NumElts = VT.getVectorNumElements();
5133 int NumSubElts, Index;
5134 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
5135 Index))
5136 return SDValue();
5137
5138 bool OpsSwapped = Mask[Index] < (int)NumElts;
5139 SDValue InPlace = OpsSwapped ? V2 : V1;
5140 SDValue ToInsert = OpsSwapped ? V1 : V2;
5141
5142 MVT XLenVT = Subtarget.getXLenVT();
5143 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5144 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
5145 // We slide up by the index that the subvector is being inserted at, and set
5146 // VL to the index + the number of elements being inserted.
5147  unsigned Policy =
5148      RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
5149  // If we're adding a suffix to the in-place vector, i.e. inserting right
5150 // up to the very end of it, then we don't actually care about the tail.
5151 if (NumSubElts + Index >= (int)NumElts)
5152 Policy |= RISCVVType::TAIL_AGNOSTIC;
5153
5154 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
5155 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
5156 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
5157
5158 SDValue Res;
5159 // If we're inserting into the lowest elements, use a tail undisturbed
5160 // vmv.v.v.
5161 if (Index == 0)
5162 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
5163 VL);
5164 else
5165 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
5166 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
5167 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5168}
5169
5170/// Match v(f)slide1up/down idioms. These operations involve sliding
5171/// N-1 elements to make room for an inserted scalar at one end.
5172static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
5173 SDValue V1, SDValue V2,
5174 ArrayRef<int> Mask,
5175 const RISCVSubtarget &Subtarget,
5176 SelectionDAG &DAG) {
5177 bool OpsSwapped = false;
5178 if (!isa<BuildVectorSDNode>(V1)) {
5179 if (!isa<BuildVectorSDNode>(V2))
5180 return SDValue();
5181 std::swap(V1, V2);
5182 OpsSwapped = true;
5183 }
5184 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
5185 if (!Splat)
5186 return SDValue();
5187
5188 // Return true if the mask could describe a slide of Mask.size() - 1
5189 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
5190 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
5191 const unsigned S = (Offset > 0) ? 0 : -Offset;
5192 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
5193 for (unsigned i = S; i != E; ++i)
5194 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
5195 return false;
5196 return true;
5197 };
5198
5199 const unsigned NumElts = VT.getVectorNumElements();
5200 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
5201 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
5202 return SDValue();
5203
5204 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
5205  // The inserted lane must come from the splat; an undef scalar is legal but not profitable.
5206 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
5207 return SDValue();
5208
5209 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5210 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5211
5212 // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
5213 // vslide1{down,up}.vx instead.
5214 if (VT.getVectorElementType() == MVT::bf16 ||
5215 (VT.getVectorElementType() == MVT::f16 &&
5216 !Subtarget.hasVInstructionsF16())) {
5217 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
5218 Splat =
5219 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
5220 V2 = DAG.getBitcast(
5221 IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
5222 SDValue Vec = DAG.getNode(
5223 IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
5224 IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
5225 Vec = DAG.getBitcast(ContainerVT, Vec);
5226 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5227 }
5228
5229 auto OpCode = IsVSlidedown ?
5230 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
5231 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
5232 if (!VT.isFloatingPoint())
5233 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
5234 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
5235 DAG.getUNDEF(ContainerVT),
5236 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
5237 Splat, TrueMask, VL);
5238 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5239}
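// For example, if V1 is a build_vector splat of a scalar s and
// Mask = <0, 4, 5, 6>, the result is [s, b0, b1, b2], which matches a
// vslide1up of V2 inserting s (vslide1up.vx, or vfslide1up.vf for FP).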
5240
5241/// Match a mask which "spreads" the leading elements of a vector evenly
5242/// across the result. Factor is the spread amount, and Index is the
5243/// offset applied (on success, Index < Factor). This is the inverse
5244/// of a deinterleave with the same Factor and Index. This is analogous
5245/// to an interleave, except that all but one lane is undef.
5246bool RISCVTargetLowering::isSpreadMask(ArrayRef<int> Mask, unsigned Factor,
5247 unsigned &Index) {
5248 SmallVector<bool> LaneIsUndef(Factor, true);
5249 for (unsigned i = 0; i < Mask.size(); i++)
5250 LaneIsUndef[i % Factor] &= (Mask[i] == -1);
5251
5252 bool Found = false;
5253 for (unsigned i = 0; i < Factor; i++) {
5254 if (LaneIsUndef[i])
5255 continue;
5256 if (Found)
5257 return false;
5258 Index = i;
5259 Found = true;
5260 }
5261 if (!Found)
5262 return false;
5263
5264 for (unsigned i = 0; i < Mask.size() / Factor; i++) {
5265 unsigned j = i * Factor + Index;
5266 if (Mask[j] != -1 && (unsigned)Mask[j] != i)
5267 return false;
5268 }
5269 return true;
5270}
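// For illustration: with Factor = 2, Mask = <0, -1, 1, -1, 2, -1, 3, -1>
// matches with Index = 0 and <-1, 0, -1, 1, -1, 2, -1, 3> matches with
// Index = 1; each is the inverse of the corresponding deinterleave(2).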
5271
5272static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1,
5273 const SDLoc &DL, SelectionDAG &DAG,
5274 const RISCVSubtarget &Subtarget) {
5275 assert(RISCVISD::RI_VZIPEVEN_VL == Opc || RISCVISD::RI_VZIPODD_VL == Opc ||
5276 RISCVISD::RI_VZIP2A_VL == Opc || RISCVISD::RI_VZIP2B_VL == Opc ||
5277 RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc);
5278  assert(Op0.getSimpleValueType() == Op1.getSimpleValueType());
5279
5280 MVT VT = Op0.getSimpleValueType();
5281  MVT IntVT = VT.changeVectorElementTypeToInteger();
5282  Op0 = DAG.getBitcast(IntVT, Op0);
5283 Op1 = DAG.getBitcast(IntVT, Op1);
5284
5285 MVT ContainerVT = IntVT;
5286 if (VT.isFixedLengthVector()) {
5287 ContainerVT = getContainerForFixedLengthVector(DAG, IntVT, Subtarget);
5288 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
5289 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
5290 }
5291
5292 MVT InnerVT = ContainerVT;
5293 auto [Mask, VL] = getDefaultVLOps(IntVT, InnerVT, DL, DAG, Subtarget);
5294 if (Op1.isUndef() &&
5295 ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
5296 (RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc)) {
5297 InnerVT = ContainerVT.getHalfNumVectorElementsVT();
5298 VL = DAG.getConstant(VT.getVectorNumElements() / 2, DL,
5299 Subtarget.getXLenVT());
5300 Mask = getAllOnesMask(InnerVT, VL, DL, DAG);
5301 unsigned HighIdx = InnerVT.getVectorElementCount().getKnownMinValue();
5302 Op1 = DAG.getExtractSubvector(DL, InnerVT, Op0, HighIdx);
5303 Op0 = DAG.getExtractSubvector(DL, InnerVT, Op0, 0);
5304 }
5305
5306 SDValue Passthru = DAG.getUNDEF(InnerVT);
5307 SDValue Res = DAG.getNode(Opc, DL, InnerVT, Op0, Op1, Passthru, Mask, VL);
5308 if (InnerVT.bitsLT(ContainerVT))
5309 Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Res, 0);
5310 if (IntVT.isFixedLengthVector())
5311 Res = convertFromScalableVector(IntVT, Res, DAG, Subtarget);
5312 Res = DAG.getBitcast(VT, Res);
5313 return Res;
5314}
5315
5316// Given a vector a, b, c, d return a vector Factor times longer
5317// with Factor-1 undef's between elements. Ex:
5318// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
5319// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
5320static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
5321 const SDLoc &DL, SelectionDAG &DAG) {
5322
5323 MVT VT = V.getSimpleValueType();
5324 unsigned EltBits = VT.getScalarSizeInBits();
5325  ElementCount EC = VT.getVectorElementCount();
5326  V = DAG.getBitcast(VT.changeTypeToInteger(), V);
5327
5328 MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
5329
5330 SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
5331 // TODO: On rv32, the constant becomes a splat_vector_parts which does not
5332 // allow the SHL to fold away if Index is 0.
5333 if (Index != 0)
5334 Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
5335 DAG.getConstant(EltBits * Index, DL, WideVT));
5336  // Make sure to use original element type
5337  MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
5338                                  EC.multiplyCoefficientBy(Factor));
5339 return DAG.getBitcast(ResultVT, Result);
5340}
5341
5342// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
5343// to create an interleaved vector of <[vscale x] n*2 x ty>.
5344// This requires that the size of ty is less than the subtarget's maximum ELEN.
5345static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
5346 const SDLoc &DL, SelectionDAG &DAG,
5347 const RISCVSubtarget &Subtarget) {
5348
5349 // FIXME: Not only does this optimize the code, it fixes some correctness
5350 // issues because MIR does not have freeze.
5351 if (EvenV.isUndef())
5352 return getWideningSpread(OddV, 2, 1, DL, DAG);
5353 if (OddV.isUndef())
5354 return getWideningSpread(EvenV, 2, 0, DL, DAG);
5355
5356 MVT VecVT = EvenV.getSimpleValueType();
5357 MVT VecContainerVT = VecVT; // <vscale x n x ty>
5358 // Convert fixed vectors to scalable if needed
5359 if (VecContainerVT.isFixedLengthVector()) {
5360 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
5361 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
5362 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
5363 }
5364
5365 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
5366
5367 // We're working with a vector of the same size as the resulting
5368 // interleaved vector, but with half the number of elements and
5369 // twice the SEW (Hence the restriction on not using the maximum
5370 // ELEN)
5371  MVT WideVT =
5372      MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
5373                       VecVT.getVectorElementCount());
5374 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
5375 if (WideContainerVT.isFixedLengthVector())
5376 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
5377
5378 // Bitcast the input vectors to integers in case they are FP
5379 VecContainerVT = VecContainerVT.changeTypeToInteger();
5380 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
5381 OddV = DAG.getBitcast(VecContainerVT, OddV);
5382
5383 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
5384 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
5385
5386 SDValue Interleaved;
5387 if (Subtarget.hasStdExtZvbb()) {
5388 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
5389 SDValue OffsetVec =
5390 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
5391 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
5392 OffsetVec, Passthru, Mask, VL);
5393 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
5394 Interleaved, EvenV, Passthru, Mask, VL);
5395 } else {
5396 // FIXME: We should freeze the odd vector here. We already handled the case
5397 // of provably undef/poison above.
5398
5399 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
5400 // vwaddu.vv
5401 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
5402 OddV, Passthru, Mask, VL);
5403
5404    // Then multiply OddV by (2^VecVT.getScalarSizeInBits() - 1), i.e. all-ones
5405 SDValue AllOnesVec = DAG.getSplatVector(
5406 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
5407 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
5408 OddV, AllOnesVec, Passthru, Mask, VL);
5409
5410 // Add the two together so we get
5411 // (OddV * 0xff...ff) + (OddV + EvenV)
5412 // = (OddV * 0x100...00) + EvenV
5413 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
5414    // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
5415 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
5416 Interleaved, OddsMul, Passthru, Mask, VL);
5417 }
5418
5419 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
5420 MVT ResultContainerVT = MVT::getVectorVT(
5421 VecVT.getVectorElementType(), // Make sure to use original type
5422 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
5423 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
5424
5425 // Convert back to a fixed vector if needed
5426  MVT ResultVT =
5427      MVT::getVectorVT(VecVT.getVectorElementType(),
5428                       VecVT.getVectorElementCount().multiplyCoefficientBy(2));
5429  if (ResultVT.isFixedLengthVector())
5430 Interleaved =
5431 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
5432
5433 return Interleaved;
5434}
5435
5436// If we have a vector of bits that we want to reverse, we can use a vbrev on a
5437// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
5438static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
5439 SelectionDAG &DAG,
5440 const RISCVSubtarget &Subtarget) {
5441 SDLoc DL(SVN);
5442 MVT VT = SVN->getSimpleValueType(0);
5443 SDValue V = SVN->getOperand(0);
5444 unsigned NumElts = VT.getVectorNumElements();
5445
5446 assert(VT.getVectorElementType() == MVT::i1);
5447
5448  if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
5449                                        SVN->getMask().size()) ||
5450 !SVN->getOperand(1).isUndef())
5451 return SDValue();
5452
5453 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
5454 EVT ViaVT = EVT::getVectorVT(
5455 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
5456 EVT ViaBitVT =
5457 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
5458
5459 // If we don't have zvbb or the larger element type > ELEN, the operation will
5460  // be illegal.
5461  if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
5462                                                               ViaVT) ||
5463 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
5464 return SDValue();
5465
5466 // If the bit vector doesn't fit exactly into the larger element type, we need
5467 // to insert it into the larger vector and then shift up the reversed bits
5468 // afterwards to get rid of the gap introduced.
5469 if (ViaEltSize > NumElts)
5470 V = DAG.getInsertSubvector(DL, DAG.getUNDEF(ViaBitVT), V, 0);
5471
5472 SDValue Res =
5473 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
5474
5475 // Shift up the reversed bits if the vector didn't exactly fit into the larger
5476 // element type.
5477 if (ViaEltSize > NumElts)
5478 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
5479 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
5480
5481 Res = DAG.getBitcast(ViaBitVT, Res);
5482
5483 if (ViaEltSize > NumElts)
5484 Res = DAG.getExtractSubvector(DL, VT, Res, 0);
5485 return Res;
5486}
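// For illustration: reversing a v8i1 vector uses ViaEltSize = 8, so the mask
// is bitcast to v1i8, a single i8 BITREVERSE reverses all eight bits, and the
// result is bitcast back to v8i1; no shift is needed since the bits fit the
// larger element exactly.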
5487
5488static bool isLegalBitRotate(ArrayRef<int> Mask, EVT VT,
5489 const RISCVSubtarget &Subtarget,
5490 MVT &RotateVT, unsigned &RotateAmt) {
5491 unsigned NumElts = VT.getVectorNumElements();
5492 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5493 unsigned NumSubElts;
5494 if (!ShuffleVectorInst::isBitRotateMask(Mask, EltSizeInBits, 2,
5495 NumElts, NumSubElts, RotateAmt))
5496 return false;
5497 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
5498 NumElts / NumSubElts);
5499
5500 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
5501 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
5502}
5503
5504// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
5505// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
5506// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
5507static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
5508 SelectionDAG &DAG,
5509 const RISCVSubtarget &Subtarget) {
5510 SDLoc DL(SVN);
5511
5512 EVT VT = SVN->getValueType(0);
5513 unsigned RotateAmt;
5514 MVT RotateVT;
5515 if (!isLegalBitRotate(SVN->getMask(), VT, Subtarget, RotateVT, RotateAmt))
5516 return SDValue();
5517
5518 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
5519
5520 SDValue Rotate;
5521  // A rotate of an i16 by 8 bits in either direction is equivalent to a byteswap,
5522 // so canonicalize to vrev8.
5523 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
5524 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
5525 else
5526 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
5527 DAG.getConstant(RotateAmt, DL, RotateVT));
5528
5529 return DAG.getBitcast(VT, Rotate);
5530}
5531
5532// If compiling with an exactly known VLEN, see if we can split a
5533// shuffle on m2 or larger into a small number of m1 sized shuffles
5534// which write each destination register exactly once.
5535static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
5536 SelectionDAG &DAG,
5537 const RISCVSubtarget &Subtarget) {
5538 SDLoc DL(SVN);
5539 MVT VT = SVN->getSimpleValueType(0);
5540 SDValue V1 = SVN->getOperand(0);
5541 SDValue V2 = SVN->getOperand(1);
5542 ArrayRef<int> Mask = SVN->getMask();
5543
5544 // If we don't know exact data layout, not much we can do. If this
5545 // is already m1 or smaller, no point in splitting further.
5546 const auto VLen = Subtarget.getRealVLen();
5547 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5548 return SDValue();
5549
5550 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
5551 // expansion for.
5552 unsigned RotateAmt;
5553 MVT RotateVT;
5554 if (isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))
5555 return SDValue();
5556
5557 MVT ElemVT = VT.getVectorElementType();
5558 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5559
5560 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5561 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
5562 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
5563 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
5564 unsigned NumOpElts = M1VT.getVectorMinNumElements();
5565 unsigned NumElts = ContainerVT.getVectorMinNumElements();
5566 unsigned NumOfSrcRegs = NumElts / NumOpElts;
5567 unsigned NumOfDestRegs = NumElts / NumOpElts;
5568 // The following semantically builds up a fixed length concat_vector
5569 // of the component shuffle_vectors. We eagerly lower to scalable here
5570 // to avoid DAG combining it back to a large shuffle_vector again.
5571 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5572  V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5573  SmallVector<SmallVector<std::tuple<unsigned, unsigned, SmallVector<int>>>>
5574      Operands;
5575  processShuffleMasks(
5576      Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
5577 [&]() { Operands.emplace_back(); },
5578 [&](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx, unsigned DstVecIdx) {
5579 Operands.emplace_back().emplace_back(SrcVecIdx, UINT_MAX,
5580 SmallVector<int>(SrcSubMask));
5581 },
5582 [&](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
5583 if (NewReg)
5584 Operands.emplace_back();
5585 Operands.back().emplace_back(Idx1, Idx2, SmallVector<int>(SrcSubMask));
5586 });
5587 assert(Operands.size() == NumOfDestRegs && "Whole vector must be processed");
5588 // Note: check that we do not emit too many shuffles here to prevent code
5589 // size explosion.
5590  // TODO: Investigate whether this can be improved by extra analysis of the
5591  // masks to check if the code is more profitable.
5592 unsigned NumShuffles = std::accumulate(
5593 Operands.begin(), Operands.end(), 0u,
5594 [&](unsigned N,
5595 ArrayRef<std::tuple<unsigned, unsigned, SmallVector<int>>> Data) {
5596 if (Data.empty())
5597 return N;
5598 N += Data.size();
5599 for (const auto &P : Data) {
5600 unsigned Idx2 = std::get<1>(P);
5601 ArrayRef<int> Mask = std::get<2>(P);
5602 if (Idx2 != UINT_MAX)
5603 ++N;
5604 else if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
5605 --N;
5606 }
5607 return N;
5608 });
5609 if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
5610 (NumOfDestRegs <= 2 && NumShuffles >= 4))
5611 return SDValue();
5612 auto ExtractValue = [&, &DAG = DAG](SDValue SrcVec, unsigned ExtractIdx) {
5613 SDValue SubVec = DAG.getExtractSubvector(DL, M1VT, SrcVec, ExtractIdx);
5614 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5615 return SubVec;
5616 };
5617 auto PerformShuffle = [&, &DAG = DAG](SDValue SubVec1, SDValue SubVec2,
5618                                        ArrayRef<int> Mask) {
5619    SDValue SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec1, SubVec2, Mask);
5620 return SubVec;
5621 };
5622 SDValue Vec = DAG.getUNDEF(ContainerVT);
5623 for (auto [I, Data] : enumerate(Operands)) {
5624 if (Data.empty())
5625 continue;
5626    SmallDenseMap<unsigned, SDValue, 4> Values;
5627    for (unsigned I : seq<unsigned>(Data.size())) {
5628 const auto &[Idx1, Idx2, _] = Data[I];
5629      // If the shuffle contains a permutation of an odd number of elements,
5630      // Idx1 might already be used in the first iteration.
5631 //
5632 // Idx1 = shuffle Idx1, Idx2
5633 // Idx1 = shuffle Idx1, Idx3
5634 SDValue &V = Values.try_emplace(Idx1).first->getSecond();
5635 if (!V)
5636 V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
5637 (Idx1 % NumOfSrcRegs) * NumOpElts);
5638 if (Idx2 != UINT_MAX) {
5639 SDValue &V = Values.try_emplace(Idx2).first->getSecond();
5640 if (!V)
5641 V = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
5642 (Idx2 % NumOfSrcRegs) * NumOpElts);
5643 }
5644 }
5645 SDValue V;
5646 for (const auto &[Idx1, Idx2, Mask] : Data) {
5647 SDValue V1 = Values.at(Idx1);
5648 SDValue V2 = Idx2 == UINT_MAX ? V1 : Values.at(Idx2);
5649 V = PerformShuffle(V1, V2, Mask);
5650 Values[Idx1] = V;
5651 }
5652
5653 unsigned InsertIdx = I * NumOpElts;
5654 V = convertToScalableVector(M1VT, V, DAG, Subtarget);
5655 Vec = DAG.getInsertSubvector(DL, Vec, V, InsertIdx);
5656 }
5657 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5658}
5659
5660// Matches a subset of compress masks with a contiguous prefix of output
5661// elements. This could be extended to allow gaps by deciding which
5662// source elements to spuriously demand.
5663static bool isCompressMask(ArrayRef<int> Mask) {
5664 int Last = -1;
5665 bool SawUndef = false;
5666 for (const auto &[Idx, M] : enumerate(Mask)) {
5667 if (M == -1) {
5668 SawUndef = true;
5669 continue;
5670 }
5671 if (SawUndef)
5672 return false;
5673 if (Idx > (unsigned)M)
5674 return false;
5675 if (M <= Last)
5676 return false;
5677 Last = M;
5678 }
5679 return true;
5680}
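// For example, Mask = <0, 2, 5, -1> is accepted: the defined indices are
// strictly increasing, each is >= its output position, and undefs only appear
// in the tail, so the shuffle can be lowered as a vcompress keeping source
// elements 0, 2 and 5.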
5681
5682/// Given a shuffle where the indices are disjoint between the two sources,
5683/// e.g.:
5684///
5685/// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4>
5686///
5687/// Merge the two sources into one and do a single source shuffle:
5688///
5689/// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1>
5690/// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0>
5691///
5692/// A vselect will either be merged into a masked instruction or be lowered as a
5693/// vmerge.vvm, which is cheaper than a vrgather.vv.
5694static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
5695 SelectionDAG &DAG,
5696 const RISCVSubtarget &Subtarget) {
5697 MVT VT = SVN->getSimpleValueType(0);
5698 MVT XLenVT = Subtarget.getXLenVT();
5699 SDLoc DL(SVN);
5700
5701 const ArrayRef<int> Mask = SVN->getMask();
5702
5703 // Work out which source each lane will come from.
5704 SmallVector<int, 16> Srcs(Mask.size(), -1);
5705
5706 for (int Idx : Mask) {
5707 if (Idx == -1)
5708 continue;
5709 unsigned SrcIdx = Idx % Mask.size();
5710 int Src = (uint32_t)Idx < Mask.size() ? 0 : 1;
5711 if (Srcs[SrcIdx] == -1)
5712 // Mark this source as using this lane.
5713 Srcs[SrcIdx] = Src;
5714 else if (Srcs[SrcIdx] != Src)
5715 // The other source is using this lane: not disjoint.
5716 return SDValue();
5717 }
5718
5719 SmallVector<SDValue> SelectMaskVals;
5720 for (int Lane : Srcs) {
5721 if (Lane == -1)
5722 SelectMaskVals.push_back(DAG.getUNDEF(XLenVT));
5723 else
5724 SelectMaskVals.push_back(DAG.getConstant(Lane ? 0 : 1, DL, XLenVT));
5725 }
5726 MVT MaskVT = VT.changeVectorElementType(MVT::i1);
5727 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, SelectMaskVals);
5728 SDValue Select = DAG.getNode(ISD::VSELECT, DL, VT, SelectMask,
5729 SVN->getOperand(0), SVN->getOperand(1));
5730
5731 // Move all indices relative to the first source.
5732 SmallVector<int> NewMask(Mask.size());
5733 for (unsigned I = 0; I < Mask.size(); I++) {
5734 if (Mask[I] == -1)
5735 NewMask[I] = -1;
5736 else
5737 NewMask[I] = Mask[I] % Mask.size();
5738 }
5739
5740 return DAG.getVectorShuffle(VT, DL, Select, DAG.getUNDEF(VT), NewMask);
5741}
5742
5743/// Is this mask local (i.e. elements only move within their local span), and
5744/// repeating (that is, the same rearrangement is being done within each span)?
5745static bool isLocalRepeatingShuffle(ArrayRef<int> Mask, int Span) {
5746 // Require a prefix from the original mask until the consumer code
5747 // is adjusted to rewrite the mask instead of just taking a prefix.
5748 for (auto [I, M] : enumerate(Mask)) {
5749 if (M == -1)
5750 continue;
5751 if ((M / Span) != (int)(I / Span))
5752 return false;
5753 int SpanIdx = I % Span;
5754 int Expected = M % Span;
5755 if (Mask[SpanIdx] != Expected)
5756 return false;
5757 }
5758 return true;
5759}
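// For example, with Span = 4, Mask = <1, 0, 3, 2, 5, 4, 7, 6> is local and
// repeating: every element stays within its own 4-element span and each span
// applies the same <1, 0, 3, 2> rearrangement.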
5760
5761/// Is this mask only using elements from the first span of the input?
5762static bool isLowSourceShuffle(ArrayRef<int> Mask, int Span) {
5763 return all_of(Mask, [&](const auto &Idx) { return Idx == -1 || Idx < Span; });
5764}
5765
5766/// Return true for a mask which performs an arbitrary shuffle within the first
5767/// span, and then repeats that same result across all remaining spans. Note
5768/// that this doesn't check if all the inputs come from a single span!
5769static bool isSpanSplatShuffle(ArrayRef<int> Mask, int Span) {
5770 // Require a prefix from the original mask until the consumer code
5771 // is adjusted to rewrite the mask instead of just taking a prefix.
5772 for (auto [I, M] : enumerate(Mask)) {
5773 if (M == -1)
5774 continue;
5775 int SpanIdx = I % Span;
5776 if (Mask[SpanIdx] != M)
5777 return false;
5778 }
5779 return true;
5780}
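// For illustration: with Span = 4, the 8-element Mask
// <2, 0, 3, 1, 2, 0, 3, 1> repeats the first span's <2, 0, 3, 1> result in
// every span, so a single m1 shuffle result can be copied into each span.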
5781
5782/// Try to widen element type to get a new mask value for a better permutation
5783/// sequence. This doesn't try to inspect the widened mask for profitability;
5784/// we speculate the widened form is equal or better. This has the effect of
5785/// reducing mask constant sizes - allowing cheaper materialization sequences
5786/// - and index sequence sizes - reducing register pressure and materialization
5787/// cost, at the cost of (possibly) an extra VTYPE toggle.
5788static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
5789 SDLoc DL(Op);
5790 MVT VT = Op.getSimpleValueType();
5791 MVT ScalarVT = VT.getVectorElementType();
5792 unsigned ElementSize = ScalarVT.getFixedSizeInBits();
5793 SDValue V0 = Op.getOperand(0);
5794 SDValue V1 = Op.getOperand(1);
5795 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
5796
5797 // Avoid wasted work leading to isTypeLegal check failing below
5798 if (ElementSize > 32)
5799 return SDValue();
5800
5801 SmallVector<int, 8> NewMask;
5802 if (!widenShuffleMaskElts(Mask, NewMask))
5803 return SDValue();
5804
5805 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(ElementSize * 2)
5806 : MVT::getIntegerVT(ElementSize * 2);
5807 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
5808 if (!DAG.getTargetLoweringInfo().isTypeLegal(NewVT))
5809 return SDValue();
5810 V0 = DAG.getBitcast(NewVT, V0);
5811 V1 = DAG.getBitcast(NewVT, V1);
5812 return DAG.getBitcast(VT, DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
5813}
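// For example, a v8i8 shuffle with Mask = <2, 3, 0, 1, 6, 7, 4, 5> widens to
// a v4i16 shuffle with Mask = <1, 0, 3, 2>, halving the number of index
// values that need to be materialized.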
5814
5815static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
5816 const RISCVSubtarget &Subtarget) {
5817 SDValue V1 = Op.getOperand(0);
5818 SDValue V2 = Op.getOperand(1);
5819 SDLoc DL(Op);
5820 MVT XLenVT = Subtarget.getXLenVT();
5821 MVT VT = Op.getSimpleValueType();
5822 unsigned NumElts = VT.getVectorNumElements();
5823  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5824
5825 if (VT.getVectorElementType() == MVT::i1) {
5826 // Lower to a vror.vi of a larger element type if possible before we promote
5827 // i1s to i8s.
5828 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5829 return V;
5830 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5831 return V;
5832
5833 // Promote i1 shuffle to i8 shuffle.
5834 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5835 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
5836 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5837 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
5838 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
5839 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
5840 ISD::SETNE);
5841 }
5842
5843 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5844
5845 // Store the return value in a single variable instead of structured bindings
5846 // so that we can pass it to GetSlide below, which cannot capture structured
5847 // bindings until C++20.
5848 auto TrueMaskVL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5849 auto [TrueMask, VL] = TrueMaskVL;
5850
5851 if (SVN->isSplat()) {
5852 const int Lane = SVN->getSplatIndex();
5853 if (Lane >= 0) {
5854 MVT SVT = VT.getVectorElementType();
5855
5856 // Turn splatted vector load into a strided load with an X0 stride.
5857 SDValue V = V1;
5858 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5859 // with undef.
5860 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5861 int Offset = Lane;
5862 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5863 int OpElements =
5864 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5865 V = V.getOperand(Offset / OpElements);
5866 Offset %= OpElements;
5867 }
5868
5869 // We need to ensure the load isn't atomic or volatile.
5870 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5871 auto *Ld = cast<LoadSDNode>(V);
5872 Offset *= SVT.getStoreSize();
5873 SDValue NewAddr = DAG.getMemBasePlusOffset(
5874 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5875
5876 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5877 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5878 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5879 SDValue IntID =
5880 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5881 SDValue Ops[] = {Ld->getChain(),
5882 IntID,
5883 DAG.getUNDEF(ContainerVT),
5884 NewAddr,
5885 DAG.getRegister(RISCV::X0, XLenVT),
5886 VL};
5887 SDValue NewLoad = DAG.getMemIntrinsicNode(
5888 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5889              DAG.getMachineFunction().getMachineMemOperand(
5890                  Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5891 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5892 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5893 }
5894
5895 MVT SplatVT = ContainerVT;
5896
5897 // f16 with zvfhmin and bf16 need to use an integer scalar load.
5898 if (SVT == MVT::bf16 ||
5899 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
5900 SVT = MVT::i16;
5901 SplatVT = ContainerVT.changeVectorElementType(SVT);
5902 }
5903
5904 // Otherwise use a scalar load and splat. This will give the best
5905 // opportunity to fold a splat into the operation. ISel can turn it into
5906 // the x0 strided load if we aren't able to fold away the select.
5907 if (SVT.isFloatingPoint())
5908 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5909 Ld->getPointerInfo().getWithOffset(Offset),
5910 Ld->getBaseAlign(), Ld->getMemOperand()->getFlags());
5911 else
5912 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5913 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5914 Ld->getBaseAlign(),
5915 Ld->getMemOperand()->getFlags());
5916        DAG.makeEquivalentMemoryOrdering(Ld, V);
5917
5918 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5919 : RISCVISD::VMV_V_X_VL;
5920 SDValue Splat =
5921 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5922 Splat = DAG.getBitcast(ContainerVT, Splat);
5923 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5924 }
5925
5926 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5927 assert(Lane < (int)NumElts && "Unexpected lane!");
5928 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5929 V1, DAG.getConstant(Lane, DL, XLenVT),
5930 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5931 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5932 }
5933 }
5934
5935 // For exact VLEN m2 or greater, try to split to m1 operations if we
5936 // can split cleanly.
5937 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5938 return V;
5939
5940 ArrayRef<int> Mask = SVN->getMask();
5941
5942 if (SDValue V =
5943 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5944 return V;
5945
5946 if (SDValue V =
5947 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5948 return V;
5949
5950 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5951 // available.
5952 if (Subtarget.hasStdExtZvkb())
5953 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5954 return V;
5955
5956 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef() &&
5957 NumElts != 2)
5958 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
5959
5960 // If this is a deinterleave(2,4,8) and we can widen the vector, then we can
5961 // use shift and truncate to perform the shuffle.
5962 // TODO: For Factor=6, we can perform the first step of the deinterleave via
5963 // shift-and-trunc reducing total cost for everything except an mf8 result.
5964 // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
5965 // to do the entire operation.
5966 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5967 const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5968 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5969 for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
5970 unsigned Index = 0;
5971 if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
5972 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5973 if (SDValue Src = getSingleShuffleSrc(VT, V1, V2))
5974 return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
5975 if (1 < count_if(Mask,
5976 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
5977 1 < count_if(Mask, [&Mask](int Idx) {
5978 return Idx >= (int)Mask.size();
5979 })) {
5980 // Narrow each source and concatenate them.
5981 // FIXME: For small LMUL it is better to concatenate first.
5982 MVT EltVT = VT.getVectorElementType();
5983 auto EltCnt = VT.getVectorElementCount();
5984 MVT SubVT =
5985 MVT::getVectorVT(EltVT, EltCnt.divideCoefficientBy(Factor));
5986
5987 SDValue Lo =
5988 getDeinterleaveShiftAndTrunc(DL, SubVT, V1, Factor, Index, DAG);
5989 SDValue Hi =
5990 getDeinterleaveShiftAndTrunc(DL, SubVT, V2, Factor, Index, DAG);
5991
5992            SDValue Concat =
5993                DAG.getNode(ISD::CONCAT_VECTORS, DL,
5994                            SubVT.getDoubleNumVectorElementsVT(), Lo, Hi);
5995            if (Factor == 2)
5996 return Concat;
5997
5998 SDValue Vec = DAG.getUNDEF(VT);
5999 return DAG.getInsertSubvector(DL, Vec, Concat, 0);
6000 }
6001 }
6002 }
6003 }
6004
6005 // If this is a deinterleave(2), try using vunzip{a,b}. This mostly catches
6006 // e64 which can't match above.
6007 unsigned Index = 0;
6008  if (Subtarget.hasVendorXRivosVizip() &&
6009      ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, 2, Index) &&
6010      1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
6011 unsigned Opc =
6012 Index == 0 ? RISCVISD::RI_VUNZIP2A_VL : RISCVISD::RI_VUNZIP2B_VL;
6013 if (V2.isUndef())
6014 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
6015 if (auto VLEN = Subtarget.getRealVLen();
6016 VLEN && VT.getSizeInBits().getKnownMinValue() % *VLEN == 0)
6017 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
6018 if (SDValue Src = foldConcatVector(V1, V2)) {
6019 EVT NewVT = VT.getDoubleNumVectorElementsVT();
6020 Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
6021 SDValue Res =
6022 lowerVZIP(Opc, Src, DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
6023 return DAG.getExtractSubvector(DL, VT, Res, 0);
6024 }
6025 // Deinterleave each source and concatenate them, or concat first, then
6026 // deinterleave.
6027 if (1 < count_if(Mask,
6028 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
6029 1 < count_if(Mask,
6030 [&Mask](int Idx) { return Idx >= (int)Mask.size(); })) {
6031
6032 const unsigned EltSize = VT.getScalarSizeInBits();
6033 const unsigned MinVLMAX = Subtarget.getRealMinVLen() / EltSize;
6034 if (NumElts < MinVLMAX) {
6035 MVT ConcatVT = VT.getDoubleNumVectorElementsVT();
6036 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
6037 SDValue Res =
6038 lowerVZIP(Opc, Concat, DAG.getUNDEF(ConcatVT), DL, DAG, Subtarget);
6039 return DAG.getExtractSubvector(DL, VT, Res, 0);
6040 }
6041
6042 SDValue Lo = lowerVZIP(Opc, V1, DAG.getUNDEF(VT), DL, DAG, Subtarget);
6043 SDValue Hi = lowerVZIP(Opc, V2, DAG.getUNDEF(VT), DL, DAG, Subtarget);
6044
6045 MVT SubVT = VT.getHalfNumVectorElementsVT();
6046 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT,
6047 DAG.getExtractSubvector(DL, SubVT, Lo, 0),
6048 DAG.getExtractSubvector(DL, SubVT, Hi, 0));
6049 }
6050 }
6051
6052 if (SDValue V =
6053 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
6054 return V;
6055
6056 // Detect an interleave shuffle and lower to
6057 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
6058 int EvenSrc, OddSrc;
6059 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget) &&
6060 !(NumElts == 2 &&
6061 ShuffleVectorInst::isSingleSourceMask(Mask, Mask.size()))) {
6062 // Extract the halves of the vectors.
6063 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6064
6065 // Recognize if one half is actually undef; the matching above will
6066 // otherwise reuse the even stream for the undef one. This improves
6067 // spread(2) shuffles.
6068 bool LaneIsUndef[2] = { true, true};
6069 for (const auto &[Idx, M] : enumerate(Mask))
6070 LaneIsUndef[Idx % 2] &= (M == -1);
6071
6072 int Size = Mask.size();
6073 SDValue EvenV, OddV;
6074 if (LaneIsUndef[0]) {
6075 EvenV = DAG.getUNDEF(HalfVT);
6076 } else {
6077 assert(EvenSrc >= 0 && "Undef source?");
6078 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
6079 EvenV = DAG.getExtractSubvector(DL, HalfVT, EvenV, EvenSrc % Size);
6080 }
6081
6082 if (LaneIsUndef[1]) {
6083 OddV = DAG.getUNDEF(HalfVT);
6084 } else {
6085 assert(OddSrc >= 0 && "Undef source?");
6086 OddV = (OddSrc / Size) == 0 ? V1 : V2;
6087 OddV = DAG.getExtractSubvector(DL, HalfVT, OddV, OddSrc % Size);
6088 }
6089
6090 // Prefer vzip2a if available.
6091 // TODO: Extend to matching zip2b if EvenSrc and OddSrc allow.
6092 if (Subtarget.hasVendorXRivosVizip()) {
6093 EvenV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), EvenV, 0);
6094 OddV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), OddV, 0);
6095 return lowerVZIP(RISCVISD::RI_VZIP2A_VL, EvenV, OddV, DL, DAG, Subtarget);
6096 }
6097 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
6098 }
6099
6100  // Recognize a pattern which can be handled via a pair of vslideup/vslidedown
6101 // instructions (in any combination) with masking on the second instruction.
6102 // Also handles masked slides into an identity source, and single slides
6103 // without masking. Avoid matching bit rotates (which are not also element
6104 // rotates) as slide pairs. This is a performance heuristic, not a
6105 // functional check.
6106 std::array<std::pair<int, int>, 2> SrcInfo;
6107 unsigned RotateAmt;
6108 MVT RotateVT;
6109 if (::isMaskedSlidePair(Mask, SrcInfo) &&
6110 (isElementRotate(SrcInfo, NumElts) ||
6111 !isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))) {
6112 SDValue Sources[2];
6113 auto GetSourceFor = [&](const std::pair<int, int> &Info) {
6114 int SrcIdx = Info.first;
6115 assert(SrcIdx == 0 || SrcIdx == 1);
6116 SDValue &Src = Sources[SrcIdx];
6117 if (!Src) {
6118 SDValue SrcV = SrcIdx == 0 ? V1 : V2;
6119 Src = convertToScalableVector(ContainerVT, SrcV, DAG, Subtarget);
6120 }
6121 return Src;
6122 };
6123 auto GetSlide = [&](const std::pair<int, int> &Src, SDValue Mask,
6124 SDValue Passthru) {
6125 auto [TrueMask, VL] = TrueMaskVL;
6126 SDValue SrcV = GetSourceFor(Src);
6127 int SlideAmt = Src.second;
6128 if (SlideAmt == 0) {
6129 // Should never be second operation
6130 assert(Mask == TrueMask);
6131 return SrcV;
6132 }
6133 if (SlideAmt < 0)
6134 return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6135                             DAG.getConstant(-SlideAmt, DL, XLenVT), Mask, VL,
6136                             RISCVVType::TAIL_AGNOSTIC);
6137      return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6138                         DAG.getConstant(SlideAmt, DL, XLenVT), Mask, VL,
6139                         RISCVVType::TAIL_AGNOSTIC);
6140    };
6141
6142 if (SrcInfo[1].first == -1) {
6143 SDValue Res = DAG.getUNDEF(ContainerVT);
6144 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6145 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6146 }
6147
6148 if (Subtarget.hasVendorXRivosVizip()) {
6149 bool TryWiden = false;
6150 unsigned Factor;
6151 if (isZipEven(SrcInfo, Mask, Factor)) {
6152 if (Factor == 1) {
6153 SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2;
6154 SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2;
6155 return lowerVZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG,
6156 Subtarget);
6157 }
6158 TryWiden = true;
6159 }
6160 if (isZipOdd(SrcInfo, Mask, Factor)) {
6161 if (Factor == 1) {
6162 SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2;
6163 SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2;
6164 return lowerVZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG,
6165 Subtarget);
6166 }
6167 TryWiden = true;
6168 }
6169    // If we found a widening opportunity which would let us form a
6170 // zipeven or zipodd, use the generic code to widen the shuffle
6171 // and recurse through this logic.
6172 if (TryWiden)
6173 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6174 return V;
6175 }
6176
6177 // Build the mask. Note that vslideup unconditionally preserves elements
6178 // below the slide amount in the destination, and thus those elements are
6179 // undefined in the mask. If the mask ends up all true (or undef), it
6180 // will be folded away by general logic.
6181 SmallVector<SDValue> MaskVals;
6182 for (const auto &[Idx, M] : enumerate(Mask)) {
6183 if (M < 0 ||
6184 (SrcInfo[1].second > 0 && Idx < (unsigned)SrcInfo[1].second)) {
6185 MaskVals.push_back(DAG.getUNDEF(XLenVT));
6186 continue;
6187 }
6188 int Src = M >= (int)NumElts;
6189 int Diff = (int)Idx - (M % NumElts);
6190 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
6191 assert(C ^ (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
6192 "Must match exactly one of the two slides");
6193 MaskVals.push_back(DAG.getConstant(C, DL, XLenVT));
6194 }
6195 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6196 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6197 SDValue SelectMask = convertToScalableVector(
6198 ContainerVT.changeVectorElementType(MVT::i1),
6199 DAG.getBuildVector(MaskVT, DL, MaskVals), DAG, Subtarget);
6200
6201 SDValue Res = DAG.getUNDEF(ContainerVT);
6202 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6203 Res = GetSlide(SrcInfo[1], SelectMask, Res);
6204 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6205 }
6206
6207 // Handle any remaining single source shuffles
6208 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
6209 if (V2.isUndef()) {
6210 // We might be able to express the shuffle as a bitrotate. But even if we
6211 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
6212 // shifts and a vor will have a higher throughput than a vrgather.
6213 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
6214 return V;
6215
6216 if (SDValue V = lowerVECTOR_SHUFFLEAsVRGatherVX(SVN, Subtarget, DAG))
6217 return V;
6218
6219 // Match a spread(4,8) which can be done via extend and shift. Spread(2)
6220 // is fully covered in interleave(2) above, so it is ignored here.
6221 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
6222 unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
6223 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
6224 for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
6225 unsigned Index;
6226 if (RISCVTargetLowering::isSpreadMask(Mask, Factor, Index)) {
6227 MVT NarrowVT =
6228 MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
6229 SDValue Src = DAG.getExtractSubvector(DL, NarrowVT, V1, 0);
6230 return getWideningSpread(Src, Factor, Index, DL, DAG);
6231 }
6232 }
6233 }
6234
6235    // If only a prefix of the source elements influences a prefix of the
6236    // destination elements, see if we can reduce the required LMUL.
6237 unsigned MinVLen = Subtarget.getRealMinVLen();
6238 unsigned MinVLMAX = MinVLen / VT.getScalarSizeInBits();
6239 if (NumElts > MinVLMAX) {
6240 unsigned MaxIdx = 0;
6241 for (auto [I, M] : enumerate(Mask)) {
6242 if (M == -1)
6243 continue;
6244 MaxIdx = std::max(std::max((unsigned)I, (unsigned)M), MaxIdx);
6245 }
6246 unsigned NewNumElts =
6247 std::max((uint64_t)MinVLMAX, PowerOf2Ceil(MaxIdx + 1));
6248 if (NewNumElts != NumElts) {
6249 MVT NewVT = MVT::getVectorVT(VT.getVectorElementType(), NewNumElts);
6250 V1 = DAG.getExtractSubvector(DL, NewVT, V1, 0);
6251 SDValue Res = DAG.getVectorShuffle(NewVT, DL, V1, DAG.getUNDEF(NewVT),
6252 Mask.take_front(NewNumElts));
6253 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
6254 }
6255 }
6256
6257 // Before hitting generic lowering fallbacks, try to widen the mask
6258 // to a wider SEW.
6259 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6260 return V;
6261
6262 // Can we generate a vcompress instead of a vrgather? These scale better
6263 // at high LMUL, at the cost of not being able to fold a following select
6264 // into them. The mask constants are also smaller than the index vector
6265 // constants, and thus easier to materialize.
6266 if (isCompressMask(Mask)) {
6267 SmallVector<SDValue> MaskVals(NumElts,
6268 DAG.getConstant(false, DL, XLenVT));
6269 for (auto Idx : Mask) {
6270 if (Idx == -1)
6271 break;
6272 assert(Idx >= 0 && (unsigned)Idx < NumElts);
6273 MaskVals[Idx] = DAG.getConstant(true, DL, XLenVT);
6274 }
6275 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6276 SDValue CompressMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6277 return DAG.getNode(ISD::VECTOR_COMPRESS, DL, VT, V1, CompressMask,
6278 DAG.getUNDEF(VT));
6279 }
6280
6281 if (VT.getScalarSizeInBits() == 8 &&
6282 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
6283 // On such a vector we're unable to use i8 as the index type.
6284 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
6285 // may involve vector splitting if we're already at LMUL=8, or our
6286 // user-supplied maximum fixed-length LMUL.
6287 return SDValue();
6288 }
6289
6290 // Base case for the two operand recursion below - handle the worst case
6291 // single source shuffle.
6292 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
6293 MVT IndexVT = VT.changeTypeToInteger();
6294 // Since we can't introduce illegal index types at this stage, use i16 and
6295 // vrgatherei16 if the corresponding index type for plain vrgather is greater
6296 // than XLenVT.
6297 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
6298 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6299 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6300 }
6301
6302 // If the mask allows, we can do all the index computation in 16 bits. This
6303 // requires less work and less register pressure at high LMUL, and creates
6304 // smaller constants which may be cheaper to materialize.
6305 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
6306 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
6307 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6308 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6309 }
6310
6311 MVT IndexContainerVT =
6312 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
6313
6314 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
6315 SmallVector<SDValue> GatherIndicesLHS;
6316 for (int MaskIndex : Mask) {
6317 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
6318 GatherIndicesLHS.push_back(IsLHSIndex
6319 ? DAG.getConstant(MaskIndex, DL, XLenVT)
6320 : DAG.getUNDEF(XLenVT));
6321 }
6322 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
6323 LHSIndices =
6324 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
6325 // At m1 and less, there's no point trying any of the high LMUL splitting
6326 // techniques. TODO: Should we reconsider this for DLEN < VLEN?
6327 if (NumElts <= MinVLMAX) {
6328 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6329 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6330 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6331 }
6332
6333 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
6334 EVT SubIndexVT = M1VT.changeVectorElementType(IndexVT.getScalarType());
6335 auto [InnerTrueMask, InnerVL] =
6336 getDefaultScalableVLOps(M1VT, DL, DAG, Subtarget);
6337 int N =
6338 ContainerVT.getVectorMinNumElements() / M1VT.getVectorMinNumElements();
6339 assert(isPowerOf2_32(N) && N <= 8);
6340
6341 // If we have a locally repeating mask, then we can reuse the first
6342 // register in the index register group for all registers within the
6343 // source register group. TODO: This generalizes to m2, and m4.
6344 if (isLocalRepeatingShuffle(Mask, MinVLMAX)) {
6345 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6346 SDValue Gather = DAG.getUNDEF(ContainerVT);
6347 for (int i = 0; i < N; i++) {
6348 unsigned SubIdx = M1VT.getVectorMinNumElements() * i;
6349 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, SubIdx);
6350 SDValue SubVec =
6351 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6352 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6353 Gather = DAG.getInsertSubvector(DL, Gather, SubVec, SubIdx);
6354 }
6355 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6356 }
6357
6358 // If we have a shuffle which only uses the first register in our source
6359 // register group, and repeats the same index across all spans, we can
6360 // use a single vrgather (and possibly some register moves).
6361 // TODO: This can be generalized for m2 or m4, or for any shuffle for
6362 // which we can do a linear number of shuffles to form an m1 which
6363 // contains all the output elements.
6364 if (isLowSourceShuffle(Mask, MinVLMAX) &&
6365 isSpanSplatShuffle(Mask, MinVLMAX)) {
6366 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6367 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6368 SDValue SubVec = DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6369 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6370 SDValue Gather = DAG.getUNDEF(ContainerVT);
6371 for (int i = 0; i < N; i++)
6372 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6373 M1VT.getVectorMinNumElements() * i);
6374 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6375 }
6376
6377 // If we have a shuffle which only uses the first register in our
6378 // source register group, we can do a linear number of m1 vrgathers
6379 // reusing the same source register (but with different indices)
6380 // TODO: This can be generalized for m2 or m4, or for any shuffle
6381 // for which we can do a vslidedown followed by this expansion.
6382 if (isLowSourceShuffle(Mask, MinVLMAX)) {
6383 SDValue SlideAmt =
6384 DAG.getElementCount(DL, XLenVT, M1VT.getVectorElementCount());
6385 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6386 SDValue Gather = DAG.getUNDEF(ContainerVT);
6387 for (int i = 0; i < N; i++) {
6388 if (i != 0)
6389 LHSIndices = getVSlidedown(DAG, Subtarget, DL, IndexContainerVT,
6390 DAG.getUNDEF(IndexContainerVT), LHSIndices,
6391 SlideAmt, TrueMask, VL);
6392 SDValue SubIndex =
6393 DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6394 SDValue SubVec =
6395 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6396 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6397 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6398 M1VT.getVectorMinNumElements() * i);
6399 }
6400 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6401 }
6402
6403 // Fallback to generic vrgather if we can't find anything better.
6404 // On many machines, this will be O(LMUL^2)
6405 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6406 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6407 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6408 }
6409
6410 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
6411 // merged with a second vrgather.
6412 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
6413
6414 // Now construct the mask that will be used by the blended vrgather operation.
6415 // Construct the appropriate indices into each vector.
6416 for (int MaskIndex : Mask) {
6417 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
6418 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
6419 ? MaskIndex : -1);
6420 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
6421 }
6422
6423 // If the mask indices are disjoint between the two sources, we can lower it
6424 // as a vselect + a single source vrgather.vv. Don't do this if we think the
6425 // operands may end up being lowered to something cheaper than a vrgather.vv.
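// Illustrative example (mask values chosen for this note, assuming neither
// source is a splat): for two 4-element sources, the mask <1,4,3,6> uses
// elements {1,3} of V1 and elements {0,2} of V2 (after subtracting NumElts).
// Because no lane is needed from both sources, a vselect can first merge V1
// and V2 lane-wise, and a single vrgather.vv with indices <1,0,3,2> (the
// mask modulo NumElts) then produces the shuffled result.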
6426 if (!DAG.isSplatValue(V2) && !DAG.isSplatValue(V1) &&
6427 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskLHS) &&
6428 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskRHS) &&
6429 !ShuffleVectorInst::isIdentityMask(ShuffleMaskLHS, NumElts) &&
6430 !ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts))
6431 if (SDValue V = lowerDisjointIndicesShuffle(SVN, DAG, Subtarget))
6432 return V;
6433
6434 // Before hitting generic lowering fallbacks, try to widen the mask
6435 // to a wider SEW.
6436 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6437 return V;
6438
6439 // Try to pick a profitable operand order.
6440 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
6441 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
6442
6443 // Recursively invoke lowering for each operand if we had two
6444 // independent single source shuffles, and then combine the result via a
6445 // vselect. Note that the vselect will likely be folded back into the
6446 // second permute (vrgather, or other) by the post-isel combine.
6447 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
6448 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
6449
6450 SmallVector<SDValue> MaskVals;
6451 for (int MaskIndex : Mask) {
6452 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
6453 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
6454 }
6455
6456 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6457 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6458 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6459
6460 if (SwapOps)
6461 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
6462 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
6463}
6464
6465bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
6466 // Only support legal VTs for other shuffles for now.
6467 if (!isTypeLegal(VT))
6468 return false;
6469
6470 // Support splats for any type. These should type legalize well.
6471 if (ShuffleVectorSDNode::isSplatMask(M))
6472 return true;
6473
6474 const unsigned NumElts = M.size();
6475 MVT SVT = VT.getSimpleVT();
6476
6477 // Not for i1 vectors.
6478 if (SVT.getScalarType() == MVT::i1)
6479 return false;
6480
6481 std::array<std::pair<int, int>, 2> SrcInfo;
6482 int Dummy1, Dummy2;
6483 return ShuffleVectorInst::isReverseMask(M, NumElts) ||
6484 (::isMaskedSlidePair(M, SrcInfo) &&
6485 isElementRotate(SrcInfo, NumElts)) ||
6486 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
6487}
6488
6489// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
6490// the exponent.
6491SDValue
6492RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
6493 SelectionDAG &DAG) const {
6494 MVT VT = Op.getSimpleValueType();
6495 unsigned EltSize = VT.getScalarSizeInBits();
6496 SDValue Src = Op.getOperand(0);
6497 SDLoc DL(Op);
6498 MVT ContainerVT = VT;
6499
6500 SDValue Mask, VL;
6501 if (Op->isVPOpcode()) {
6502 Mask = Op.getOperand(1);
6503 if (VT.isFixedLengthVector())
6504 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6505 Subtarget);
6506 VL = Op.getOperand(2);
6507 }
6508
6509 // We choose an FP type that can represent the value exactly if possible.
6510 // Otherwise, we use a round-towards-zero conversion so the result's exponent stays correct.
6511 // TODO: Use f16 for i8 when possible?
6512 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
6513 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
6514 FloatEltVT = MVT::f32;
6515 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
6516
6517 // Legal types should have been checked in the RISCVTargetLowering
6518 // constructor.
6519 // TODO: Splitting may make sense in some cases.
6520 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
6521 "Expected legal float type!");
6522
6523 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
6524 // The trailing zero count is equal to log2 of this single bit value.
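// For example, with Src = 12 (0b1100), Neg = -12, and Src & Neg = 0b100 = 4:
// the single remaining bit sits at position 2, which is exactly the trailing
// zero count of 12, and log2(4) = 2 is recovered from the FP exponent below.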
6525 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
6526 SDValue Neg = DAG.getNegative(Src, DL, VT);
6527 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
6528 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
6529 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
6530 Src, Mask, VL);
6531 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
6532 }
6533
6534 // We have a legal FP type, convert to it.
6535 SDValue FloatVal;
6536 if (FloatVT.bitsGT(VT)) {
6537 if (Op->isVPOpcode())
6538 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
6539 else
6540 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
6541 } else {
6542 // Use RTZ so that rounding cannot influence the exponent of FloatVal.
6543 if (VT.isFixedLengthVector()) {
6544 ContainerVT = getContainerForFixedLengthVector(VT);
6545 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
6546 }
6547 if (!Op->isVPOpcode())
6548 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6549 SDValue RTZRM =
6550 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
6551 MVT ContainerFloatVT =
6552 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
6553 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
6554 Src, Mask, RTZRM, VL);
6555 if (VT.isFixedLengthVector())
6556 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
6557 }
6558 // Bitcast to integer and shift the exponent to the LSB.
6559 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
6560 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
6561 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
6562
6563 SDValue Exp;
6564 // Restore back to original type. Truncation after SRL is to generate vnsrl.
6565 if (Op->isVPOpcode()) {
6566 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
6567 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
6568 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
6569 } else {
6570 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
6571 DAG.getConstant(ShiftAmt, DL, IntVT));
6572 if (IntVT.bitsLT(VT))
6573 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
6574 else if (IntVT.bitsGT(VT))
6575 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
6576 }
6577
6578 // The exponent contains log2 of the value in biased form.
6579 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
6580 // For trailing zeros, we just need to subtract the bias.
6581 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
6582 return DAG.getNode(ISD::SUB, DL, VT, Exp,
6583 DAG.getConstant(ExponentBias, DL, VT));
6584 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
6585 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
6586 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
6587
6588 // For leading zeros, we need to remove the bias and convert from log2 to
6589 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
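// Worked example (assuming i16 elements converted through f32, bias 127):
// for Src = 16 the f32 exponent field holds 127 + 4 = 131, Adjust is
// 127 + (16 - 1) = 142, and 142 - 131 = 11, which is indeed ctlz(16) for a
// 16-bit element.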
6590 unsigned Adjust = ExponentBias + (EltSize - 1);
6591 SDValue Res;
6592 if (Op->isVPOpcode())
6593 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
6594 Mask, VL);
6595 else
6596 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
6597
6598 // With a zero input, the result above equals Adjust, which is greater than
6599 // EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
6600 if (Op.getOpcode() == ISD::CTLZ)
6601 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
6602 else if (Op.getOpcode() == ISD::VP_CTLZ)
6603 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
6604 DAG.getConstant(EltSize, DL, VT), Mask, VL);
6605 return Res;
6606}
6607
6608SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
6609 SelectionDAG &DAG) const {
6610 SDLoc DL(Op);
6611 MVT XLenVT = Subtarget.getXLenVT();
6612 SDValue Source = Op->getOperand(0);
6613 MVT SrcVT = Source.getSimpleValueType();
6614 SDValue Mask = Op->getOperand(1);
6615 SDValue EVL = Op->getOperand(2);
6616
6617 if (SrcVT.isFixedLengthVector()) {
6618 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
6619 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
6620 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6621 Subtarget);
6622 SrcVT = ContainerVT;
6623 }
6624
6625 // Convert to boolean vector.
6626 if (SrcVT.getScalarType() != MVT::i1) {
6627 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
6628 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
6629 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
6630 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
6631 DAG.getUNDEF(SrcVT), Mask, EVL});
6632 }
6633
6634 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
6635 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
6636 // In this case, we can interpret poison as -1, so nothing to do further.
6637 return Res;
6638
6639 // Convert -1 to VL.
6640 SDValue SetCC =
6641 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
6642 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
6643 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
6644}
6645
6646// While RVV has alignment restrictions, we should always be able to load as a
6647// legal equivalently-sized byte-typed vector instead. This method is
6648 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
6649// the load is already correctly-aligned, it returns SDValue().
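// For instance (illustrative types), a <4 x i32> load with only byte
// alignment is re-expressed as a <16 x i8> load of the same total width,
// followed by a bitcast back to <4 x i32>; the byte-element form has no
// alignment requirement beyond 1.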
6650SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
6651 SelectionDAG &DAG) const {
6652 auto *Load = cast<LoadSDNode>(Op);
6653 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
6654
6655 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
6656 Load->getMemoryVT(),
6657 *Load->getMemOperand()))
6658 return SDValue();
6659
6660 SDLoc DL(Op);
6661 MVT VT = Op.getSimpleValueType();
6662 unsigned EltSizeBits = VT.getScalarSizeInBits();
6663 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6664 "Unexpected unaligned RVV load type");
6665 MVT NewVT =
6666 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6667 assert(NewVT.isValid() &&
6668 "Expecting equally-sized RVV vector types to be legal");
6669 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
6670 Load->getPointerInfo(), Load->getBaseAlign(),
6671 Load->getMemOperand()->getFlags());
6672 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
6673}
6674
6675// While RVV has alignment restrictions, we should always be able to store as a
6676// legal equivalently-sized byte-typed vector instead. This method is
6677 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
6678// returns SDValue() if the store is already correctly aligned.
6679SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
6680 SelectionDAG &DAG) const {
6681 auto *Store = cast<StoreSDNode>(Op);
6682 assert(Store && Store->getValue().getValueType().isVector() &&
6683 "Expected vector store");
6684
6685 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
6686 Store->getMemoryVT(),
6687 *Store->getMemOperand()))
6688 return SDValue();
6689
6690 SDLoc DL(Op);
6691 SDValue StoredVal = Store->getValue();
6692 MVT VT = StoredVal.getSimpleValueType();
6693 unsigned EltSizeBits = VT.getScalarSizeInBits();
6694 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6695 "Unexpected unaligned RVV store type");
6696 MVT NewVT =
6697 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6698 assert(NewVT.isValid() &&
6699 "Expecting equally-sized RVV vector types to be legal");
6700 StoredVal = DAG.getBitcast(NewVT, StoredVal);
6701 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
6702 Store->getPointerInfo(), Store->getBaseAlign(),
6703 Store->getMemOperand()->getFlags());
6704}
6705
6706static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
6707 const RISCVSubtarget &Subtarget) {
6708 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
6709
6710 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
6711
6712 // All simm32 constants should be handled by isel.
6713 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2,
6714 // making this check redundant, but small immediates are common, so checking
6715 // for them first keeps compile time down.
6716 if (isInt<32>(Imm))
6717 return Op;
6718
6719 // We only need to cost the immediate, if constant pool lowering is enabled.
6720 if (!Subtarget.useConstantPoolForLargeInts())
6721 return Op;
6722
6724 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
6725 return Op;
6726
6727 // Optimizations below are disabled for opt size. If we're optimizing for
6728 // size, use a constant pool.
6729 if (DAG.shouldOptForSize())
6730 return SDValue();
6731
6732 // Special case. See if we can build the constant as (ADD (SLLI X, C), X);
6733 // do that if it will avoid a constant pool.
6734 // It will require an extra temporary register though.
6735 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
6736 // low and high 32 bits are the same and bit 31 and 63 are set.
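// Example (value chosen for this note): Imm = 0x8765432187654321 has equal
// low and high halves with bits 31 and 63 set. Materializing X = 0x87654321
// and computing ADD_UW(X, SLLI(X, 32)) zero-extends the low 32 bits of X and
// adds them to X shifted into the high half, rebuilding the full constant
// without a constant pool load.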
6737 unsigned ShiftAmt, AddOpc;
6738 RISCVMatInt::InstSeq SeqLo =
6739 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
6740 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
6741 return Op;
6742
6743 return SDValue();
6744}
6745
6746SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
6747 SelectionDAG &DAG) const {
6748 MVT VT = Op.getSimpleValueType();
6749 const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
6750
6751 // Can this constant be selected by a Zfa FLI instruction?
6752 bool Negate = false;
6753 int Index = getLegalZfaFPImm(Imm, VT);
6754
6755 // If the constant is negative, try negating.
6756 if (Index < 0 && Imm.isNegative()) {
6757 Index = getLegalZfaFPImm(-Imm, VT);
6758 Negate = true;
6759 }
6760
6761 // If we couldn't find a FLI lowering, fall back to generic code.
6762 if (Index < 0)
6763 return SDValue();
6764
6765 // Emit an FLI+FNEG. We use a custom node to hide from constant folding.
6766 SDLoc DL(Op);
6767 SDValue Const =
6768 DAG.getNode(RISCVISD::FLI, DL, VT,
6769 DAG.getTargetConstant(Index, DL, Subtarget.getXLenVT()));
6770 if (!Negate)
6771 return Const;
6772
6773 return DAG.getNode(ISD::FNEG, DL, VT, Const);
6774}
6775
6776static SDValue LowerPREFETCH(SDValue Op, const RISCVSubtarget &Subtarget,
6777 SelectionDAG &DAG) {
6778
6779 unsigned IsData = Op.getConstantOperandVal(4);
6780
6781 // mips-p8700 only supports data prefetch for now, so drop non-data prefetches.
6782 if (Subtarget.hasVendorXMIPSCBOP() && !IsData)
6783 return Op.getOperand(0);
6784 return Op;
6785}
6786
6787static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
6788 const RISCVSubtarget &Subtarget) {
6789 SDLoc dl(Op);
6790 AtomicOrdering FenceOrdering =
6791 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
6792 SyncScope::ID FenceSSID =
6793 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
6794
6795 if (Subtarget.hasStdExtZtso()) {
6796 // The only fence that needs an instruction is a sequentially-consistent
6797 // cross-thread fence.
6798 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
6799 FenceSSID == SyncScope::System)
6800 return Op;
6801
6802 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6803 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6804 }
6805
6806 // singlethread fences only synchronize with signal handlers on the same
6807 // thread and thus only need to preserve instruction order, not actually
6808 // enforce memory ordering.
6809 if (FenceSSID == SyncScope::SingleThread)
6810 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6811 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6812
6813 return Op;
6814}
6815
6816SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
6817 SelectionDAG &DAG) const {
6818 SDLoc DL(Op);
6819 MVT VT = Op.getSimpleValueType();
6820 MVT XLenVT = Subtarget.getXLenVT();
6821 unsigned Check = Op.getConstantOperandVal(1);
6822 unsigned TDCMask = 0;
6823 if (Check & fcSNan)
6824 TDCMask |= RISCV::FPMASK_Signaling_NaN;
6825 if (Check & fcQNan)
6826 TDCMask |= RISCV::FPMASK_Quiet_NaN;
6827 if (Check & fcPosInf)
6828 TDCMask |= RISCV::FPMASK_Positive_Infinity;
6829 if (Check & fcNegInf)
6830 TDCMask |= RISCV::FPMASK_Negative_Infinity;
6831 if (Check & fcPosNormal)
6832 TDCMask |= RISCV::FPMASK_Positive_Normal;
6833 if (Check & fcNegNormal)
6834 TDCMask |= RISCV::FPMASK_Negative_Normal;
6835 if (Check & fcPosSubnormal)
6836 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
6837 if (Check & fcNegSubnormal)
6838 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
6839 if (Check & fcPosZero)
6840 TDCMask |= RISCV::FPMASK_Positive_Zero;
6841 if (Check & fcNegZero)
6842 TDCMask |= RISCV::FPMASK_Negative_Zero;
6843
6844 bool IsOneBitMask = isPowerOf2_32(TDCMask);
6845
6846 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
6847
6848 if (VT.isVector()) {
6849 SDValue Op0 = Op.getOperand(0);
6850 MVT VT0 = Op.getOperand(0).getSimpleValueType();
6851
6852 if (VT.isScalableVector()) {
6853 MVT DstVT = VT0.changeVectorElementTypeToInteger();
6854 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
6855 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6856 Mask = Op.getOperand(2);
6857 VL = Op.getOperand(3);
6858 }
6859 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
6860 VL, Op->getFlags());
6861 if (IsOneBitMask)
6862 return DAG.getSetCC(DL, VT, FPCLASS,
6863 DAG.getConstant(TDCMask, DL, DstVT),
6864 ISD::CondCode::SETEQ);
6865 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
6866 DAG.getConstant(TDCMask, DL, DstVT));
6867 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
6868 ISD::SETNE);
6869 }
6870
6871 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
6872 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6873 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
6874 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
6875 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6876 Mask = Op.getOperand(2);
6877 MVT MaskContainerVT =
6878 getContainerForFixedLengthVector(Mask.getSimpleValueType());
6879 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
6880 VL = Op.getOperand(3);
6881 }
6882 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
6883
6884 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
6885 Mask, VL, Op->getFlags());
6886
6887 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6888 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
6889 if (IsOneBitMask) {
6890 SDValue VMSEQ =
6891 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6892 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
6893 DAG.getUNDEF(ContainerVT), Mask, VL});
6894 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
6895 }
6896 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
6897 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
6898
6899 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
6900 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6901 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
6902
6903 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6904 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
6905 DAG.getUNDEF(ContainerVT), Mask, VL});
6906 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
6907 }
6908
6909 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
6910 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
6911 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
6912 ISD::CondCode::SETNE);
6913 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6914}
6915
6916// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
6917// operations propagate nans.
6918static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
6919 const RISCVSubtarget &Subtarget) {
6920 SDLoc DL(Op);
6921 MVT VT = Op.getSimpleValueType();
6922
6923 SDValue X = Op.getOperand(0);
6924 SDValue Y = Op.getOperand(1);
6925
6926 if (!VT.isVector()) {
6927 MVT XLenVT = Subtarget.getXLenVT();
6928
6929 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
6930 // ensures that when one input is a nan, the other will also be a nan
6931 // allowing the nan to propagate. If both inputs are nan, this will swap the
6932 // inputs which is harmless.
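// For example, if X is NaN and Y = 2.0: XIsNonNan is false, so NewY becomes
// X (NaN); Y is not NaN, so NewX stays X (NaN). The underlying fmax/fmin then
// sees two NaN operands and returns NaN, which gives the propagating
// semantics that fmaximum/fminimum require.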
6933
6934 SDValue NewY = Y;
6935 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
6936 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
6937 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
6938 }
6939
6940 SDValue NewX = X;
6941 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
6942 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
6943 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
6944 }
6945
6946 unsigned Opc =
6947 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
6948 return DAG.getNode(Opc, DL, VT, NewX, NewY);
6949 }
6950
6951 // Check for known-never-NaN inputs before converting the fixed-length vectors to scalable form.
6952 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
6953 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
6954
6955 MVT ContainerVT = VT;
6956 if (VT.isFixedLengthVector()) {
6957 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
6958 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
6959 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
6960 }
6961
6962 SDValue Mask, VL;
6963 if (Op->isVPOpcode()) {
6964 Mask = Op.getOperand(2);
6965 if (VT.isFixedLengthVector())
6966 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6967 Subtarget);
6968 VL = Op.getOperand(3);
6969 } else {
6970 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6971 }
6972
6973 SDValue NewY = Y;
6974 if (!XIsNeverNan) {
6975 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6976 {X, X, DAG.getCondCode(ISD::SETOEQ),
6977 DAG.getUNDEF(ContainerVT), Mask, VL});
6978 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
6979 DAG.getUNDEF(ContainerVT), VL);
6980 }
6981
6982 SDValue NewX = X;
6983 if (!YIsNeverNan) {
6984 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6985 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
6986 DAG.getUNDEF(ContainerVT), Mask, VL});
6987 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
6988 DAG.getUNDEF(ContainerVT), VL);
6989 }
6990
6991 unsigned Opc =
6992 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
6993 ? RISCVISD::VFMAX_VL
6994 : RISCVISD::VFMIN_VL;
6995 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
6996 DAG.getUNDEF(ContainerVT), Mask, VL);
6997 if (VT.isFixedLengthVector())
6998 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
6999 return Res;
7000}
7001
7002static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG,
7003 const RISCVSubtarget &Subtarget) {
7004 bool IsFABS = Op.getOpcode() == ISD::FABS;
7005 assert((IsFABS || Op.getOpcode() == ISD::FNEG) &&
7006 "Wrong opcode for lowering FABS or FNEG.");
7007
7008 MVT XLenVT = Subtarget.getXLenVT();
7009 MVT VT = Op.getSimpleValueType();
7010 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
7011
7012 SDLoc DL(Op);
7013 SDValue Fmv =
7014 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op.getOperand(0));
7015
7016 APInt Mask = IsFABS ? APInt::getSignedMaxValue(16) : APInt::getSignMask(16);
7017 Mask = Mask.sext(Subtarget.getXLen());
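// Concretely, for f16/bf16 the constant is 0x7FFF for FABS (the AND clears
// the sign bit, bit 15) and 0x8000 for FNEG (the XOR flips it), each
// sign-extended to XLEN; FMV_H_X only consumes the low 16 bits, so the
// extended upper bits do not matter.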
7018
7019 unsigned LogicOpc = IsFABS ? ISD::AND : ISD::XOR;
7020 SDValue Logic =
7021 DAG.getNode(LogicOpc, DL, XLenVT, Fmv, DAG.getConstant(Mask, DL, XLenVT));
7022 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
7023}
7024
7025static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG,
7026 const RISCVSubtarget &Subtarget) {
7027 assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
7028
7029 MVT XLenVT = Subtarget.getXLenVT();
7030 MVT VT = Op.getSimpleValueType();
7031 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
7032
7033 SDValue Mag = Op.getOperand(0);
7034 SDValue Sign = Op.getOperand(1);
7035
7036 SDLoc DL(Op);
7037
7038 // Get sign bit into an integer value.
7039 unsigned SignSize = Sign.getValueSizeInBits();
7040 SDValue SignAsInt = [&]() {
7041 if (SignSize == Subtarget.getXLen())
7042 return DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
7043 switch (SignSize) {
7044 case 16:
7045 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
7046 case 32:
7047 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
7048 case 64: {
7049 assert(XLenVT == MVT::i32 && "Unexpected type");
7050 // Copy the upper word to integer.
7051 SignSize = 32;
7052 return DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
7053 .getValue(1);
7054 }
7055 default:
7056 llvm_unreachable("Unexpected sign size");
7057 }
7058 }();
7059
7060 // Get the signbit at the right position for MagAsInt.
7061 if (int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits())
7062 SignAsInt = DAG.getNode(ShiftAmount > 0 ? ISD::SRL : ISD::SHL, DL, XLenVT,
7063 SignAsInt,
7064 DAG.getConstant(std::abs(ShiftAmount), DL, XLenVT));
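// For instance, Mag is always f16/bf16 here (16 bits); if Sign is f32
// (32 bits), ShiftAmount is +16 and the logical shift right by 16 moves the
// f32 sign bit from bit 31 down to bit 15, where the half-precision sign bit
// lives.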
7065
7066 // Mask the sign bit and any bits above it. The extra bits will be dropped
7067 // when we convert back to FP.
7068 SDValue SignMask = DAG.getConstant(
7069 APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
7070 SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
7071
7072 // Transform Mag value to integer, and clear the sign bit.
7073 SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
7074 SDValue ClearSignMask = DAG.getConstant(
7075 APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
7076 SDValue ClearedSign =
7077 DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
7078
7079 SDValue CopiedSign = DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit,
7080 SDNodeFlags::Disjoint);
7081
7082 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
7083}
7084
7085/// Get a RISC-V target specified VL op for a given SDNode.
7086static unsigned getRISCVVLOp(SDValue Op) {
7087#define OP_CASE(NODE) \
7088 case ISD::NODE: \
7089 return RISCVISD::NODE##_VL;
7090#define VP_CASE(NODE) \
7091 case ISD::VP_##NODE: \
7092 return RISCVISD::NODE##_VL;
7093 // clang-format off
7094 switch (Op.getOpcode()) {
7095 default:
7096 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
7097 OP_CASE(ADD)
7098 OP_CASE(SUB)
7099 OP_CASE(MUL)
7100 OP_CASE(MULHS)
7101 OP_CASE(MULHU)
7102 OP_CASE(SDIV)
7103 OP_CASE(SREM)
7104 OP_CASE(UDIV)
7105 OP_CASE(UREM)
7106 OP_CASE(SHL)
7107 OP_CASE(SRA)
7108 OP_CASE(SRL)
7109 OP_CASE(ROTL)
7110 OP_CASE(ROTR)
7111 OP_CASE(BSWAP)
7112 OP_CASE(CTTZ)
7113 OP_CASE(CTLZ)
7114 OP_CASE(CTPOP)
7115 OP_CASE(BITREVERSE)
7116 OP_CASE(SADDSAT)
7117 OP_CASE(UADDSAT)
7118 OP_CASE(SSUBSAT)
7119 OP_CASE(USUBSAT)
7120 OP_CASE(AVGFLOORS)
7121 OP_CASE(AVGFLOORU)
7122 OP_CASE(AVGCEILS)
7123 OP_CASE(AVGCEILU)
7124 OP_CASE(FADD)
7125 OP_CASE(FSUB)
7126 OP_CASE(FMUL)
7127 OP_CASE(FDIV)
7128 OP_CASE(FNEG)
7129 OP_CASE(FABS)
7130 OP_CASE(FCOPYSIGN)
7131 OP_CASE(FSQRT)
7132 OP_CASE(SMIN)
7133 OP_CASE(SMAX)
7134 OP_CASE(UMIN)
7135 OP_CASE(UMAX)
7136 OP_CASE(STRICT_FADD)
7137 OP_CASE(STRICT_FSUB)
7138 OP_CASE(STRICT_FMUL)
7139 OP_CASE(STRICT_FDIV)
7140 OP_CASE(STRICT_FSQRT)
7141 VP_CASE(ADD) // VP_ADD
7142 VP_CASE(SUB) // VP_SUB
7143 VP_CASE(MUL) // VP_MUL
7144 VP_CASE(SDIV) // VP_SDIV
7145 VP_CASE(SREM) // VP_SREM
7146 VP_CASE(UDIV) // VP_UDIV
7147 VP_CASE(UREM) // VP_UREM
7148 VP_CASE(SHL) // VP_SHL
7149 VP_CASE(FADD) // VP_FADD
7150 VP_CASE(FSUB) // VP_FSUB
7151 VP_CASE(FMUL) // VP_FMUL
7152 VP_CASE(FDIV) // VP_FDIV
7153 VP_CASE(FNEG) // VP_FNEG
7154 VP_CASE(FABS) // VP_FABS
7155 VP_CASE(SMIN) // VP_SMIN
7156 VP_CASE(SMAX) // VP_SMAX
7157 VP_CASE(UMIN) // VP_UMIN
7158 VP_CASE(UMAX) // VP_UMAX
7159 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
7160 VP_CASE(SETCC) // VP_SETCC
7161 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
7162 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
7163 VP_CASE(BITREVERSE) // VP_BITREVERSE
7164 VP_CASE(SADDSAT) // VP_SADDSAT
7165 VP_CASE(UADDSAT) // VP_UADDSAT
7166 VP_CASE(SSUBSAT) // VP_SSUBSAT
7167 VP_CASE(USUBSAT) // VP_USUBSAT
7168 VP_CASE(BSWAP) // VP_BSWAP
7169 VP_CASE(CTLZ) // VP_CTLZ
7170 VP_CASE(CTTZ) // VP_CTTZ
7171 VP_CASE(CTPOP) // VP_CTPOP
7172 case ISD::CTLZ_ZERO_UNDEF:
7173 case ISD::VP_CTLZ_ZERO_UNDEF:
7174 return RISCVISD::CTLZ_VL;
7175 case ISD::CTTZ_ZERO_UNDEF:
7176 case ISD::VP_CTTZ_ZERO_UNDEF:
7177 return RISCVISD::CTTZ_VL;
7178 case ISD::FMA:
7179 case ISD::VP_FMA:
7180 return RISCVISD::VFMADD_VL;
7181 case ISD::STRICT_FMA:
7182 return RISCVISD::STRICT_VFMADD_VL;
7183 case ISD::AND:
7184 case ISD::VP_AND:
7185 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7186 return RISCVISD::VMAND_VL;
7187 return RISCVISD::AND_VL;
7188 case ISD::OR:
7189 case ISD::VP_OR:
7190 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7191 return RISCVISD::VMOR_VL;
7192 return RISCVISD::OR_VL;
7193 case ISD::XOR:
7194 case ISD::VP_XOR:
7195 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7196 return RISCVISD::VMXOR_VL;
7197 return RISCVISD::XOR_VL;
7198 case ISD::ANY_EXTEND:
7199 case ISD::ZERO_EXTEND:
7200 return RISCVISD::VZEXT_VL;
7201 case ISD::SIGN_EXTEND:
7202 return RISCVISD::VSEXT_VL;
7203 case ISD::SETCC:
7204 return RISCVISD::SETCC_VL;
7205 case ISD::VSELECT:
7206 return RISCVISD::VMERGE_VL;
7207 case ISD::VP_SELECT:
7208 case ISD::VP_MERGE:
7209 return RISCVISD::VMERGE_VL;
7210 case ISD::VP_SRA:
7211 return RISCVISD::SRA_VL;
7212 case ISD::VP_SRL:
7213 return RISCVISD::SRL_VL;
7214 case ISD::VP_SQRT:
7215 return RISCVISD::FSQRT_VL;
7216 case ISD::VP_SIGN_EXTEND:
7217 return RISCVISD::VSEXT_VL;
7218 case ISD::VP_ZERO_EXTEND:
7219 return RISCVISD::VZEXT_VL;
7220 case ISD::VP_FP_TO_SINT:
7221 return RISCVISD::VFCVT_RTZ_X_F_VL;
7222 case ISD::VP_FP_TO_UINT:
7223 return RISCVISD::VFCVT_RTZ_XU_F_VL;
7224 case ISD::FMINNUM:
7225 case ISD::FMINIMUMNUM:
7226 case ISD::VP_FMINNUM:
7227 return RISCVISD::VFMIN_VL;
7228 case ISD::FMAXNUM:
7229 case ISD::FMAXIMUMNUM:
7230 case ISD::VP_FMAXNUM:
7231 return RISCVISD::VFMAX_VL;
7232 case ISD::LRINT:
7233 case ISD::VP_LRINT:
7234 case ISD::LLRINT:
7235 case ISD::VP_LLRINT:
7236 return RISCVISD::VFCVT_RM_X_F_VL;
7237 }
7238 // clang-format on
7239#undef OP_CASE
7240#undef VP_CASE
7241}
7242
7243static bool isPromotedOpNeedingSplit(SDValue Op,
7244 const RISCVSubtarget &Subtarget) {
7245 return (Op.getValueType() == MVT::nxv32f16 &&
7246 (Subtarget.hasVInstructionsF16Minimal() &&
7247 !Subtarget.hasVInstructionsF16())) ||
7248 Op.getValueType() == MVT::nxv32bf16;
7249}
7250
7251static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
7252 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
7253 SDLoc DL(Op);
7254
7255 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7256 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7257
7258 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7259 if (!Op.getOperand(j).getValueType().isVector()) {
7260 LoOperands[j] = Op.getOperand(j);
7261 HiOperands[j] = Op.getOperand(j);
7262 continue;
7263 }
7264 std::tie(LoOperands[j], HiOperands[j]) =
7265 DAG.SplitVector(Op.getOperand(j), DL);
7266 }
7267
7268 SDValue LoRes =
7269 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7270 SDValue HiRes =
7271 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7272
7273 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7274}
7275
7276static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
7277 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
7278 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
7279 SDLoc DL(Op);
7280
7281 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7282 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7283
7284 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7285 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
7286 std::tie(LoOperands[j], HiOperands[j]) =
7287 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
7288 continue;
7289 }
7290 if (!Op.getOperand(j).getValueType().isVector()) {
7291 LoOperands[j] = Op.getOperand(j);
7292 HiOperands[j] = Op.getOperand(j);
7293 continue;
7294 }
7295 std::tie(LoOperands[j], HiOperands[j]) =
7296 DAG.SplitVector(Op.getOperand(j), DL);
7297 }
7298
7299 SDValue LoRes =
7300 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7301 SDValue HiRes =
7302 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7303
7304 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7305}
7306
7307static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
7308 SDLoc DL(Op);
7309
7310 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
7311 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
7312 auto [EVLLo, EVLHi] =
7313 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
7314
7315 SDValue ResLo =
7316 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7317 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
7318 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7319 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
7320}
7321
7322static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
7323
7324 assert(Op->isStrictFPOpcode());
7325
7326 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
7327
7328 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
7329 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
7330
7331 SDLoc DL(Op);
7332
7333 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7334 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7335
7336 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7337 if (!Op.getOperand(j).getValueType().isVector()) {
7338 LoOperands[j] = Op.getOperand(j);
7339 HiOperands[j] = Op.getOperand(j);
7340 continue;
7341 }
7342 std::tie(LoOperands[j], HiOperands[j]) =
7343 DAG.SplitVector(Op.getOperand(j), DL);
7344 }
7345
7346 SDValue LoRes =
7347 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
7348 HiOperands[0] = LoRes.getValue(1);
7349 SDValue HiRes =
7350 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
7351
7352 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
7353 LoRes.getValue(0), HiRes.getValue(0));
7354 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
7355}
7356
7357SDValue
7358RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Load(SDValue Op,
7359 SelectionDAG &DAG) const {
7360 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7361 "Unexpected bfloat16 load lowering");
7362
7363 SDLoc DL(Op);
7364 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
7365 EVT MemVT = LD->getMemoryVT();
7366 SDValue Load = DAG.getExtLoad(
7367 ISD::ZEXTLOAD, DL, Subtarget.getXLenVT(), LD->getChain(),
7368 LD->getBasePtr(),
7369 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
7370 LD->getMemOperand());
7371 // Use a mask to make the bf16 value properly nan-boxed when we don't have
7372 // the flh instruction. -65536 (0xFFFF0000) has a zero low 12 bits, so a
7373 // single lui can materialize the constant directly.
7374 SDValue mask = DAG.getSignedConstant(-65536, DL, Subtarget.getXLenVT());
7375 SDValue OrSixteenOne =
7376 DAG.getNode(ISD::OR, DL, Load.getValueType(), {Load, mask});
7377 SDValue ConvertedResult =
7378 DAG.getNode(RISCVISD::NDS_FMV_BF16_X, DL, MVT::bf16, OrSixteenOne);
7379 return DAG.getMergeValues({ConvertedResult, Load.getValue(1)}, DL);
7380}
7381
7382SDValue
7383RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Store(SDValue Op,
7384 SelectionDAG &DAG) const {
7385 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7386 "Unexpected bfloat16 store lowering");
7387
7388 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
7389 SDLoc DL(Op);
7390 SDValue FMV = DAG.getNode(RISCVISD::NDS_FMV_X_ANYEXTBF16, DL,
7391 Subtarget.getXLenVT(), ST->getValue());
7392 return DAG.getTruncStore(
7393 ST->getChain(), DL, FMV, ST->getBasePtr(),
7394 EVT::getIntegerVT(*DAG.getContext(), ST->getMemoryVT().getSizeInBits()),
7395 ST->getMemOperand());
7396}
7397
7398SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
7399 SelectionDAG &DAG) const {
7400 switch (Op.getOpcode()) {
7401 default:
7403 "Unimplemented RISCVTargetLowering::LowerOperation Case");
7404 case ISD::PREFETCH:
7405 return LowerPREFETCH(Op, Subtarget, DAG);
7406 case ISD::ATOMIC_FENCE:
7407 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
7408 case ISD::GlobalAddress:
7409 return lowerGlobalAddress(Op, DAG);
7410 case ISD::BlockAddress:
7411 return lowerBlockAddress(Op, DAG);
7412 case ISD::ConstantPool:
7413 return lowerConstantPool(Op, DAG);
7414 case ISD::JumpTable:
7415 return lowerJumpTable(Op, DAG);
7416 case ISD::GlobalTLSAddress:
7417 return lowerGlobalTLSAddress(Op, DAG);
7418 case ISD::Constant:
7419 return lowerConstant(Op, DAG, Subtarget);
7420 case ISD::ConstantFP:
7421 return lowerConstantFP(Op, DAG);
7422 case ISD::SELECT:
7423 return lowerSELECT(Op, DAG);
7424 case ISD::BRCOND:
7425 return lowerBRCOND(Op, DAG);
7426 case ISD::VASTART:
7427 return lowerVASTART(Op, DAG);
7428 case ISD::FRAMEADDR:
7429 return lowerFRAMEADDR(Op, DAG);
7430 case ISD::RETURNADDR:
7431 return lowerRETURNADDR(Op, DAG);
7432 case ISD::SHL_PARTS:
7433 return lowerShiftLeftParts(Op, DAG);
7434 case ISD::SRA_PARTS:
7435 return lowerShiftRightParts(Op, DAG, true);
7436 case ISD::SRL_PARTS:
7437 return lowerShiftRightParts(Op, DAG, false);
7438 case ISD::ROTL:
7439 case ISD::ROTR:
7440 if (Op.getValueType().isFixedLengthVector()) {
7441 assert(Subtarget.hasStdExtZvkb());
7442 return lowerToScalableOp(Op, DAG);
7443 }
7444 assert(Subtarget.hasVendorXTHeadBb() &&
7445 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
7446 "Unexpected custom legalization");
7447 // XTHeadBb only supports rotate by constant.
7448 if (!isa<ConstantSDNode>(Op.getOperand(1)))
7449 return SDValue();
7450 return Op;
7451 case ISD::BITCAST: {
7452 SDLoc DL(Op);
7453 EVT VT = Op.getValueType();
7454 SDValue Op0 = Op.getOperand(0);
7455 EVT Op0VT = Op0.getValueType();
7456 MVT XLenVT = Subtarget.getXLenVT();
7457 if (Op0VT == MVT::i16 &&
7458 ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
7459 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
7460 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
7461 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, NewOp0);
7462 }
7463 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
7464 Subtarget.hasStdExtFOrZfinx()) {
7465 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
7466 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
7467 }
7468 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
7469 Subtarget.hasStdExtDOrZdinx()) {
7470 SDValue Lo, Hi;
7471 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
7472 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
7473 }
7474
7475 // Consider other scalar<->scalar casts as legal if the types are legal.
7476 // Otherwise expand them.
7477 if (!VT.isVector() && !Op0VT.isVector()) {
7478 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
7479 return Op;
7480 return SDValue();
7481 }
7482
7483 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
7484 "Unexpected types");
7485
7486 if (VT.isFixedLengthVector()) {
7487 // We can handle fixed length vector bitcasts with a simple replacement
7488 // in isel.
7489 if (Op0VT.isFixedLengthVector())
7490 return Op;
7491 // When bitcasting from scalar to fixed-length vector, insert the scalar
7492 // into a one-element vector of the result type, and perform a vector
7493 // bitcast.
7494 if (!Op0VT.isVector()) {
7495 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
7496 if (!isTypeLegal(BVT))
7497 return SDValue();
7498 return DAG.getBitcast(
7499 VT, DAG.getInsertVectorElt(DL, DAG.getUNDEF(BVT), Op0, 0));
7500 }
7501 return SDValue();
7502 }
7503 // Custom-legalize bitcasts from fixed-length vector types to scalar types
7504 // thus: bitcast the vector to a one-element vector type whose element type
7505 // is the same as the result type, and extract the first element.
7506 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
7507 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
7508 if (!isTypeLegal(BVT))
7509 return SDValue();
7510 SDValue BVec = DAG.getBitcast(BVT, Op0);
7511 return DAG.getExtractVectorElt(DL, VT, BVec, 0);
7512 }
7513 return SDValue();
7514 }
7515 case ISD::INTRINSIC_WO_CHAIN:
7516 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
7517 case ISD::INTRINSIC_W_CHAIN:
7518 return LowerINTRINSIC_W_CHAIN(Op, DAG);
7519 case ISD::INTRINSIC_VOID:
7520 return LowerINTRINSIC_VOID(Op, DAG);
7521 case ISD::IS_FPCLASS:
7522 return LowerIS_FPCLASS(Op, DAG);
7523 case ISD::BITREVERSE: {
7524 MVT VT = Op.getSimpleValueType();
7525 if (VT.isFixedLengthVector()) {
7526 assert(Subtarget.hasStdExtZvbb());
7527 return lowerToScalableOp(Op, DAG);
7528 }
7529 SDLoc DL(Op);
7530 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
7531 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
7532 // Expand bitreverse to a bswap(rev8) followed by brev8.
7533 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
7534 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
7535 }
7536 case ISD::TRUNCATE:
7537 case ISD::TRUNCATE_SSAT_S:
7538 case ISD::TRUNCATE_USAT_U:
7539 // Only custom-lower vector truncates
7540 if (!Op.getSimpleValueType().isVector())
7541 return Op;
7542 return lowerVectorTruncLike(Op, DAG);
7543 case ISD::ANY_EXTEND:
7544 case ISD::ZERO_EXTEND:
7545 if (Op.getOperand(0).getValueType().isVector() &&
7546 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7547 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
7548 if (Op.getValueType().isScalableVector())
7549 return Op;
7550 return lowerToScalableOp(Op, DAG);
7551 case ISD::SIGN_EXTEND:
7552 if (Op.getOperand(0).getValueType().isVector() &&
7553 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7554 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
7555 if (Op.getValueType().isScalableVector())
7556 return Op;
7557 return lowerToScalableOp(Op, DAG);
7558 case ISD::SPLAT_VECTOR_PARTS:
7559 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
7560 case ISD::INSERT_VECTOR_ELT:
7561 return lowerINSERT_VECTOR_ELT(Op, DAG);
7562 case ISD::EXTRACT_VECTOR_ELT:
7563 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
7564 case ISD::SCALAR_TO_VECTOR: {
7565 MVT VT = Op.getSimpleValueType();
7566 SDLoc DL(Op);
7567 SDValue Scalar = Op.getOperand(0);
7568 if (VT.getVectorElementType() == MVT::i1) {
7569 MVT WideVT = VT.changeVectorElementType(MVT::i8);
7570 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
7571 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
7572 }
7573 MVT ContainerVT = VT;
7574 if (VT.isFixedLengthVector())
7575 ContainerVT = getContainerForFixedLengthVector(VT);
7576 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
7577
7578 SDValue V;
7579 if (VT.isFloatingPoint()) {
7580 V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
7581 DAG.getUNDEF(ContainerVT), Scalar, VL);
7582 } else {
7583 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
7584 V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
7585 DAG.getUNDEF(ContainerVT), Scalar, VL);
7586 }
7587 if (VT.isFixedLengthVector())
7588 V = convertFromScalableVector(VT, V, DAG, Subtarget);
7589 return V;
7590 }
7591 case ISD::VSCALE: {
7592 MVT XLenVT = Subtarget.getXLenVT();
7593 MVT VT = Op.getSimpleValueType();
7594 SDLoc DL(Op);
7595 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7596 // We define our scalable vector types for lmul=1 to use a 64 bit known
7597 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
7598 // vscale as VLENB / 8.
7599 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
7600 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
7601 reportFatalInternalError("Support for VLEN==32 is incomplete.");
7602 // We assume VLENB is a multiple of 8. We manually choose the best shift
7603 // here because SimplifyDemandedBits isn't always able to simplify it.
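// A small example of the shift selection below (multipliers assumed):
// vscale * 4 becomes VLENB >> 1 (4/8 = 2^-1), vscale * 24 becomes VLENB * 3
// (24/8 = 3), and vscale * 5 falls back to (VLENB >> 3) * 5.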
7604 uint64_t Val = Op.getConstantOperandVal(0);
7605 if (isPowerOf2_64(Val)) {
7606 uint64_t Log2 = Log2_64(Val);
7607 if (Log2 < 3) {
7608 SDNodeFlags Flags;
7609 Flags.setExact(true);
7610 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
7611 DAG.getConstant(3 - Log2, DL, XLenVT), Flags);
7612 } else if (Log2 > 3) {
7613 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
7614 DAG.getConstant(Log2 - 3, DL, XLenVT));
7615 }
7616 } else if ((Val % 8) == 0) {
7617 // If the multiplier is a multiple of 8, scale it down to avoid needing
7618 // to shift the VLENB value.
7619 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
7620 DAG.getConstant(Val / 8, DL, XLenVT));
7621 } else {
7622 SDNodeFlags Flags;
7623 Flags.setExact(true);
7624 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
7625 DAG.getConstant(3, DL, XLenVT), Flags);
7626 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
7627 DAG.getConstant(Val, DL, XLenVT));
7628 }
7629 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
7630 }
7631 case ISD::FPOWI: {
7632 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
7633 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
7634 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
7635 Op.getOperand(1).getValueType() == MVT::i32) {
7636 SDLoc DL(Op);
7637 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7638 SDValue Powi =
7639 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
7640 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
7641 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7642 }
7643 return SDValue();
7644 }
7645 case ISD::FMAXIMUM:
7646 case ISD::FMINIMUM:
7647 if (isPromotedOpNeedingSplit(Op, Subtarget))
7648 return SplitVectorOp(Op, DAG);
7649 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7650 case ISD::FP_EXTEND:
7651 case ISD::FP_ROUND:
7652 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7653 case ISD::STRICT_FP_ROUND:
7654 case ISD::STRICT_FP_EXTEND:
7655 return lowerStrictFPExtendOrRoundLike(Op, DAG);
7656 case ISD::SINT_TO_FP:
7657 case ISD::UINT_TO_FP:
7658 if (Op.getValueType().isVector() &&
7659 ((Op.getValueType().getScalarType() == MVT::f16 &&
7660 (Subtarget.hasVInstructionsF16Minimal() &&
7661 !Subtarget.hasVInstructionsF16())) ||
7662 Op.getValueType().getScalarType() == MVT::bf16)) {
7663 if (isPromotedOpNeedingSplit(Op, Subtarget))
7664 return SplitVectorOp(Op, DAG);
7665 // int -> f32
7666 SDLoc DL(Op);
7667 MVT NVT =
7668 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7669 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7670 // f32 -> [b]f16
7671 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7672 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7673 }
7674 [[fallthrough]];
7675 case ISD::FP_TO_SINT:
7676 case ISD::FP_TO_UINT:
7677 if (SDValue Op1 = Op.getOperand(0);
7678 Op1.getValueType().isVector() &&
7679 ((Op1.getValueType().getScalarType() == MVT::f16 &&
7680 (Subtarget.hasVInstructionsF16Minimal() &&
7681 !Subtarget.hasVInstructionsF16())) ||
7682 Op1.getValueType().getScalarType() == MVT::bf16)) {
7683 if (isPromotedOpNeedingSplit(Op1, Subtarget))
7684 return SplitVectorOp(Op, DAG);
7685 // [b]f16 -> f32
7686 SDLoc DL(Op);
7687 MVT NVT = MVT::getVectorVT(MVT::f32,
7688 Op1.getValueType().getVectorElementCount());
7689 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7690 // f32 -> int
7691 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
7692 }
7693 [[fallthrough]];
7694 case ISD::STRICT_FP_TO_SINT:
7695 case ISD::STRICT_FP_TO_UINT:
7696 case ISD::STRICT_SINT_TO_FP:
7697 case ISD::STRICT_UINT_TO_FP: {
7698 // RVV can only do fp<->int conversions to types half or double the size
7699 // of the source. Conversions that would need two hops are custom-lowered
7700 // here into two-step sequences.
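// As an assumed example, converting <vscale x 2 x i8> to <vscale x 2 x f32>
// needs two hops: the i8 elements are first sign- or zero-extended to i16
// (half of f32's width), and the i16 -> f32 step is then a single widening
// conversion the hardware can perform directly.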
7701 MVT VT = Op.getSimpleValueType();
7702 if (VT.isScalarInteger())
7703 return lowerFP_TO_INT(Op, DAG, Subtarget);
7704 bool IsStrict = Op->isStrictFPOpcode();
7705 SDValue Src = Op.getOperand(0 + IsStrict);
7706 MVT SrcVT = Src.getSimpleValueType();
7707 if (SrcVT.isScalarInteger())
7708 return lowerINT_TO_FP(Op, DAG, Subtarget);
7709 if (!VT.isVector())
7710 return Op;
7711 SDLoc DL(Op);
7712 MVT EltVT = VT.getVectorElementType();
7713 MVT SrcEltVT = SrcVT.getVectorElementType();
7714 unsigned EltSize = EltVT.getSizeInBits();
7715 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
7716 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
7717 "Unexpected vector element types");
7718
7719 bool IsInt2FP = SrcEltVT.isInteger();
7720 // Widening conversions
7721 if (EltSize > (2 * SrcEltSize)) {
7722 if (IsInt2FP) {
7723 // Do a regular integer sign/zero extension then convert to float.
7724 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
7725 VT.getVectorElementCount());
7726 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
7727 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
7728 ? ISD::ZERO_EXTEND
7729 : ISD::SIGN_EXTEND;
7730 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
7731 if (IsStrict)
7732 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
7733 Op.getOperand(0), Ext);
7734 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
7735 }
7736 // FP2Int
7737 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
7738 // Do one doubling fp_extend then complete the operation by converting
7739 // to int.
7740 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7741 if (IsStrict) {
7742 auto [FExt, Chain] =
7743 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
7744 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
7745 }
7746 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
7747 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
7748 }
7749
7750 // Narrowing conversions
7751 if (SrcEltSize > (2 * EltSize)) {
7752 if (IsInt2FP) {
7753 // One narrowing int_to_fp, then an fp_round.
7754 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
7755 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7756 if (IsStrict) {
7757 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
7758 DAG.getVTList(InterimFVT, MVT::Other),
7759 Op.getOperand(0), Src);
7760 SDValue Chain = Int2FP.getValue(1);
7761 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
7762 }
7763 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
7764 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
7765 }
7766 // FP2Int
7767 // One narrowing fp_to_int, then truncate the integer. If the float isn't
7768 // representable by the integer, the result is poison.
7769 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
7770 VT.getVectorElementCount());
7771 if (IsStrict) {
7772 SDValue FP2Int =
7773 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
7774 Op.getOperand(0), Src);
7775 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7776 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
7777 }
7778 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
7779 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7780 }
7781
7782 // Scalable vectors can exit here. Patterns will handle equally-sized
7783 // conversions halving/doubling ones.
7784 if (!VT.isFixedLengthVector())
7785 return Op;
7786
7787 // For fixed-length vectors we lower to a custom "VL" node.
7788 unsigned RVVOpc = 0;
7789 switch (Op.getOpcode()) {
7790 default:
7791 llvm_unreachable("Impossible opcode");
7792 case ISD::FP_TO_SINT:
7793 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
7794 break;
7795 case ISD::FP_TO_UINT:
7796 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
7797 break;
7798 case ISD::SINT_TO_FP:
7799 RVVOpc = RISCVISD::SINT_TO_FP_VL;
7800 break;
7801 case ISD::UINT_TO_FP:
7802 RVVOpc = RISCVISD::UINT_TO_FP_VL;
7803 break;
7804 case ISD::STRICT_FP_TO_SINT:
7805 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
7806 break;
7807 case ISD::STRICT_FP_TO_UINT:
7808 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
7809 break;
7810 case ISD::STRICT_SINT_TO_FP:
7811 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
7812 break;
7813 case ISD::STRICT_UINT_TO_FP:
7814 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
7815 break;
7816 }
7817
7818 MVT ContainerVT = getContainerForFixedLengthVector(VT);
7819 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
7820 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
7821 "Expected same element count");
7822
7823 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7824
7825 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7826 if (IsStrict) {
7827 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
7828 Op.getOperand(0), Src, Mask, VL);
7829 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
7830 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
7831 }
7832 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
7833 return convertFromScalableVector(VT, Src, DAG, Subtarget);
7834 }
7835 case ISD::FP_TO_SINT_SAT:
7836 case ISD::FP_TO_UINT_SAT:
7837 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
7838 case ISD::FP_TO_BF16: {
7839 // Custom lower to ensure the libcall return is passed in an FPR on hard
7840 // float ABIs.
7841 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
7842 SDLoc DL(Op);
7843 MakeLibCallOptions CallOptions;
7844 RTLIB::Libcall LC =
7845 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
7846 SDValue Res =
7847 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
7848 if (Subtarget.is64Bit())
7849 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7850 return DAG.getBitcast(MVT::i32, Res);
7851 }
7852 case ISD::BF16_TO_FP: {
7853 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
7854 MVT VT = Op.getSimpleValueType();
7855 SDLoc DL(Op);
7856 Op = DAG.getNode(
7857 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
7858 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
7859 SDValue Res = Subtarget.is64Bit()
7860 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
7861 : DAG.getBitcast(MVT::f32, Op);
7862 // fp_extend if the target VT is bigger than f32.
7863 if (VT != MVT::f32)
7864 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
7865 return Res;
7866 }
7867 case ISD::STRICT_FP_TO_FP16:
7868 case ISD::FP_TO_FP16: {
7869 // Custom lower to ensure the libcall return is passed in an FPR on hard
7870 // float ABIs.
7871 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7872 SDLoc DL(Op);
7873 MakeLibCallOptions CallOptions;
7874 bool IsStrict = Op->isStrictFPOpcode();
7875 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7876 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7877 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
7878 SDValue Res;
7879 std::tie(Res, Chain) =
7880 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
7881 if (Subtarget.is64Bit())
7882 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7883 SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
7884 if (IsStrict)
7885 return DAG.getMergeValues({Result, Chain}, DL);
7886 return Result;
7887 }
7888 case ISD::STRICT_FP16_TO_FP:
7889 case ISD::FP16_TO_FP: {
7890 // Custom lower to ensure the libcall argument is passed in an FPR on hard
7891 // float ABIs.
7892 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7893 SDLoc DL(Op);
7894 MakeLibCallOptions CallOptions;
7895 bool IsStrict = Op->isStrictFPOpcode();
7896 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7897 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7898 SDValue Arg = Subtarget.is64Bit()
7899 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op0)
7900 : DAG.getBitcast(MVT::f32, Op0);
7901 SDValue Res;
7902 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
7903 CallOptions, DL, Chain);
7904 if (IsStrict)
7905 return DAG.getMergeValues({Res, Chain}, DL);
7906 return Res;
7907 }
7908 case ISD::FTRUNC:
7909 case ISD::FCEIL:
7910 case ISD::FFLOOR:
7911 case ISD::FNEARBYINT:
7912 case ISD::FRINT:
7913 case ISD::FROUND:
7914 case ISD::FROUNDEVEN:
7915 if (isPromotedOpNeedingSplit(Op, Subtarget))
7916 return SplitVectorOp(Op, DAG);
7917 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7918 case ISD::LRINT:
7919 case ISD::LLRINT:
7920 case ISD::LROUND:
7921 case ISD::LLROUND: {
7922 if (Op.getValueType().isVector())
7923 return lowerVectorXRINT_XROUND(Op, DAG, Subtarget);
7924 assert(Op.getOperand(0).getValueType() == MVT::f16 &&
7925 "Unexpected custom legalisation");
7926 SDLoc DL(Op);
7927 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7928 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
7929 }
7930 case ISD::STRICT_LRINT:
7931 case ISD::STRICT_LLRINT:
7932 case ISD::STRICT_LROUND:
7933 case ISD::STRICT_LLROUND: {
7934 assert(Op.getOperand(1).getValueType() == MVT::f16 &&
7935 "Unexpected custom legalisation");
7936 SDLoc DL(Op);
7937 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
7938 {Op.getOperand(0), Op.getOperand(1)});
7939 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
7940 {Ext.getValue(1), Ext.getValue(0)});
7941 }
7942 case ISD::VECREDUCE_ADD:
7943 case ISD::VECREDUCE_UMAX:
7944 case ISD::VECREDUCE_SMAX:
7945 case ISD::VECREDUCE_UMIN:
7946 case ISD::VECREDUCE_SMIN:
7947 return lowerVECREDUCE(Op, DAG);
7948 case ISD::VECREDUCE_AND:
7949 case ISD::VECREDUCE_OR:
7950 case ISD::VECREDUCE_XOR:
7951 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7952 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
7953 return lowerVECREDUCE(Op, DAG);
7954 case ISD::VECREDUCE_FADD:
7955 case ISD::VECREDUCE_SEQ_FADD:
7956 case ISD::VECREDUCE_FMIN:
7957 case ISD::VECREDUCE_FMAX:
7958 case ISD::VECREDUCE_FMAXIMUM:
7959 case ISD::VECREDUCE_FMINIMUM:
7960 return lowerFPVECREDUCE(Op, DAG);
7961 case ISD::VP_REDUCE_ADD:
7962 case ISD::VP_REDUCE_UMAX:
7963 case ISD::VP_REDUCE_SMAX:
7964 case ISD::VP_REDUCE_UMIN:
7965 case ISD::VP_REDUCE_SMIN:
7966 case ISD::VP_REDUCE_FADD:
7967 case ISD::VP_REDUCE_SEQ_FADD:
7968 case ISD::VP_REDUCE_FMIN:
7969 case ISD::VP_REDUCE_FMAX:
7970 case ISD::VP_REDUCE_FMINIMUM:
7971 case ISD::VP_REDUCE_FMAXIMUM:
7972 if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
7973 return SplitVectorReductionOp(Op, DAG);
7974 return lowerVPREDUCE(Op, DAG);
7975 case ISD::VP_REDUCE_AND:
7976 case ISD::VP_REDUCE_OR:
7977 case ISD::VP_REDUCE_XOR:
7978 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
7979 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
7980 return lowerVPREDUCE(Op, DAG);
7981 case ISD::VP_CTTZ_ELTS:
7982 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
7983 return lowerVPCttzElements(Op, DAG);
7984 case ISD::UNDEF: {
7985 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
7986 return convertFromScalableVector(Op.getSimpleValueType(),
7987 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
7988 }
7989 case ISD::INSERT_SUBVECTOR:
7990 return lowerINSERT_SUBVECTOR(Op, DAG);
7991 case ISD::EXTRACT_SUBVECTOR:
7992 return lowerEXTRACT_SUBVECTOR(Op, DAG);
7993 case ISD::VECTOR_DEINTERLEAVE:
7994 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
7995 case ISD::VECTOR_INTERLEAVE:
7996 return lowerVECTOR_INTERLEAVE(Op, DAG);
7997 case ISD::STEP_VECTOR:
7998 return lowerSTEP_VECTOR(Op, DAG);
7999 case ISD::VECTOR_REVERSE:
8000 return lowerVECTOR_REVERSE(Op, DAG);
8001 case ISD::VECTOR_SPLICE:
8002 return lowerVECTOR_SPLICE(Op, DAG);
8003 case ISD::BUILD_VECTOR: {
8004 MVT VT = Op.getSimpleValueType();
8005 MVT EltVT = VT.getVectorElementType();
8006 if (!Subtarget.is64Bit() && EltVT == MVT::i64)
8007 return lowerBuildVectorViaVID(Op, DAG, Subtarget);
8008 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
8009 }
8010 case ISD::SPLAT_VECTOR: {
8011 MVT VT = Op.getSimpleValueType();
8012 MVT EltVT = VT.getVectorElementType();
8013 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
8014 EltVT == MVT::bf16) {
8015 SDLoc DL(Op);
8016 SDValue Elt;
8017 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
8018 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
8019 Elt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(),
8020 Op.getOperand(0));
8021 else
8022 Elt = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op.getOperand(0));
8023 MVT IVT = VT.changeVectorElementType(MVT::i16);
8024 return DAG.getNode(ISD::BITCAST, DL, VT,
8025 DAG.getNode(ISD::SPLAT_VECTOR, DL, IVT, Elt));
8026 }
8027
8028 if (EltVT == MVT::i1)
8029 return lowerVectorMaskSplat(Op, DAG);
8030 return SDValue();
8031 }
8032 case ISD::VECTOR_SHUFFLE:
8033 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
8034 case ISD::CONCAT_VECTORS: {
8035 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
8036 // better than going through the stack, as the default expansion does.
8037 SDLoc DL(Op);
8038 MVT VT = Op.getSimpleValueType();
8039 MVT ContainerVT = VT;
8040 if (VT.isFixedLengthVector())
8041 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
8042
8043 // Recursively split concat_vectors with more than 2 operands:
8044 //
8045 // concat_vector op1, op2, op3, op4
8046 // ->
8047 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
8048 //
8049 // This reduces the length of the chain of vslideups and allows us to
8050 // perform the vslideups at a smaller LMUL, limited to MF2.
8051 if (Op.getNumOperands() > 2 &&
8052 ContainerVT.bitsGE(RISCVTargetLowering::getM1VT(ContainerVT))) {
8053 MVT HalfVT = VT.getHalfNumVectorElementsVT();
8054 assert(isPowerOf2_32(Op.getNumOperands()));
8055 size_t HalfNumOps = Op.getNumOperands() / 2;
8056 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
8057 Op->ops().take_front(HalfNumOps));
8058 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
8059 Op->ops().drop_front(HalfNumOps));
8060 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
8061 }
8062
8063 unsigned NumOpElts =
8064 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
8065 SDValue Vec = DAG.getUNDEF(VT);
8066 for (const auto &OpIdx : enumerate(Op->ops())) {
8067 SDValue SubVec = OpIdx.value();
8068 // Don't insert undef subvectors.
8069 if (SubVec.isUndef())
8070 continue;
8071 Vec = DAG.getInsertSubvector(DL, Vec, SubVec, OpIdx.index() * NumOpElts);
8072 }
8073 return Vec;
8074 }
8075 case ISD::LOAD: {
8076 auto *Load = cast<LoadSDNode>(Op);
8077 EVT VT = Load->getValueType(0);
8078 if (VT == MVT::f64) {
8079 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
8080 !Subtarget.is64Bit() && "Unexpected custom legalisation");
8081
8082 // Replace a double precision load with two i32 loads and a BuildPairF64.
8083 SDLoc DL(Op);
8084 SDValue BasePtr = Load->getBasePtr();
8085 SDValue Chain = Load->getChain();
8086
8087 SDValue Lo =
8088 DAG.getLoad(MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo(),
8089 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
8090 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8091 SDValue Hi = DAG.getLoad(
8092 MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo().getWithOffset(4),
8093 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
8094 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8095 Hi.getValue(1));
8096
8097 SDValue Pair = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
8098 return DAG.getMergeValues({Pair, Chain}, DL);
8099 }
8100
8101 if (VT == MVT::bf16)
8102 return lowerXAndesBfHCvtBFloat16Load(Op, DAG);
8103
8104 // Handle normal vector tuple load.
8105 if (VT.isRISCVVectorTuple()) {
8106 SDLoc DL(Op);
8107 MVT XLenVT = Subtarget.getXLenVT();
8108 unsigned NF = VT.getRISCVVectorTupleNumFields();
8109 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8110 unsigned NumElts = Sz / (NF * 8);
8111 int Log2LMUL = Log2_64(NumElts) - 3;
8112
8113 auto Flag = SDNodeFlags();
8114 Flag.setNoUnsignedWrap(true);
8115 SDValue Ret = DAG.getUNDEF(VT);
8116 SDValue BasePtr = Load->getBasePtr();
8117 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8118 VROffset =
8119 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8120 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8121 SmallVector<SDValue, 8> OutChains;
8122
8123 // Load NF vector registers and combine them to a vector tuple.
8124 for (unsigned i = 0; i < NF; ++i) {
8125 SDValue LoadVal = DAG.getLoad(
8126 MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(),
8127 BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
8128 OutChains.push_back(LoadVal.getValue(1));
8129 Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Ret, LoadVal,
8130 DAG.getTargetConstant(i, DL, MVT::i32));
8131 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8132 }
8133 return DAG.getMergeValues(
8134 {Ret, DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains)}, DL);
8135 }
8136
8137 if (auto V = expandUnalignedRVVLoad(Op, DAG))
8138 return V;
8139 if (Op.getValueType().isFixedLengthVector())
8140 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
8141 return Op;
8142 }
8143 case ISD::STORE: {
8144 auto *Store = cast<StoreSDNode>(Op);
8145 SDValue StoredVal = Store->getValue();
8146 EVT VT = StoredVal.getValueType();
8147 if (VT == MVT::f64) {
8148 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
8149 !Subtarget.is64Bit() && "Unexpected custom legalisation");
8150
8151 // Replace a double precision store with a SplitF64 and i32 stores.
8152 SDLoc DL(Op);
8153 SDValue BasePtr = Store->getBasePtr();
8154 SDValue Chain = Store->getChain();
8155 SDValue Split = DAG.getNode(RISCVISD::SplitF64, DL,
8156 DAG.getVTList(MVT::i32, MVT::i32), StoredVal);
8157
8158 SDValue Lo = DAG.getStore(Chain, DL, Split.getValue(0), BasePtr,
8159 Store->getPointerInfo(), Store->getBaseAlign(),
8160 Store->getMemOperand()->getFlags());
8161 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8162 SDValue Hi = DAG.getStore(Chain, DL, Split.getValue(1), BasePtr,
8163 Store->getPointerInfo().getWithOffset(4),
8164 Store->getBaseAlign(),
8165 Store->getMemOperand()->getFlags());
8166 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
8167 }
8168 if (VT == MVT::i64) {
8169 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
8170 "Unexpected custom legalisation");
8171 if (Store->isTruncatingStore())
8172 return SDValue();
8173
8174 if (!Subtarget.enableUnalignedScalarMem() && Store->getAlign() < 8)
8175 return SDValue();
8176
8177 SDLoc DL(Op);
8178 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8179 DAG.getTargetConstant(0, DL, MVT::i32));
8180 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8181 DAG.getTargetConstant(1, DL, MVT::i32));
8182
8183 return DAG.getMemIntrinsicNode(
8184 RISCVISD::SD_RV32, DL, DAG.getVTList(MVT::Other),
8185 {Store->getChain(), Lo, Hi, Store->getBasePtr()}, MVT::i64,
8186 Store->getMemOperand());
8187 }
8188
8189 if (VT == MVT::bf16)
8190 return lowerXAndesBfHCvtBFloat16Store(Op, DAG);
8191
8192 // Handle normal vector tuple store.
8193 if (VT.isRISCVVectorTuple()) {
8194 SDLoc DL(Op);
8195 MVT XLenVT = Subtarget.getXLenVT();
8196 unsigned NF = VT.getRISCVVectorTupleNumFields();
8197 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8198 unsigned NumElts = Sz / (NF * 8);
8199 int Log2LMUL = Log2_64(NumElts) - 3;
8200
8201 auto Flag = SDNodeFlags();
8202 Flag.setNoUnsignedWrap(true);
8203 SDValue Ret;
8204 SDValue Chain = Store->getChain();
8205 SDValue BasePtr = Store->getBasePtr();
8206 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8207 VROffset =
8208 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8209 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8210
8211 // Extract subregisters in a vector tuple and store them individually.
8212 for (unsigned i = 0; i < NF; ++i) {
8213 auto Extract =
8214 DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
8215 MVT::getScalableVectorVT(MVT::i8, NumElts), StoredVal,
8216 DAG.getTargetConstant(i, DL, MVT::i32));
8217 Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
8218 MachinePointerInfo(Store->getAddressSpace()),
8219 Store->getBaseAlign(),
8220 Store->getMemOperand()->getFlags());
8221 Chain = Ret.getValue(0);
8222 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8223 }
8224 return Ret;
8225 }
8226
8227 if (auto V = expandUnalignedRVVStore(Op, DAG))
8228 return V;
8229 if (Op.getOperand(1).getValueType().isFixedLengthVector())
8230 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
8231 return Op;
8232 }
8233 case ISD::MLOAD:
8234 case ISD::VP_LOAD:
8235 return lowerMaskedLoad(Op, DAG);
8236 case ISD::VP_LOAD_FF:
8237 return lowerLoadFF(Op, DAG);
8238 case ISD::MSTORE:
8239 case ISD::VP_STORE:
8240 return lowerMaskedStore(Op, DAG);
8241 case ISD::VECTOR_COMPRESS:
8242 return lowerVectorCompress(Op, DAG);
8243 case ISD::SELECT_CC: {
8244 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
8245 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
8246 // into separate SETCC+SELECT just like LegalizeDAG.
8247 SDValue Tmp1 = Op.getOperand(0);
8248 SDValue Tmp2 = Op.getOperand(1);
8249 SDValue True = Op.getOperand(2);
8250 SDValue False = Op.getOperand(3);
8251 EVT VT = Op.getValueType();
8252 SDValue CC = Op.getOperand(4);
8253 EVT CmpVT = Tmp1.getValueType();
8254 EVT CCVT =
8255 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
8256 SDLoc DL(Op);
8257 SDValue Cond =
8258 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
8259 return DAG.getSelect(DL, VT, Cond, True, False);
8260 }
8261 case ISD::SETCC: {
8262 MVT OpVT = Op.getOperand(0).getSimpleValueType();
8263 if (OpVT.isScalarInteger()) {
8264 MVT VT = Op.getSimpleValueType();
8265 SDValue LHS = Op.getOperand(0);
8266 SDValue RHS = Op.getOperand(1);
8267 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
8268 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
8269 "Unexpected CondCode");
8270
8271 SDLoc DL(Op);
8272
8273 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
8274 // convert this to the equivalent of (set(u)ge X, C+1) by using
8275 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
8276 // in a register.
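// For example, (setgt X, 5) is rewritten as (xori (slti X, 6), 1), i.e.
// !(X < 6), so the comparison constant stays in the SLTI immediate field.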
8277 if (isa<ConstantSDNode>(RHS)) {
8278 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
8279 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
8280 // If this is an unsigned compare and the constant is -1, incrementing
8281 // the constant would change behavior. The result should be false.
8282 if (CCVal == ISD::SETUGT && Imm == -1)
8283 return DAG.getConstant(0, DL, VT);
8284 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
8285 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8286 SDValue SetCC = DAG.getSetCC(
8287 DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
8288 return DAG.getLogicalNOT(DL, SetCC, VT);
8289 }
8290 // Lower (setugt X, 2047) as (setne (srl X, 11), 0).
8291 if (CCVal == ISD::SETUGT && Imm == 2047) {
8292 SDValue Shift = DAG.getNode(ISD::SRL, DL, OpVT, LHS,
8293 DAG.getShiftAmountConstant(11, OpVT, DL));
8294 return DAG.getSetCC(DL, VT, Shift, DAG.getConstant(0, DL, OpVT),
8295 ISD::SETNE);
8296 }
8297 }
8298
8299 // Not a constant we could handle, swap the operands and condition code to
8300 // SETLT/SETULT.
8301 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8302 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
8303 }
8304
8305 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
8306 return SplitVectorOp(Op, DAG);
8307
8308 return lowerToScalableOp(Op, DAG);
8309 }
8310 case ISD::ADD:
8311 case ISD::SUB:
8312 case ISD::MUL:
8313 case ISD::MULHS:
8314 case ISD::MULHU:
8315 case ISD::AND:
8316 case ISD::OR:
8317 case ISD::XOR:
8318 case ISD::SDIV:
8319 case ISD::SREM:
8320 case ISD::UDIV:
8321 case ISD::UREM:
8322 case ISD::BSWAP:
8323 case ISD::CTPOP:
8324 case ISD::VSELECT:
8325 return lowerToScalableOp(Op, DAG);
8326 case ISD::SHL:
8327 case ISD::SRA:
8328 case ISD::SRL:
8329 if (Op.getSimpleValueType().isFixedLengthVector())
8330 return lowerToScalableOp(Op, DAG);
8331 // This can be called for an i32 shift amount that needs to be promoted.
8332 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
8333 "Unexpected custom legalisation");
8334 return SDValue();
8335 case ISD::FABS:
8336 case ISD::FNEG:
8337 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8338 return lowerFABSorFNEG(Op, DAG, Subtarget);
8339 [[fallthrough]];
8340 case ISD::FADD:
8341 case ISD::FSUB:
8342 case ISD::FMUL:
8343 case ISD::FDIV:
8344 case ISD::FSQRT:
8345 case ISD::FMA:
8346 case ISD::FMINNUM:
8347 case ISD::FMAXNUM:
8348 case ISD::FMINIMUMNUM:
8349 case ISD::FMAXIMUMNUM:
8350 if (isPromotedOpNeedingSplit(Op, Subtarget))
8351 return SplitVectorOp(Op, DAG);
8352 [[fallthrough]];
8353 case ISD::AVGFLOORS:
8354 case ISD::AVGFLOORU:
8355 case ISD::AVGCEILS:
8356 case ISD::AVGCEILU:
8357 case ISD::SMIN:
8358 case ISD::SMAX:
8359 case ISD::UMIN:
8360 case ISD::UMAX:
8361 case ISD::UADDSAT:
8362 case ISD::USUBSAT:
8363 case ISD::SADDSAT:
8364 case ISD::SSUBSAT:
8365 return lowerToScalableOp(Op, DAG);
8366 case ISD::ABDS:
8367 case ISD::ABDU: {
8368 SDLoc dl(Op);
8369 EVT VT = Op->getValueType(0);
8370 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
8371 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
8372 bool IsSigned = Op->getOpcode() == ISD::ABDS;
8373
8374 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
8375 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
8376 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
8377 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
8378 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
8379 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
8380 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
8381 }
8382 case ISD::ABS:
8383 case ISD::VP_ABS:
8384 return lowerABS(Op, DAG);
8385 case ISD::CTLZ:
8386 case ISD::CTLZ_ZERO_UNDEF:
8387 case ISD::CTTZ:
8388 case ISD::CTTZ_ZERO_UNDEF:
8389 if (Subtarget.hasStdExtZvbb())
8390 return lowerToScalableOp(Op, DAG);
8391 assert(Op.getOpcode() != ISD::CTTZ);
8392 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8393 case ISD::FCOPYSIGN:
8394 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8395 return lowerFCOPYSIGN(Op, DAG, Subtarget);
8396 if (isPromotedOpNeedingSplit(Op, Subtarget))
8397 return SplitVectorOp(Op, DAG);
8398 return lowerToScalableOp(Op, DAG);
8399 case ISD::STRICT_FADD:
8400 case ISD::STRICT_FSUB:
8401 case ISD::STRICT_FMUL:
8402 case ISD::STRICT_FDIV:
8403 case ISD::STRICT_FSQRT:
8404 case ISD::STRICT_FMA:
8405 if (isPromotedOpNeedingSplit(Op, Subtarget))
8406 return SplitStrictFPVectorOp(Op, DAG);
8407 return lowerToScalableOp(Op, DAG);
8408 case ISD::STRICT_FSETCC:
8409 case ISD::STRICT_FSETCCS:
8410 return lowerVectorStrictFSetcc(Op, DAG);
8411 case ISD::STRICT_FCEIL:
8412 case ISD::STRICT_FRINT:
8413 case ISD::STRICT_FFLOOR:
8414 case ISD::STRICT_FTRUNC:
8415 case ISD::STRICT_FNEARBYINT:
8416 case ISD::STRICT_FROUND:
8417 case ISD::STRICT_FROUNDEVEN:
8418 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8419 case ISD::MGATHER:
8420 case ISD::VP_GATHER:
8421 return lowerMaskedGather(Op, DAG);
8422 case ISD::MSCATTER:
8423 case ISD::VP_SCATTER:
8424 return lowerMaskedScatter(Op, DAG);
8425 case ISD::GET_ROUNDING:
8426 return lowerGET_ROUNDING(Op, DAG);
8427 case ISD::SET_ROUNDING:
8428 return lowerSET_ROUNDING(Op, DAG);
8429 case ISD::GET_FPENV:
8430 return lowerGET_FPENV(Op, DAG);
8431 case ISD::SET_FPENV:
8432 return lowerSET_FPENV(Op, DAG);
8433 case ISD::RESET_FPENV:
8434 return lowerRESET_FPENV(Op, DAG);
8435 case ISD::GET_FPMODE:
8436 return lowerGET_FPMODE(Op, DAG);
8437 case ISD::SET_FPMODE:
8438 return lowerSET_FPMODE(Op, DAG);
8439 case ISD::RESET_FPMODE:
8440 return lowerRESET_FPMODE(Op, DAG);
8441 case ISD::EH_DWARF_CFA:
8442 return lowerEH_DWARF_CFA(Op, DAG);
8443 case ISD::VP_MERGE:
8444 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
8445 return lowerVPMergeMask(Op, DAG);
8446 [[fallthrough]];
8447 case ISD::VP_SELECT:
8448 case ISD::VP_ADD:
8449 case ISD::VP_SUB:
8450 case ISD::VP_MUL:
8451 case ISD::VP_SDIV:
8452 case ISD::VP_UDIV:
8453 case ISD::VP_SREM:
8454 case ISD::VP_UREM:
8455 case ISD::VP_UADDSAT:
8456 case ISD::VP_USUBSAT:
8457 case ISD::VP_SADDSAT:
8458 case ISD::VP_SSUBSAT:
8459 case ISD::VP_LRINT:
8460 case ISD::VP_LLRINT:
8461 return lowerVPOp(Op, DAG);
8462 case ISD::VP_AND:
8463 case ISD::VP_OR:
8464 case ISD::VP_XOR:
8465 return lowerLogicVPOp(Op, DAG);
8466 case ISD::VP_FADD:
8467 case ISD::VP_FSUB:
8468 case ISD::VP_FMUL:
8469 case ISD::VP_FDIV:
8470 case ISD::VP_FNEG:
8471 case ISD::VP_FABS:
8472 case ISD::VP_SQRT:
8473 case ISD::VP_FMA:
8474 case ISD::VP_FMINNUM:
8475 case ISD::VP_FMAXNUM:
8476 case ISD::VP_FCOPYSIGN:
8477 if (isPromotedOpNeedingSplit(Op, Subtarget))
8478 return SplitVPOp(Op, DAG);
8479 [[fallthrough]];
8480 case ISD::VP_SRA:
8481 case ISD::VP_SRL:
8482 case ISD::VP_SHL:
8483 return lowerVPOp(Op, DAG);
8484 case ISD::VP_IS_FPCLASS:
8485 return LowerIS_FPCLASS(Op, DAG);
8486 case ISD::VP_SIGN_EXTEND:
8487 case ISD::VP_ZERO_EXTEND:
8488 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
8489 return lowerVPExtMaskOp(Op, DAG);
8490 return lowerVPOp(Op, DAG);
8491 case ISD::VP_TRUNCATE:
8492 return lowerVectorTruncLike(Op, DAG);
8493 case ISD::VP_FP_EXTEND:
8494 case ISD::VP_FP_ROUND:
8495 return lowerVectorFPExtendOrRoundLike(Op, DAG);
8496 case ISD::VP_SINT_TO_FP:
8497 case ISD::VP_UINT_TO_FP:
8498 if (Op.getValueType().isVector() &&
8499 ((Op.getValueType().getScalarType() == MVT::f16 &&
8500 (Subtarget.hasVInstructionsF16Minimal() &&
8501 !Subtarget.hasVInstructionsF16())) ||
8502 Op.getValueType().getScalarType() == MVT::bf16)) {
8503 if (isPromotedOpNeedingSplit(Op, Subtarget))
8504 return SplitVectorOp(Op, DAG);
8505 // int -> f32
8506 SDLoc DL(Op);
8507 MVT NVT =
8508 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
8509 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
8510 // f32 -> [b]f16
8511 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
8512 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
8513 }
8514 [[fallthrough]];
8515 case ISD::VP_FP_TO_SINT:
8516 case ISD::VP_FP_TO_UINT:
8517 if (SDValue Op1 = Op.getOperand(0);
8518 Op1.getValueType().isVector() &&
8519 ((Op1.getValueType().getScalarType() == MVT::f16 &&
8520 (Subtarget.hasVInstructionsF16Minimal() &&
8521 !Subtarget.hasVInstructionsF16())) ||
8522 Op1.getValueType().getScalarType() == MVT::bf16)) {
8523 if (isPromotedOpNeedingSplit(Op1, Subtarget))
8524 return SplitVectorOp(Op, DAG);
8525 // [b]f16 -> f32
8526 SDLoc DL(Op);
8527 MVT NVT = MVT::getVectorVT(MVT::f32,
8528 Op1.getValueType().getVectorElementCount());
8529 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
8530 // f32 -> int
8531 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
8532 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
8533 }
8534 return lowerVPFPIntConvOp(Op, DAG);
8535 case ISD::VP_SETCC:
8536 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
8537 return SplitVPOp(Op, DAG);
8538 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
8539 return lowerVPSetCCMaskOp(Op, DAG);
8540 [[fallthrough]];
8541 case ISD::VP_SMIN:
8542 case ISD::VP_SMAX:
8543 case ISD::VP_UMIN:
8544 case ISD::VP_UMAX:
8545 case ISD::VP_BITREVERSE:
8546 case ISD::VP_BSWAP:
8547 return lowerVPOp(Op, DAG);
8548 case ISD::VP_CTLZ:
8549 case ISD::VP_CTLZ_ZERO_UNDEF:
8550 if (Subtarget.hasStdExtZvbb())
8551 return lowerVPOp(Op, DAG);
8552 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8553 case ISD::VP_CTTZ:
8554 case ISD::VP_CTTZ_ZERO_UNDEF:
8555 if (Subtarget.hasStdExtZvbb())
8556 return lowerVPOp(Op, DAG);
8557 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8558 case ISD::VP_CTPOP:
8559 return lowerVPOp(Op, DAG);
8560 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
8561 return lowerVPStridedLoad(Op, DAG);
8562 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
8563 return lowerVPStridedStore(Op, DAG);
8564 case ISD::VP_FCEIL:
8565 case ISD::VP_FFLOOR:
8566 case ISD::VP_FRINT:
8567 case ISD::VP_FNEARBYINT:
8568 case ISD::VP_FROUND:
8569 case ISD::VP_FROUNDEVEN:
8570 case ISD::VP_FROUNDTOZERO:
8571 if (isPromotedOpNeedingSplit(Op, Subtarget))
8572 return SplitVPOp(Op, DAG);
8573 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8574 case ISD::VP_FMAXIMUM:
8575 case ISD::VP_FMINIMUM:
8576 if (isPromotedOpNeedingSplit(Op, Subtarget))
8577 return SplitVPOp(Op, DAG);
8578 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
8579 case ISD::EXPERIMENTAL_VP_SPLICE:
8580 return lowerVPSpliceExperimental(Op, DAG);
8581 case ISD::EXPERIMENTAL_VP_REVERSE:
8582 return lowerVPReverseExperimental(Op, DAG);
8583 case ISD::EXPERIMENTAL_VP_SPLAT:
8584 return lowerVPSplatExperimental(Op, DAG);
8585 case ISD::CLEAR_CACHE: {
8586 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
8587 "llvm.clear_cache only needs custom lower on Linux targets");
8588 SDLoc DL(Op);
8589 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8590 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
8591 Op.getOperand(2), Flags, DL);
8592 }
8593 case ISD::DYNAMIC_STACKALLOC:
8594 return lowerDYNAMIC_STACKALLOC(Op, DAG);
8595 case ISD::INIT_TRAMPOLINE:
8596 return lowerINIT_TRAMPOLINE(Op, DAG);
8597 case ISD::ADJUST_TRAMPOLINE:
8598 return lowerADJUST_TRAMPOLINE(Op, DAG);
8599 case ISD::PARTIAL_REDUCE_UMLA:
8600 case ISD::PARTIAL_REDUCE_SMLA:
8601 case ISD::PARTIAL_REDUCE_SUMLA:
8602 return lowerPARTIAL_REDUCE_MLA(Op, DAG);
8603 }
8604}
8605
8606SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
8607 SDValue Start, SDValue End,
8608 SDValue Flags, SDLoc DL) const {
8609 MakeLibCallOptions CallOptions;
8610 std::pair<SDValue, SDValue> CallResult =
8611 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
8612 {Start, End, Flags}, CallOptions, DL, InChain);
8613
8614 // This function returns void so only the out chain matters.
8615 return CallResult.second;
8616}
8617
8618SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
8619 SelectionDAG &DAG) const {
8620 if (!Subtarget.is64Bit())
8621 llvm::reportFatalUsageError("Trampolines only implemented for RV64");
8622
8623 // Create an MCCodeEmitter to encode instructions.
8624 TargetLoweringObjectFile *TLO = getTargetMachine().getObjFileLowering();
8625 assert(TLO);
8626 MCContext &MCCtx = TLO->getContext();
8627
8628 std::unique_ptr<MCCodeEmitter> CodeEmitter(
8629 createRISCVMCCodeEmitter(*getTargetMachine().getMCInstrInfo(), MCCtx));
8630
8631 SDValue Root = Op.getOperand(0);
8632 SDValue Trmp = Op.getOperand(1); // trampoline
8633 SDLoc dl(Op);
8634
8635 const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
8636
8637 // We store in the trampoline buffer the following instructions and data.
8638 // Offset:
8639 // 0: auipc t2, 0
8640 // 4: ld t0, 24(t2)
8641 // 8: ld t2, 16(t2)
8642 // 12: jalr t0
8643 // 16: <StaticChainOffset>
8644 // 24: <FunctionAddressOffset>
8645 // 32:
8646 // Offset with branch control flow protection enabled:
8647 // 0: lpad <imm20>
8648 // 4: auipc t3, 0
8649 // 8: ld t2, 28(t3)
8650 // 12: ld t3, 20(t3)
8651 // 16: jalr t2
8652 // 20: <StaticChainOffset>
8653 // 28: <FunctionAddressOffset>
8654 // 36:
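// At run time, a call through the finished trampoline therefore re-loads the
// function pointer and the static chain value from the buffer itself (the
// leading auipc materializes the trampoline's own address) before jumping to
// the nested function.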
8655
8656 const bool HasCFBranch =
8657 Subtarget.hasStdExtZicfilp() &&
8658 DAG.getMachineFunction().getFunction().hasFnAttribute(
8659 "cf-protection-branch");
8660 const unsigned StaticChainIdx = HasCFBranch ? 5 : 4;
8661 const unsigned StaticChainOffset = StaticChainIdx * 4;
8662 const unsigned FunctionAddressOffset = StaticChainOffset + 8;
8663
8664 const MCSubtargetInfo *STI = getTargetMachine().getMCSubtargetInfo();
8665 assert(STI);
8666 auto GetEncoding = [&](const MCInst &MC) {
8667 SmallVector<char, 32> CB;
8668 SmallVector<MCFixup> Fixups;
8669 CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
8670 uint32_t Encoding = support::endian::read32le(CB.data());
8671 return Encoding;
8672 };
8673
8674 SmallVector<SDValue> OutChains;
8675
8676 SmallVector<uint32_t> Encodings;
8677 if (!HasCFBranch) {
8678 Encodings.append(
8679 {// auipc t2, 0
8680 // Loads the current PC into t2.
8681 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
8682 // ld t0, 24(t2)
8683 // Loads the function address into t0. Note that we are using offsets
8684 // pc-relative to the first instruction of the trampoline.
8685 GetEncoding(MCInstBuilder(RISCV::LD)
8686 .addReg(RISCV::X5)
8687 .addReg(RISCV::X7)
8688 .addImm(FunctionAddressOffset)),
8689 // ld t2, 16(t2)
8690 // Load the value of the static chain.
8691 GetEncoding(MCInstBuilder(RISCV::LD)
8692 .addReg(RISCV::X7)
8693 .addReg(RISCV::X7)
8694 .addImm(StaticChainOffset)),
8695 // jalr t0
8696 // Jump to the function.
8697 GetEncoding(MCInstBuilder(RISCV::JALR)
8698 .addReg(RISCV::X0)
8699 .addReg(RISCV::X5)
8700 .addImm(0))});
8701 } else {
8702 Encodings.append(
8703 {// auipc x0, <imm20> (lpad <imm20>)
8704 // Landing pad.
8705 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X0).addImm(0)),
8706 // auipc t3, 0
8707 // Loads the current PC into t3.
8708 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X28).addImm(0)),
8709 // ld t2, (FunctionAddressOffset - 4)(t3)
8710 // Loads the function address into t2. Note that we are using offsets
8711 // pc-relative to the SECOND instruction of the trampoline.
8712 GetEncoding(MCInstBuilder(RISCV::LD)
8713 .addReg(RISCV::X7)
8714 .addReg(RISCV::X28)
8715 .addImm(FunctionAddressOffset - 4)),
8716 // ld t3, (StaticChainOffset - 4)(t3)
8717 // Load the value of the static chain.
8718 GetEncoding(MCInstBuilder(RISCV::LD)
8719 .addReg(RISCV::X28)
8720 .addReg(RISCV::X28)
8721 .addImm(StaticChainOffset - 4)),
8722 // jalr t2
8723 // Software-guarded jump to the function.
8724 GetEncoding(MCInstBuilder(RISCV::JALR)
8725 .addReg(RISCV::X0)
8726 .addReg(RISCV::X7)
8727 .addImm(0))});
8728 }
8729
8730 // Store encoded instructions.
8731 for (auto [Idx, Encoding] : llvm::enumerate(Encodings)) {
8732 SDValue Addr = Idx > 0 ? DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
8733 DAG.getConstant(Idx * 4, dl, MVT::i64))
8734 : Trmp;
8735 OutChains.push_back(DAG.getTruncStore(
8736 Root, dl, DAG.getConstant(Encoding, dl, MVT::i64), Addr,
8737 MachinePointerInfo(TrmpAddr, Idx * 4), MVT::i32));
8738 }
8739
8740 // Now store the variable part of the trampoline.
8741 SDValue FunctionAddress = Op.getOperand(2);
8742 SDValue StaticChain = Op.getOperand(3);
8743
8744 // Store the given static chain and function pointer in the trampoline buffer.
8745 struct OffsetValuePair {
8746 const unsigned Offset;
8747 const SDValue Value;
8748 SDValue Addr = SDValue(); // Used to cache the address.
8749 } OffsetValues[] = {
8750 {StaticChainOffset, StaticChain},
8751 {FunctionAddressOffset, FunctionAddress},
8752 };
8753 for (auto &OffsetValue : OffsetValues) {
8754 SDValue Addr =
8755 DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
8756 DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
8757 OffsetValue.Addr = Addr;
8758 OutChains.push_back(
8759 DAG.getStore(Root, dl, OffsetValue.Value, Addr,
8760 MachinePointerInfo(TrmpAddr, OffsetValue.Offset)));
8761 }
8762
8763 assert(OutChains.size() == StaticChainIdx + 2 &&
8764 "Size of OutChains mismatch");
8765 SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
8766
8767 // The end of the trampoline's instructions is the same as the static chain
8768 // address that we computed earlier.
8769 SDValue EndOfTrmp = OffsetValues[0].Addr;
8770
8771 // Call clear cache on the trampoline instructions.
8772 SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
8773 Trmp, EndOfTrmp);
8774
8775 return Chain;
8776}
8777
8778SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
8779 SelectionDAG &DAG) const {
8780 if (!Subtarget.is64Bit())
8781 llvm::reportFatalUsageError("Trampolines only implemented for RV64");
8782
8783 return Op.getOperand(0);
8784}
8785
8786SDValue RISCVTargetLowering::lowerPARTIAL_REDUCE_MLA(SDValue Op,
8787 SelectionDAG &DAG) const {
8788 // Currently, only the vqdot and vqdotu cases (from zvqdotq) should be legal.
8789 // TODO: There are many other sub-cases we could potentially lower; are
8790 // any of them worthwhile? E.g. via vredsum, vwredsum, vwwmaccu, etc.
8791 SDLoc DL(Op);
8792 MVT VT = Op.getSimpleValueType();
8793 SDValue Accum = Op.getOperand(0);
8794 assert(Accum.getSimpleValueType() == VT &&
8795 VT.getVectorElementType() == MVT::i32);
8796 SDValue A = Op.getOperand(1);
8797 SDValue B = Op.getOperand(2);
8798 MVT ArgVT = A.getSimpleValueType();
8799 assert(ArgVT == B.getSimpleValueType() &&
8800 ArgVT.getVectorElementType() == MVT::i8);
8801 (void)ArgVT;
8802
8803 // The zvqdotq pseudos are defined with sources and destination both
8804 // being i32. This cast is needed for correctness to avoid incorrect
8805 // .vx matching of i8 splats.
8806 A = DAG.getBitcast(VT, A);
8807 B = DAG.getBitcast(VT, B);
8808
8809 MVT ContainerVT = VT;
8810 if (VT.isFixedLengthVector()) {
8811 ContainerVT = getContainerForFixedLengthVector(VT);
8812 Accum = convertToScalableVector(ContainerVT, Accum, DAG, Subtarget);
8813 A = convertToScalableVector(ContainerVT, A, DAG, Subtarget);
8814 B = convertToScalableVector(ContainerVT, B, DAG, Subtarget);
8815 }
8816
8817 unsigned Opc;
8818 switch (Op.getOpcode()) {
8819 case ISD::PARTIAL_REDUCE_SMLA:
8820 Opc = RISCVISD::VQDOT_VL;
8821 break;
8822 case ISD::PARTIAL_REDUCE_UMLA:
8823 Opc = RISCVISD::VQDOTU_VL;
8824 break;
8825 case ISD::PARTIAL_REDUCE_SUMLA:
8826 Opc = RISCVISD::VQDOTSU_VL;
8827 break;
8828 default:
8829 llvm_unreachable("Unexpected opcode");
8830 }
8831 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8832 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, {A, B, Accum, Mask, VL});
8833 if (VT.isFixedLengthVector())
8834 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
8835 return Res;
8836}
8837
8838 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
8839 SelectionDAG &DAG, unsigned Flags) {
8840 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
8841}
8842
8843 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
8844 SelectionDAG &DAG, unsigned Flags) {
8845 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
8846 Flags);
8847}
8848
8849 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
8850 SelectionDAG &DAG, unsigned Flags) {
8851 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
8852 N->getOffset(), Flags);
8853}
8854
8855 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
8856 SelectionDAG &DAG, unsigned Flags) {
8857 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
8858}
8859
8860 static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL,
8861 EVT Ty, SelectionDAG &DAG) {
8862 RISCVConstantPoolValue *CPV = RISCVConstantPoolValue::Create(N->getGlobal());
8863 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
8864 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
8865 return DAG.getLoad(
8866 Ty, DL, DAG.getEntryNode(), LC,
8867 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
8868 }
8869
8870 static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL,
8871 EVT Ty, SelectionDAG &DAG) {
8872 RISCVConstantPoolValue *CPV =
8873 RISCVConstantPoolValue::Create(*DAG.getContext(), N->getSymbol());
8874 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
8875 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
8876 return DAG.getLoad(
8877 Ty, DL, DAG.getEntryNode(), LC,
8878 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
8879 }
8880
8881template <class NodeTy>
8882SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
8883 bool IsLocal, bool IsExternWeak) const {
8884 SDLoc DL(N);
8885 EVT Ty = getPointerTy(DAG.getDataLayout());
8886
8887 // When HWASAN is used and tagging of global variables is enabled,
8888 // they should be accessed via the GOT, since the tagged address of a global
8889 // is incompatible with existing code models. This also applies to non-pic
8890 // mode.
8891 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
8892 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8893 if (IsLocal && !Subtarget.allowTaggedGlobals())
8894 // Use PC-relative addressing to access the symbol. This generates the
8895 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
8896 // %pcrel_lo(auipc)).
8897 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8898
8899 // Use PC-relative addressing to access the GOT for this symbol, then load
8900 // the address from the GOT. This generates the pattern (PseudoLGA sym),
8901 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
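// For RV64 this typically becomes the following sequence (the result register
// is chosen by the register allocator; a0 below is purely illustrative):
//   .Lpcrel_hi0: auipc a0, %got_pcrel_hi(sym)
//                ld    a0, %pcrel_lo(.Lpcrel_hi0)(a0)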
8902 SDValue Load =
8903 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
8904 MachineFunction &MF = DAG.getMachineFunction();
8905 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8906 MachinePointerInfo::getGOT(MF),
8907 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8908 MachineMemOperand::MOInvariant,
8909 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8910 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8911 return Load;
8912 }
8913
8914 switch (getTargetMachine().getCodeModel()) {
8915 default:
8916 reportFatalUsageError("Unsupported code model for lowering");
8917 case CodeModel::Small: {
8918 // Generate a sequence for accessing addresses within the first 2 GiB of
8919 // address space.
8920 if (Subtarget.hasVendorXqcili()) {
8921 // Use QC.E.LI to generate the address, as this is easier to relax than
8922 // LUI/ADDI.
8923 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8924 return DAG.getNode(RISCVISD::QC_E_LI, DL, Ty, Addr);
8925 }
8926
8927 // This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
8928 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
8929 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
8930 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
8931 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
8932 }
8933 case CodeModel::Medium: {
8934 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8935 if (IsExternWeak) {
8936 // An extern weak symbol may be undefined, i.e. have value 0, which may
8937 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
8938 // symbol. This generates the pattern (PseudoLGA sym), which expands to
8939 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
8940 SDValue Load =
8941 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
8942 MachineFunction &MF = DAG.getMachineFunction();
8943 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8944 MachinePointerInfo::getGOT(MF),
8945 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8946 MachineMemOperand::MOInvariant,
8947 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8948 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8949 return Load;
8950 }
8951
8952 // Generate a sequence for accessing addresses within any 2GiB range within
8953 // the address space. This generates the pattern (PseudoLLA sym), which
8954 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
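// As assembly (register choice illustrative only):
//   .Lpcrel_hi1: auipc a0, %pcrel_hi(sym)
//                addi  a0, a0, %pcrel_lo(.Lpcrel_hi1)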
8955 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8956 }
8957 case CodeModel::Large: {
8958 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N))
8959 return getLargeGlobalAddress(G, DL, Ty, DAG);
8960
8961 // Use PC-relative addressing for other node types.
8962 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8963 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8964 }
8965 }
8966}
8967
8968SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
8969 SelectionDAG &DAG) const {
8970 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
8971 assert(N->getOffset() == 0 && "unexpected offset in global node");
8972 const GlobalValue *GV = N->getGlobal();
8973 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
8974}
8975
8976SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
8977 SelectionDAG &DAG) const {
8978 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
8979
8980 return getAddr(N, DAG);
8981}
8982
8983SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
8984 SelectionDAG &DAG) const {
8985 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
8986
8987 return getAddr(N, DAG);
8988}
8989
8990SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
8991 SelectionDAG &DAG) const {
8992 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
8993
8994 return getAddr(N, DAG);
8995}
8996
8997SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
8998 SelectionDAG &DAG,
8999 bool UseGOT) const {
9000 SDLoc DL(N);
9001 EVT Ty = getPointerTy(DAG.getDataLayout());
9002 const GlobalValue *GV = N->getGlobal();
9003 MVT XLenVT = Subtarget.getXLenVT();
9004
9005 if (UseGOT) {
9006 // Use PC-relative addressing to access the GOT for this TLS symbol, then
9007 // load the address from the GOT and add the thread pointer. This generates
9008 // the pattern (PseudoLA_TLS_IE sym), which expands to
9009 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
9010 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9011 SDValue Load =
9012 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
9013 MachineFunction &MF = DAG.getMachineFunction();
9014 MachineMemOperand *MemOp = MF.getMachineMemOperand(
9015 MachinePointerInfo::getGOT(MF),
9016 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
9017 MachineMemOperand::MOInvariant,
9018 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
9019 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
9020
9021 // Add the thread pointer.
9022 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
9023 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
9024 }
9025
9026 // Generate a sequence for accessing the address relative to the thread
9027 // pointer, with the appropriate adjustment for the thread pointer offset.
9028 // This generates the pattern
9029 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
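// which corresponds to the standard local-exec sequence (result register
// illustrative only):
//   lui  a0, %tprel_hi(sym)
//   add  a0, a0, tp, %tprel_add(sym)
//   addi a0, a0, %tprel_lo(sym)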
9030 SDValue AddrHi =
9031 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
9032 SDValue AddrAdd =
9033 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
9034 SDValue AddrLo =
9035 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
9037 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
9038 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
9039 SDValue MNAdd =
9040 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
9041 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
9042}
9043
9044SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
9045 SelectionDAG &DAG) const {
9046 SDLoc DL(N);
9047 EVT Ty = getPointerTy(DAG.getDataLayout());
9048 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
9049 const GlobalValue *GV = N->getGlobal();
9050
9051 // Use a PC-relative addressing mode to access the global dynamic GOT address.
9052 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
9053 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
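// Together with the __tls_get_addr call constructed below, this yields the
// usual general-dynamic sequence, roughly:
//   .Lpcrel_hi2: auipc a0, %tls_gd_pcrel_hi(sym)
//                addi  a0, a0, %pcrel_lo(.Lpcrel_hi2)
//                call  __tls_get_addr@plt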
9054 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9055 SDValue Load =
9056 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
9057
9058 // Prepare argument list to generate call.
9059 ArgListTy Args;
9060 Args.emplace_back(Load, CallTy);
9061
9062 // Setup call to __tls_get_addr.
9063 TargetLowering::CallLoweringInfo CLI(DAG);
9064 CLI.setDebugLoc(DL)
9065 .setChain(DAG.getEntryNode())
9066 .setLibCallee(CallingConv::C, CallTy,
9067 DAG.getExternalSymbol("__tls_get_addr", Ty),
9068 std::move(Args));
9069
9070 return LowerCallTo(CLI).first;
9071}
9072
9073SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
9074 SelectionDAG &DAG) const {
9075 SDLoc DL(N);
9076 EVT Ty = getPointerTy(DAG.getDataLayout());
9077 const GlobalValue *GV = N->getGlobal();
9078
9079 // Use a PC-relative addressing mode to access the global dynamic GOT address.
9080 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
9081 //
9082 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
9083 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
9084 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
9085 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
9086 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9087 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
9088}
9089
9090SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
9091 SelectionDAG &DAG) const {
9092 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
9093 assert(N->getOffset() == 0 && "unexpected offset in global node");
9094
9095 if (DAG.getTarget().useEmulatedTLS())
9096 return LowerToTLSEmulatedModel(N, DAG);
9097
9098 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
9099
9100 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
9101 CallingConv::GHC)
9102 reportFatalUsageError("In GHC calling convention TLS is not supported");
9103
9104 SDValue Addr;
9105 switch (Model) {
9106 case TLSModel::LocalExec:
9107 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
9108 break;
9109 case TLSModel::InitialExec:
9110 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
9111 break;
9112 case TLSModel::LocalDynamic:
9113 case TLSModel::GeneralDynamic:
9114 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
9115 : getDynamicTLSAddr(N, DAG);
9116 break;
9117 }
9118
9119 return Addr;
9120}
9121
9122// Return true if Val is equal to (setcc LHS, RHS, CC).
9123// Return false if Val is the inverse of (setcc LHS, RHS, CC).
9124// Otherwise, return std::nullopt.
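// For example, matchSetCC(a, b, SETLT, (setcc a, b, SETGE)) returns false,
// since SETGE is the inverse of SETLT on the same operands.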
9125static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
9126 ISD::CondCode CC, SDValue Val) {
9127 assert(Val->getOpcode() == ISD::SETCC);
9128 SDValue LHS2 = Val.getOperand(0);
9129 SDValue RHS2 = Val.getOperand(1);
9130 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
9131
9132 if (LHS == LHS2 && RHS == RHS2) {
9133 if (CC == CC2)
9134 return true;
9135 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9136 return false;
9137 } else if (LHS == RHS2 && RHS == LHS2) {
9138 CC2 = ISD::getSetCCSwappedOperands(CC2);
9139 if (CC == CC2)
9140 return true;
9141 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9142 return false;
9143 }
9144
9145 return std::nullopt;
9146}
9147
9148 static bool isSimm12Constant(SDValue V) {
9149 return isa<ConstantSDNode>(V) && V->getAsAPIntVal().isSignedIntN(12);
9150}
9151
9152 static SDValue lowerSelectToBinOp(SDNode *N, SelectionDAG &DAG,
9153 const RISCVSubtarget &Subtarget) {
9154 SDValue CondV = N->getOperand(0);
9155 SDValue TrueV = N->getOperand(1);
9156 SDValue FalseV = N->getOperand(2);
9157 MVT VT = N->getSimpleValueType(0);
9158 SDLoc DL(N);
9159
9160 if (!Subtarget.hasConditionalMoveFusion()) {
9161 // (select c, -1, y) -> -c | y
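// With a boolean condition c in {0, 1}, -c is either 0 or all-ones, so the
// OR below yields either y or -1 without a branch.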
9162 if (isAllOnesConstant(TrueV)) {
9163 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9164 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
9165 }
9166 // (select c, y, -1) -> (c-1) | y
9167 if (isAllOnesConstant(FalseV)) {
9168 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
9169 DAG.getAllOnesConstant(DL, VT));
9170 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
9171 }
9172
9173 const bool HasCZero = VT.isScalarInteger() && Subtarget.hasCZEROLike();
9174
9175 // (select c, 0, y) -> (c-1) & y
9176 if (isNullConstant(TrueV) && (!HasCZero || isSimm12Constant(FalseV))) {
9177 SDValue Neg =
9178 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
9179 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
9180 }
9181 if (isNullConstant(FalseV)) {
9182 // (select c, (1 << ShAmount) + 1, 0) -> (c << ShAmount) + c
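// For example, (select c, 9, 0) becomes (c << 3) + c, a single sh3add when
// the shift-add extension is available.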
9183 if (auto *TrueC = dyn_cast<ConstantSDNode>(TrueV)) {
9184 uint64_t TrueM1 = TrueC->getZExtValue() - 1;
9185 if (isPowerOf2_64(TrueM1)) {
9186 unsigned ShAmount = Log2_64(TrueM1);
9187 if (Subtarget.hasShlAdd(ShAmount))
9188 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, CondV,
9189 DAG.getTargetConstant(ShAmount, DL, VT), CondV);
9190 }
9191 }
9192 // (select c, y, 0) -> -c & y
9193 if (!HasCZero || isSimm12Constant(TrueV)) {
9194 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9195 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
9196 }
9197 }
9198 }
9199
9200 // select c, ~x, x --> xor -c, x
9201 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
9202 const APInt &TrueVal = TrueV->getAsAPIntVal();
9203 const APInt &FalseVal = FalseV->getAsAPIntVal();
9204 if (~TrueVal == FalseVal) {
9205 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9206 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
9207 }
9208 }
9209
9210 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
9211 // when both truev and falsev are also setcc.
9212 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
9213 FalseV.getOpcode() == ISD::SETCC) {
9214 SDValue LHS = CondV.getOperand(0);
9215 SDValue RHS = CondV.getOperand(1);
9216 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9217
9218 // (select x, x, y) -> x | y
9219 // (select !x, x, y) -> x & y
9220 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
9221 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
9222 DAG.getFreeze(FalseV));
9223 }
9224 // (select x, y, x) -> x & y
9225 // (select !x, y, x) -> x | y
9226 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
9227 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
9228 DAG.getFreeze(TrueV), FalseV);
9229 }
9230 }
9231
9232 return SDValue();
9233}
9234
9235// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
9236// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
9237 // For now we only consider the transformation profitable if `binOp(c0, c1)` ends up
9238// being `0` or `-1`. In such cases we can replace `select` with `and`.
9239// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
9240// than `c0`?
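// For example, (add (select cond, x, -5), 5) can fold to
// (select cond, (add x, 5), 0), and a select against zero is then lowered
// with a mask instead of materializing both constants.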
9241 static SDValue
9242 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
9243 const RISCVSubtarget &Subtarget) {
9244 if (Subtarget.hasShortForwardBranchOpt())
9245 return SDValue();
9246
9247 unsigned SelOpNo = 0;
9248 SDValue Sel = BO->getOperand(0);
9249 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
9250 SelOpNo = 1;
9251 Sel = BO->getOperand(1);
9252 }
9253
9254 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
9255 return SDValue();
9256
9257 unsigned ConstSelOpNo = 1;
9258 unsigned OtherSelOpNo = 2;
9259 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
9260 ConstSelOpNo = 2;
9261 OtherSelOpNo = 1;
9262 }
9263 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
9264 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
9265 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
9266 return SDValue();
9267
9268 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
9269 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
9270 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
9271 return SDValue();
9272
9273 SDLoc DL(Sel);
9274 EVT VT = BO->getValueType(0);
9275
9276 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
9277 if (SelOpNo == 1)
9278 std::swap(NewConstOps[0], NewConstOps[1]);
9279
9280 SDValue NewConstOp =
9281 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
9282 if (!NewConstOp)
9283 return SDValue();
9284
9285 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
9286 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
9287 return SDValue();
9288
9289 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
9290 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
9291 if (SelOpNo == 1)
9292 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
9293 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
9294
9295 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
9296 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
9297 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
9298}
9299
9300SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
9301 SDValue CondV = Op.getOperand(0);
9302 SDValue TrueV = Op.getOperand(1);
9303 SDValue FalseV = Op.getOperand(2);
9304 SDLoc DL(Op);
9305 MVT VT = Op.getSimpleValueType();
9306 MVT XLenVT = Subtarget.getXLenVT();
9307
9308 // Lower vector SELECTs to VSELECTs by splatting the condition.
9309 if (VT.isVector()) {
9310 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
9311 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
9312 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
9313 }
9314
9315 // Try some other optimizations before falling back to generic lowering.
9316 if (SDValue V = lowerSelectToBinOp(Op.getNode(), DAG, Subtarget))
9317 return V;
9318
9319 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
9320 // nodes to implement the SELECT. Performing the lowering here allows for
9321 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
9322 // sequence or RISCVISD::SELECT_CC node (branch-based select).
9323 if (Subtarget.hasCZEROLike() && VT.isScalarInteger()) {
9324
9325 // (select c, t, 0) -> (czero_eqz t, c)
9326 if (isNullConstant(FalseV))
9327 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
9328 // (select c, 0, f) -> (czero_nez f, c)
9329 if (isNullConstant(TrueV))
9330 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
9331
9332 // Check to see if a given operation is a 'NOT'; if so, return the negated
9333 // operand.
9334 auto getNotOperand = [](const SDValue &Op) -> std::optional<const SDValue> {
9335 using namespace llvm::SDPatternMatch;
9336 SDValue Xor;
9337 if (sd_match(Op, m_OneUse(m_Not(m_Value(Xor))))) {
9338 return Xor;
9339 }
9340 return std::nullopt;
9341 };
9342 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
9343 // (select c, (and f, ~x), f) -> (andn f, (czero_eqz x, c))
9344 if (TrueV.getOpcode() == ISD::AND &&
9345 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV)) {
9346 auto NotOperand = (TrueV.getOperand(0) == FalseV)
9347 ? getNotOperand(TrueV.getOperand(1))
9348 : getNotOperand(TrueV.getOperand(0));
9349 if (NotOperand) {
9350 SDValue CMOV =
9351 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, *NotOperand, CondV);
9352 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
9353 return DAG.getNode(ISD::AND, DL, VT, FalseV, NOT);
9354 }
9355 return DAG.getNode(
9356 ISD::OR, DL, VT, TrueV,
9357 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
9358 }
9359
9360 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
9361 // (select c, t, (and t, ~x)) -> (andn t, (czero_nez x, c))
9362 if (FalseV.getOpcode() == ISD::AND &&
9363 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV)) {
9364 auto NotOperand = (FalseV.getOperand(0) == TrueV)
9365 ? getNotOperand(FalseV.getOperand(1))
9366 : getNotOperand(FalseV.getOperand(0));
9367 if (NotOperand) {
9368 SDValue CMOV =
9369 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, *NotOperand, CondV);
9370 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
9371 return DAG.getNode(ISD::AND, DL, VT, TrueV, NOT);
9372 }
9373 return DAG.getNode(
9374 ISD::OR, DL, VT, FalseV,
9375 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
9376 }
9377
9378 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
9379 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
9380 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
9381 const APInt &TrueVal = TrueV->getAsAPIntVal();
9382 const APInt &FalseVal = FalseV->getAsAPIntVal();
9383
9384 // Prefer these over Zicond to avoid materializing an immediate:
9385 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
9386 // (select (x > -1), z, y) -> x >> (XLEN - 1) & (y - z) + z
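// The arithmetic shift x >> (XLEN - 1) is all-ones when x is negative and
// zero otherwise, so the AND keeps either (y - z) or 0 and the final ADD
// produces y or z.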
9387 if (CondV.getOpcode() == ISD::SETCC &&
9388 CondV.getOperand(0).getValueType() == VT && CondV.hasOneUse()) {
9389 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9390 if ((CCVal == ISD::SETLT && isNullConstant(CondV.getOperand(1))) ||
9391 (CCVal == ISD::SETGT && isAllOnesConstant(CondV.getOperand(1)))) {
9392 int64_t TrueImm = TrueVal.getSExtValue();
9393 int64_t FalseImm = FalseVal.getSExtValue();
9394 if (CCVal == ISD::SETGT)
9395 std::swap(TrueImm, FalseImm);
9396 if (isInt<12>(TrueImm) && isInt<12>(FalseImm) &&
9397 isInt<12>(TrueImm - FalseImm)) {
9398 SDValue SRA =
9399 DAG.getNode(ISD::SRA, DL, VT, CondV.getOperand(0),
9400 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
9401 SDValue AND =
9402 DAG.getNode(ISD::AND, DL, VT, SRA,
9403 DAG.getSignedConstant(TrueImm - FalseImm, DL, VT));
9404 return DAG.getNode(ISD::ADD, DL, VT, AND,
9405 DAG.getSignedConstant(FalseImm, DL, VT));
9406 }
9407 }
9408 }
9409
9410 // Use SHL/ADDI (and possibly XORI) to avoid having to materialize
9411 // a constant in a register.
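// For example, (select c, 9, 1) has TrueVal - FalseVal == 8, so it becomes
// (add 1, (shl c, 3)) and needs no constant materialization.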
9412 if ((TrueVal - FalseVal).isPowerOf2() && FalseVal.isSignedIntN(12)) {
9413 SDValue Log2 = DAG.getConstant((TrueVal - FalseVal).logBase2(), DL, VT);
9414 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
9415 return DAG.getNode(ISD::ADD, DL, VT, FalseV, BitDiff);
9416 }
9417 if ((FalseVal - TrueVal).isPowerOf2() && TrueVal.isSignedIntN(12)) {
9418 SDValue Log2 = DAG.getConstant((FalseVal - TrueVal).logBase2(), DL, VT);
9419 CondV = DAG.getLogicalNOT(DL, CondV, CondV->getValueType(0));
9420 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
9421 return DAG.getNode(ISD::ADD, DL, VT, TrueV, BitDiff);
9422 }
9423
9424 auto getCost = [&](const APInt &Delta, const APInt &Addend) {
9425 const int DeltaCost = RISCVMatInt::getIntMatCost(
9426 Delta, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
9427 // Does the addend fold into an ADDI
9428 if (Addend.isSignedIntN(12))
9429 return DeltaCost;
9430 const int AddendCost = RISCVMatInt::getIntMatCost(
9431 Addend, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
9432 return AddendCost + DeltaCost;
9433 };
9434 bool IsCZERO_NEZ = getCost(FalseVal - TrueVal, TrueVal) <=
9435 getCost(TrueVal - FalseVal, FalseVal);
9436 SDValue LHSVal = DAG.getConstant(
9437 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
9438 SDValue CMOV =
9439 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9440 DL, VT, LHSVal, CondV);
9441 return DAG.getNode(ISD::ADD, DL, VT, CMOV, IsCZERO_NEZ ? TrueV : FalseV);
9442 }
9443
9444 // (select c, c1, t) -> (add (czero_nez t - c1, c), c1)
9445 // (select c, t, c1) -> (add (czero_eqz t - c1, c), c1)
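  // For illustration, with Zicond a (select c, 5, x) might be emitted roughly
  // as (c in a0, x in a1, register choices illustrative):
  //   addi      a2, a1, -5    # x - 5
  //   czero.nez a2, a2, a0    # 0 when c != 0, x - 5 otherwise
  //   addi      a0, a2, 5     # 5 when c != 0, x otherwise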
9446 if (isa<ConstantSDNode>(TrueV) != isa<ConstantSDNode>(FalseV)) {
9447 bool IsCZERO_NEZ = isa<ConstantSDNode>(TrueV);
9448 SDValue ConstVal = IsCZERO_NEZ ? TrueV : FalseV;
9449 SDValue RegV = IsCZERO_NEZ ? FalseV : TrueV;
9450 int64_t RawConstVal = cast<ConstantSDNode>(ConstVal)->getSExtValue();
9451 // Fall back to XORI if Const == -0x800
9452 if (RawConstVal == -0x800) {
9453 SDValue XorOp = DAG.getNode(ISD::XOR, DL, VT, RegV, ConstVal);
9454 SDValue CMOV =
9455 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9456 DL, VT, XorOp, CondV);
9457 return DAG.getNode(ISD::XOR, DL, VT, CMOV, ConstVal);
9458 }
9459 // Efficient only if the constant and its negation fit into an ADDI immediate.
9460 // Prefer Add/Sub over Xor since they can be compressed for small immediates.
9461 if (isInt<12>(RawConstVal)) {
9462 SDValue SubOp = DAG.getNode(ISD::SUB, DL, VT, RegV, ConstVal);
9463 SDValue CMOV =
9464 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9465 DL, VT, SubOp, CondV);
9466 return DAG.getNode(ISD::ADD, DL, VT, CMOV, ConstVal);
9467 }
9468 }
9469
9470 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
9471 // Unless we have the short forward branch optimization.
9472 if (!Subtarget.hasConditionalMoveFusion())
9473 return DAG.getNode(
9474 ISD::OR, DL, VT,
9475 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
9476 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
9478 }
9479
9480 if (Op.hasOneUse()) {
9481 unsigned UseOpc = Op->user_begin()->getOpcode();
9482 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
9483 SDNode *BinOp = *Op->user_begin();
9484 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
9485 DAG, Subtarget)) {
9486 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
9487 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
9488 // may return a constant node and cause a crash in lowerSELECT.
9489 if (NewSel.getOpcode() == ISD::SELECT)
9490 return lowerSELECT(NewSel, DAG);
9491 return NewSel;
9492 }
9493 }
9494 }
9495
9496 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
9497 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
9498 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
9499 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
9500 if (FPTV && FPFV) {
9501 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
9502 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
9503 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
9504 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
9505 DAG.getConstant(1, DL, XLenVT));
9506 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
9507 }
9508 }
9509
9510 // If the condition is not an integer SETCC which operates on XLenVT, we need
9511 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
9512 // (select condv, truev, falsev)
9513 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
9514 if (CondV.getOpcode() != ISD::SETCC ||
9515 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
9516 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9517 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
9518
9519 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
9520
9521 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
9522 }
9523
9524 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
9525 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
9526 // advantage of the integer compare+branch instructions. i.e.:
9527 // (select (setcc lhs, rhs, cc), truev, falsev)
9528 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
9529 SDValue LHS = CondV.getOperand(0);
9530 SDValue RHS = CondV.getOperand(1);
9531 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9532
9533 // Special case for a select of 2 constants that have a difference of 1.
9534 // Normally this is done by DAGCombine, but if the select is introduced by
9535 // type legalization or op legalization, we miss it. Restricting to SETLT
9536 // case for now because that is what signed saturating add/sub need.
9537 // FIXME: We don't need the condition to be SETLT or even a SETCC,
9538 // but we would probably want to swap the true/false values if the condition
9539 // is SETGE/SETLE to avoid an XORI.
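  // For illustration, a (select (setlt x, y), 4, 3) might be emitted roughly
  // as:
  //   slt  a0, a0, a1   # 1 when x <s y, 0 otherwise
  //   addi a0, a0, 3    # 4 when x <s y, 3 otherwise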
9540 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
9541 CCVal == ISD::SETLT) {
9542 const APInt &TrueVal = TrueV->getAsAPIntVal();
9543 const APInt &FalseVal = FalseV->getAsAPIntVal();
9544 if (TrueVal - 1 == FalseVal)
9545 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
9546 if (TrueVal + 1 == FalseVal)
9547 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
9548 }
9549
9550 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
9551 // 1 < x ? x : 1 -> 0 < x ? x : 1
9552 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
9553 RHS == TrueV && LHS == FalseV) {
9554 LHS = DAG.getConstant(0, DL, VT);
9555 // 0 <u x is the same as x != 0.
9556 if (CCVal == ISD::SETULT) {
9557 std::swap(LHS, RHS);
9558 CCVal = ISD::SETNE;
9559 }
9560 }
9561
9562 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
9563 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
9564 RHS == FalseV) {
9565 RHS = DAG.getConstant(0, DL, VT);
9566 }
9567
9568 SDValue TargetCC = DAG.getCondCode(CCVal);
9569
9570 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
9571 // (select (setcc lhs, rhs, CC), constant, falsev)
9572 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
9573 std::swap(TrueV, FalseV);
9574 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
9575 }
9576
9577 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
9578 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
9579}
9580
9581SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
9582 SDValue CondV = Op.getOperand(1);
9583 SDLoc DL(Op);
9584 MVT XLenVT = Subtarget.getXLenVT();
9585
9586 if (CondV.getOpcode() == ISD::SETCC &&
9587 CondV.getOperand(0).getValueType() == XLenVT) {
9588 SDValue LHS = CondV.getOperand(0);
9589 SDValue RHS = CondV.getOperand(1);
9590 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9591
9592 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
9593
9594 SDValue TargetCC = DAG.getCondCode(CCVal);
9595 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
9596 LHS, RHS, TargetCC, Op.getOperand(2));
9597 }
9598
9599 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
9600 CondV, DAG.getConstant(0, DL, XLenVT),
9601 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
9602}
9603
9604SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
9605 MachineFunction &MF = DAG.getMachineFunction();
9606 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
9607
9608 SDLoc DL(Op);
9609 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
9610 getPointerTy(MF.getDataLayout()));
9611
9612 // vastart just stores the address of the VarArgsFrameIndex slot into the
9613 // memory location argument.
9614 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
9615 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
9616 MachinePointerInfo(SV));
9617}
9618
9619SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
9620 SelectionDAG &DAG) const {
9621 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
9622 MachineFunction &MF = DAG.getMachineFunction();
9623 MachineFrameInfo &MFI = MF.getFrameInfo();
9624 MFI.setFrameAddressIsTaken(true);
9625 Register FrameReg = RI.getFrameRegister(MF);
9626 int XLenInBytes = Subtarget.getXLen() / 8;
9627
9628 EVT VT = Op.getValueType();
9629 SDLoc DL(Op);
9630 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
9631 unsigned Depth = Op.getConstantOperandVal(0);
9632 while (Depth--) {
9633 int Offset = -(XLenInBytes * 2);
9634 SDValue Ptr = DAG.getNode(
9635 ISD::ADD, DL, VT, FrameAddr,
9636 DAG.getSignedConstant(Offset, DL, VT));
9637 FrameAddr =
9638 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
9639 }
9640 return FrameAddr;
9641}
9642
9643SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
9644 SelectionDAG &DAG) const {
9645 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
9646 MachineFunction &MF = DAG.getMachineFunction();
9647 MachineFrameInfo &MFI = MF.getFrameInfo();
9648 MFI.setReturnAddressIsTaken(true);
9649 MVT XLenVT = Subtarget.getXLenVT();
9650 int XLenInBytes = Subtarget.getXLen() / 8;
9651
9652 EVT VT = Op.getValueType();
9653 SDLoc DL(Op);
9654 unsigned Depth = Op.getConstantOperandVal(0);
9655 if (Depth) {
9656 int Off = -XLenInBytes;
9657 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
9658 SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
9659 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
9660 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
9661 MachinePointerInfo());
9662 }
9663
9664 // Return the value of the return address register, marking it an implicit
9665 // live-in.
9666 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
9667 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
9668}
9669
9670SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
9671 SelectionDAG &DAG) const {
9672 SDLoc DL(Op);
9673 SDValue Lo = Op.getOperand(0);
9674 SDValue Hi = Op.getOperand(1);
9675 SDValue Shamt = Op.getOperand(2);
9676 EVT VT = Lo.getValueType();
9677
9678 // if Shamt-XLEN < 0: // Shamt < XLEN
9679 // Lo = Lo << Shamt
9680 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
9681 // else:
9682 // Lo = 0
9683 // Hi = Lo << (Shamt-XLEN)
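  // For illustration, with XLEN == 32:
  //   Shamt == 4:  Lo = Lo << 4
  //                Hi = (Hi << 4) | ((Lo >>u 1) >>u 27)  // top 4 bits of Lo
  //   Shamt == 40: Lo = 0
  //                Hi = (original Lo) << 8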
9684
9685 SDValue Zero = DAG.getConstant(0, DL, VT);
9686 SDValue One = DAG.getConstant(1, DL, VT);
9687 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
9688 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
9689 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
9690 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
9691
9692 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
9693 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
9694 SDValue ShiftRightLo =
9695 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
9696 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
9697 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
9698 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
9699
9700 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
9701
9702 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
9703 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
9704
9705 SDValue Parts[2] = {Lo, Hi};
9706 return DAG.getMergeValues(Parts, DL);
9707}
9708
9709SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
9710 bool IsSRA) const {
9711 SDLoc DL(Op);
9712 SDValue Lo = Op.getOperand(0);
9713 SDValue Hi = Op.getOperand(1);
9714 SDValue Shamt = Op.getOperand(2);
9715 EVT VT = Lo.getValueType();
9716
9717 // SRA expansion:
9718 // if Shamt-XLEN < 0: // Shamt < XLEN
9719 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
9720 // Hi = Hi >>s Shamt
9721 // else:
9722 // Lo = Hi >>s (Shamt-XLEN);
9723 // Hi = Hi >>s (XLEN-1)
9724 //
9725 // SRL expansion:
9726 // if Shamt-XLEN < 0: // Shamt < XLEN
9727 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
9728 // Hi = Hi >>u Shamt
9729 // else:
9730 // Lo = Hi >>u (Shamt-XLEN);
9731 // Hi = 0;
9732
9733 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
9734
9735 SDValue Zero = DAG.getConstant(0, DL, VT);
9736 SDValue One = DAG.getConstant(1, DL, VT);
9737 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
9738 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
9739 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
9740 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
9741
9742 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
9743 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
9744 SDValue ShiftLeftHi =
9745 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
9746 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
9747 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
9748 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
9749 SDValue HiFalse =
9750 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
9751
9752 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
9753
9754 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
9755 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
9756
9757 SDValue Parts[2] = {Lo, Hi};
9758 return DAG.getMergeValues(Parts, DL);
9759}
9760
9761// Lower splats of i1 types to SETCC. For each mask vector type, we have a
9762// legal equivalently-sized i8 type, so we can use that as a go-between.
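// For illustration, splatting an i1 value c into nxv8i1 might be emitted
// roughly as (c in a0, register choices illustrative):
//   andi     a0, a0, 1     # keep only bit 0
//   vsetvli  a1, zero, e8, m1, ta, ma
//   vmv.v.x  v8, a0        # i8 splat used as the go-between
//   vmsne.vi v0, v8, 0     # form the mask by comparing against zero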
9763SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
9764 SelectionDAG &DAG) const {
9765 SDLoc DL(Op);
9766 MVT VT = Op.getSimpleValueType();
9767 SDValue SplatVal = Op.getOperand(0);
9768 // All-zeros or all-ones splats are handled specially.
9769 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
9770 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
9771 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
9772 }
9773 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
9774 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
9775 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
9776 }
9777 MVT InterVT = VT.changeVectorElementType(MVT::i8);
9778 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
9779 DAG.getConstant(1, DL, SplatVal.getValueType()));
9780 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
9781 SDValue Zero = DAG.getConstant(0, DL, InterVT);
9782 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
9783}
9784
9785// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
9786// illegal (currently only vXi64 RV32).
9787// FIXME: We could also catch non-constant sign-extended i32 values and lower
9788// them to VMV_V_X_VL.
9789SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
9790 SelectionDAG &DAG) const {
9791 SDLoc DL(Op);
9792 MVT VecVT = Op.getSimpleValueType();
9793 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
9794 "Unexpected SPLAT_VECTOR_PARTS lowering");
9795
9796 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
9797 SDValue Lo = Op.getOperand(0);
9798 SDValue Hi = Op.getOperand(1);
9799
9800 MVT ContainerVT = VecVT;
9801 if (VecVT.isFixedLengthVector())
9802 ContainerVT = getContainerForFixedLengthVector(VecVT);
9803
9804 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
9805
9806 SDValue Res =
9807 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
9808
9809 if (VecVT.isFixedLengthVector())
9810 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
9811
9812 return Res;
9813}
9814
9815// Custom-lower extensions from mask vectors by using a vselect either with 1
9816// for zero/any-extension or -1 for sign-extension:
9817// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
9818// Note that any-extension is lowered identically to zero-extension.
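// For illustration, a (sext nxv2i1 %m to nxv2i32) might be emitted roughly as
// (with %m already in v0):
//   vsetvli    a0, zero, e32, m1, ta, ma
//   vmv.v.i    v8, 0             # splat of 0
//   vmerge.vim v8, v8, -1, v0    # -1 where the mask is set, 0 elsewhere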
9819SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
9820 int64_t ExtTrueVal) const {
9821 SDLoc DL(Op);
9822 MVT VecVT = Op.getSimpleValueType();
9823 SDValue Src = Op.getOperand(0);
9824 // Only custom-lower extensions from mask types
9825 assert(Src.getValueType().isVector() &&
9826 Src.getValueType().getVectorElementType() == MVT::i1);
9827
9828 if (VecVT.isScalableVector()) {
9829 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
9830 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
9831 if (Src.getOpcode() == ISD::XOR &&
9832 ISD::isConstantSplatVectorAllOnes(Src.getOperand(1).getNode()))
9833 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src.getOperand(0), SplatZero,
9834 SplatTrueVal);
9835 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
9836 }
9837
9838 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
9839 MVT I1ContainerVT =
9840 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
9841
9842 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
9843
9844 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
9845
9846 MVT XLenVT = Subtarget.getXLenVT();
9847 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
9848 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
9849
9850 if (Src.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
9851 SDValue Xor = Src.getOperand(0);
9852 if (Xor.getOpcode() == RISCVISD::VMXOR_VL) {
9853 SDValue ScalableOnes = Xor.getOperand(1);
9854 if (ScalableOnes.getOpcode() == ISD::INSERT_SUBVECTOR &&
9855 ScalableOnes.getOperand(0).isUndef() &&
9856 ISD::isConstantSplatVectorAllOnes(
9857 ScalableOnes.getOperand(1).getNode())) {
9858 CC = Xor.getOperand(0);
9859 std::swap(SplatZero, SplatTrueVal);
9860 }
9861 }
9862 }
9863
9864 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9865 DAG.getUNDEF(ContainerVT), SplatZero, VL);
9866 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9867 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
9868 SDValue Select =
9869 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
9870 SplatZero, DAG.getUNDEF(ContainerVT), VL);
9871
9872 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
9873}
9874
9875// Custom-lower truncations from vectors to mask vectors by using a mask and a
9876// setcc operation:
9877// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
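// For illustration, a (trunc nxv8i8 %v to nxv8i1) might be emitted roughly as:
//   vsetvli  a0, zero, e8, m1, ta, ma
//   vand.vi  v8, v8, 1    # keep only bit 0 of each element
//   vmsne.vi v0, v8, 0    # the mask is set where that bit is 1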
9878SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
9879 SelectionDAG &DAG) const {
9880 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
9881 SDLoc DL(Op);
9882 EVT MaskVT = Op.getValueType();
9883 // Only expect to custom-lower truncations to mask types
9884 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
9885 "Unexpected type for vector mask lowering");
9886 SDValue Src = Op.getOperand(0);
9887 MVT VecVT = Src.getSimpleValueType();
9888 SDValue Mask, VL;
9889 if (IsVPTrunc) {
9890 Mask = Op.getOperand(1);
9891 VL = Op.getOperand(2);
9892 }
9893 // If this is a fixed vector, we need to convert it to a scalable vector.
9894 MVT ContainerVT = VecVT;
9895
9896 if (VecVT.isFixedLengthVector()) {
9897 ContainerVT = getContainerForFixedLengthVector(VecVT);
9898 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
9899 if (IsVPTrunc) {
9900 MVT MaskContainerVT =
9901 getContainerForFixedLengthVector(Mask.getSimpleValueType());
9902 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
9903 }
9904 }
9905
9906 if (!IsVPTrunc) {
9907 std::tie(Mask, VL) =
9908 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9909 }
9910
9911 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
9912 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
9913
9914 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9915 DAG.getUNDEF(ContainerVT), SplatOne, VL);
9916 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9917 DAG.getUNDEF(ContainerVT), SplatZero, VL);
9918
9919 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
9920 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
9921 DAG.getUNDEF(ContainerVT), Mask, VL);
9922 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
9923 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
9924 DAG.getUNDEF(MaskContainerVT), Mask, VL});
9925 if (MaskVT.isFixedLengthVector())
9926 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
9927 return Trunc;
9928}
9929
9930SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
9931 SelectionDAG &DAG) const {
9932 unsigned Opc = Op.getOpcode();
9933 bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
9934 SDLoc DL(Op);
9935
9936 MVT VT = Op.getSimpleValueType();
9937 // Only custom-lower vector truncates
9938 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
9939
9940 // Truncates to mask types are handled differently
9941 if (VT.getVectorElementType() == MVT::i1)
9942 return lowerVectorMaskTruncLike(Op, DAG);
9943
9944 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
9945 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
9946 // truncate by one power of two at a time.
9947 MVT DstEltVT = VT.getVectorElementType();
9948
9949 SDValue Src = Op.getOperand(0);
9950 MVT SrcVT = Src.getSimpleValueType();
9951 MVT SrcEltVT = SrcVT.getVectorElementType();
9952
9953 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
9954 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
9955 "Unexpected vector truncate lowering");
9956
9957 MVT ContainerVT = SrcVT;
9958 SDValue Mask, VL;
9959 if (IsVPTrunc) {
9960 Mask = Op.getOperand(1);
9961 VL = Op.getOperand(2);
9962 }
9963 if (SrcVT.isFixedLengthVector()) {
9964 ContainerVT = getContainerForFixedLengthVector(SrcVT);
9965 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
9966 if (IsVPTrunc) {
9967 MVT MaskVT = getMaskTypeFor(ContainerVT);
9968 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9969 }
9970 }
9971
9972 SDValue Result = Src;
9973 if (!IsVPTrunc) {
9974 std::tie(Mask, VL) =
9975 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
9976 }
9977
9978 unsigned NewOpc;
9979 if (Opc == ISD::TRUNCATE_SSAT_S)
9980 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
9981 else if (Opc == ISD::TRUNCATE_USAT_U)
9982 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
9983 else
9984 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
9985
9986 do {
9987 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
9988 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
9989 Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
9990 } while (SrcEltVT != DstEltVT);
9991
9992 if (SrcVT.isFixedLengthVector())
9993 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
9994
9995 return Result;
9996}
9997
9998SDValue
9999RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
10000 SelectionDAG &DAG) const {
10001 SDLoc DL(Op);
10002 SDValue Chain = Op.getOperand(0);
10003 SDValue Src = Op.getOperand(1);
10004 MVT VT = Op.getSimpleValueType();
10005 MVT SrcVT = Src.getSimpleValueType();
10006 MVT ContainerVT = VT;
10007 if (VT.isFixedLengthVector()) {
10008 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
10009 ContainerVT =
10010 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
10011 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
10012 }
10013
10014 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
10015
10016 // RVV can only widen/truncate fp to types double/half the size of the source.
10017 if ((VT.getVectorElementType() == MVT::f64 &&
10018 (SrcVT.getVectorElementType() == MVT::f16 ||
10019 SrcVT.getVectorElementType() == MVT::bf16)) ||
10020 ((VT.getVectorElementType() == MVT::f16 ||
10021 VT.getVectorElementType() == MVT::bf16) &&
10022 SrcVT.getVectorElementType() == MVT::f64)) {
10023 // For double rounding, the intermediate rounding should be round-to-odd.
10024 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
10025 ? RISCVISD::STRICT_FP_EXTEND_VL
10026 : RISCVISD::STRICT_VFNCVT_ROD_VL;
10027 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
10028 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
10029 Chain, Src, Mask, VL);
10030 Chain = Src.getValue(1);
10031 }
10032
10033 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
10034 ? RISCVISD::STRICT_FP_EXTEND_VL
10035 : RISCVISD::STRICT_FP_ROUND_VL;
10036 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
10037 Chain, Src, Mask, VL);
10038 if (VT.isFixedLengthVector()) {
10039 // StrictFP operations have two result values. Their lowered result should
10040 // have the same result count.
10041 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
10042 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
10043 }
10044 return Res;
10045}
10046
10047SDValue
10048RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
10049 SelectionDAG &DAG) const {
10050 bool IsVP =
10051 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
10052 bool IsExtend =
10053 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
10054 // RVV can only truncate fp to types half the size of the source. We
10055 // custom-lower f64->f16 rounds via RVV's round-to-odd float
10056 // conversion instruction.
10057 SDLoc DL(Op);
10058 MVT VT = Op.getSimpleValueType();
10059
10060 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
10061
10062 SDValue Src = Op.getOperand(0);
10063 MVT SrcVT = Src.getSimpleValueType();
10064
10065 bool IsDirectExtend =
10066 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
10067 (SrcVT.getVectorElementType() != MVT::f16 &&
10068 SrcVT.getVectorElementType() != MVT::bf16));
10069 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
10070 VT.getVectorElementType() != MVT::bf16) ||
10071 SrcVT.getVectorElementType() != MVT::f64);
10072
10073 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
10074
10075 // We have regular SD node patterns for direct non-VL extends.
10076 if (VT.isScalableVector() && IsDirectConv && !IsVP)
10077 return Op;
10078
10079 // Prepare any fixed-length vector operands.
10080 MVT ContainerVT = VT;
10081 SDValue Mask, VL;
10082 if (IsVP) {
10083 Mask = Op.getOperand(1);
10084 VL = Op.getOperand(2);
10085 }
10086 if (VT.isFixedLengthVector()) {
10087 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
10088 ContainerVT =
10089 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
10090 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
10091 if (IsVP) {
10092 MVT MaskVT = getMaskTypeFor(ContainerVT);
10093 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10094 }
10095 }
10096
10097 if (!IsVP)
10098 std::tie(Mask, VL) =
10099 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
10100
10101 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
10102
10103 if (IsDirectConv) {
10104 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
10105 if (VT.isFixedLengthVector())
10106 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
10107 return Src;
10108 }
10109
10110 unsigned InterConvOpc =
10111 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
10112
10113 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
10114 SDValue IntermediateConv =
10115 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
10116 SDValue Result =
10117 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
10118 if (VT.isFixedLengthVector())
10119 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10120 return Result;
10121}
10122
10123// Given a scalable vector type and an index into it, returns the type for the
10124// smallest subvector that the index fits in. This can be used to reduce LMUL
10125// for operations like vslidedown.
10126//
10127// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
10128static std::optional<MVT>
10129getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
10130 const RISCVSubtarget &Subtarget) {
10131 assert(VecVT.isScalableVector());
10132 const unsigned EltSize = VecVT.getScalarSizeInBits();
10133 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
10134 const unsigned MinVLMAX = VectorBitsMin / EltSize;
10135 MVT SmallerVT;
10136 if (MaxIdx < MinVLMAX)
10137 SmallerVT = RISCVTargetLowering::getM1VT(VecVT);
10138 else if (MaxIdx < MinVLMAX * 2)
10139 SmallerVT =
10140 RISCVTargetLowering::getM1VT(VecVT).getDoubleNumVectorElementsVT();
10141 else if (MaxIdx < MinVLMAX * 4)
10142 SmallerVT = RISCVTargetLowering::getM1VT(VecVT)
10143 .getDoubleNumVectorElementsVT()
10144 .getDoubleNumVectorElementsVT();
10145 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
10146 return std::nullopt;
10147 return SmallerVT;
10148}
10149
10150static bool isValidVisniInsertExtractIndex(SDValue Idx) {
10151 auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10152 if (!IdxC || isNullConstant(Idx))
10153 return false;
10154 return isUInt<5>(IdxC->getZExtValue());
10155}
10156
10157// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
10158// first position of a vector, and that vector is slid up to the insert index.
10159// By limiting the active vector length to index+1 and merging with the
10160// original vector (with an undisturbed tail policy for elements >= VL), we
10161// achieve the desired result of leaving all elements untouched except the one
10162// at VL-1, which is replaced with the desired value.
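// For illustration, inserting a scalar x at index 2 of a v4i32 vector might be
// emitted roughly as (x in a0, the vector in v8, a temporary in v9):
//   vsetivli    zero, 3, e32, m1, tu, ma   # VL = index + 1, tail undisturbed
//   vmv.s.x     v9, a0                     # x into element 0 of the temporary
//   vslideup.vi v8, v9, 2                  # slide it up to index 2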
10163SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
10164 SelectionDAG &DAG) const {
10165 SDLoc DL(Op);
10166 MVT VecVT = Op.getSimpleValueType();
10167 MVT XLenVT = Subtarget.getXLenVT();
10168 SDValue Vec = Op.getOperand(0);
10169 SDValue Val = Op.getOperand(1);
10170 MVT ValVT = Val.getSimpleValueType();
10171 SDValue Idx = Op.getOperand(2);
10172
10173 if (VecVT.getVectorElementType() == MVT::i1) {
10174 // FIXME: For now we just promote to an i8 vector and insert into that,
10175 // but this is probably not optimal.
10176 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10177 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
10178 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
10179 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
10180 }
10181
10182 if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
10183 ValVT == MVT::bf16) {
10184 // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
10185 MVT IntVT = VecVT.changeTypeToInteger();
10186 SDValue IntInsert = DAG.getNode(
10187 ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
10188 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
10189 return DAG.getBitcast(VecVT, IntInsert);
10190 }
10191
10192 MVT ContainerVT = VecVT;
10193 // If the operand is a fixed-length vector, convert to a scalable one.
10194 if (VecVT.isFixedLengthVector()) {
10195 ContainerVT = getContainerForFixedLengthVector(VecVT);
10196 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10197 }
10198
10199 // If we know the index we're going to insert at, we can shrink Vec so that
10200 // we're performing the scalar inserts and slideup on a smaller LMUL.
10201 SDValue OrigVec = Vec;
10202 std::optional<unsigned> AlignedIdx;
10203 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
10204 const unsigned OrigIdx = IdxC->getZExtValue();
10205 // Do we know an upper bound on LMUL?
10206 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
10207 DL, DAG, Subtarget)) {
10208 ContainerVT = *ShrunkVT;
10209 AlignedIdx = 0;
10210 }
10211
10212 // If we're compiling for an exact VLEN value, we can always perform
10213 // the insert in m1 as we can determine the register corresponding to
10214 // the index in the register group.
10215 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
10216 if (auto VLEN = Subtarget.getRealVLen(); VLEN && ContainerVT.bitsGT(M1VT)) {
10217 EVT ElemVT = VecVT.getVectorElementType();
10218 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
10219 unsigned RemIdx = OrigIdx % ElemsPerVReg;
10220 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
10221 AlignedIdx = SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
10222 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
10223 ContainerVT = M1VT;
10224 }
10225
10226 if (AlignedIdx)
10227 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, *AlignedIdx);
10228 }
10229
10230 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
10231 // Even i64-element vectors on RV32 can be lowered without scalar
10232 // legalization if the most-significant 32 bits of the value are not affected
10233 // by the sign-extension of the lower 32 bits.
10234 // TODO: We could also catch sign extensions of a 32-bit value.
10235 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
10236 const auto *CVal = cast<ConstantSDNode>(Val);
10237 if (isInt<32>(CVal->getSExtValue())) {
10238 IsLegalInsert = true;
10239 Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
10240 }
10241 }
10242
10243 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10244
10245 SDValue ValInVec;
10246
10247 if (IsLegalInsert) {
10248 unsigned Opc =
10249 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
10250 if (isNullConstant(Idx)) {
10251 if (!VecVT.isFloatingPoint())
10252 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
10253 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
10254
10255 if (AlignedIdx)
10256 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
10257 if (!VecVT.isFixedLengthVector())
10258 return Vec;
10259 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
10260 }
10261
10262 // Use ri.vinsert.v.x if available.
10263 if (Subtarget.hasVendorXRivosVisni() && VecVT.isInteger() &&
10264 isValidVisniInsertExtractIndex(Idx)) {
10265 // Tail policy applies to elements past VLMAX (by assumption Idx < VLMAX)
10266 SDValue PolicyOp =
10267 DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
10268 Vec = DAG.getNode(RISCVISD::RI_VINSERT_VL, DL, ContainerVT, Vec, Val, Idx,
10269 VL, PolicyOp);
10270 if (AlignedIdx)
10271 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
10272 if (!VecVT.isFixedLengthVector())
10273 return Vec;
10274 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
10275 }
10276
10277 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
10278 } else {
10279 // On RV32, i64-element vectors must be specially handled to place the
10280 // value at element 0, by using two vslide1down instructions in sequence on
10281 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
10282 // this.
10283 SDValue ValLo, ValHi;
10284 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
10285 MVT I32ContainerVT =
10286 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
10287 SDValue I32Mask =
10288 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
10289 // Limit the active VL to two.
10290 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
10291 // If the Idx is 0 we can insert directly into the vector.
10292 if (isNullConstant(Idx)) {
10293 // First slide in the lo value, then the hi in above it. We use slide1down
10294 // to avoid the register group overlap constraint of vslide1up.
10295 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10296 Vec, Vec, ValLo, I32Mask, InsertI64VL);
10297 // If the source vector is undef don't pass along the tail elements from
10298 // the previous slide1down.
10299 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
10300 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10301 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
10302 // Bitcast back to the right container type.
10303 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
10304
10305 if (AlignedIdx)
10306 ValInVec = DAG.getInsertSubvector(DL, OrigVec, ValInVec, *AlignedIdx);
10307 if (!VecVT.isFixedLengthVector())
10308 return ValInVec;
10309 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
10310 }
10311
10312 // First slide in the lo value, then the hi in above it. We use slide1down
10313 // to avoid the register group overlap constraint of vslide1up.
10314 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10315 DAG.getUNDEF(I32ContainerVT),
10316 DAG.getUNDEF(I32ContainerVT), ValLo,
10317 I32Mask, InsertI64VL);
10318 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10319 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
10320 I32Mask, InsertI64VL);
10321 // Bitcast back to the right container type.
10322 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
10323 }
10324
10325 // Now that the value is in a vector, slide it into position.
10326 SDValue InsertVL =
10327 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
10328
10329 // Use tail agnostic policy if Idx is the last index of Vec.
10330 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10331 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
10332 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
10333 Policy = RISCVVType::TAIL_AGNOSTIC;
10334 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
10335 Idx, Mask, InsertVL, Policy);
10336
10337 if (AlignedIdx)
10338 Slideup = DAG.getInsertSubvector(DL, OrigVec, Slideup, *AlignedIdx);
10339 if (!VecVT.isFixedLengthVector())
10340 return Slideup;
10341 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
10342}
10343
10344// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
10345// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
10346// types this is done using VMV_X_S to allow us to glean information about the
10347// sign bits of the result.
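// For illustration, extracting element 3 of an i32 vector might be emitted
// roughly as (the vector in v8, register choices illustrative):
//   vsetivli      zero, 1, e32, m1, ta, ma   # only one element is needed
//   vslidedown.vi v8, v8, 3
//   vmv.x.s       a0, v8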
10348SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
10349 SelectionDAG &DAG) const {
10350 SDLoc DL(Op);
10351 SDValue Idx = Op.getOperand(1);
10352 SDValue Vec = Op.getOperand(0);
10353 EVT EltVT = Op.getValueType();
10354 MVT VecVT = Vec.getSimpleValueType();
10355 MVT XLenVT = Subtarget.getXLenVT();
10356
10357 if (VecVT.getVectorElementType() == MVT::i1) {
10358 // Use vfirst.m to extract the first bit.
10359 if (isNullConstant(Idx)) {
10360 MVT ContainerVT = VecVT;
10361 if (VecVT.isFixedLengthVector()) {
10362 ContainerVT = getContainerForFixedLengthVector(VecVT);
10363 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10364 }
10365 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10366 SDValue Vfirst =
10367 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
10368 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
10369 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10370 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
10371 }
10372 if (VecVT.isFixedLengthVector()) {
10373 unsigned NumElts = VecVT.getVectorNumElements();
10374 if (NumElts >= 8) {
10375 MVT WideEltVT;
10376 unsigned WidenVecLen;
10377 SDValue ExtractElementIdx;
10378 SDValue ExtractBitIdx;
10379 unsigned MaxEEW = Subtarget.getELen();
10380 MVT LargestEltVT = MVT::getIntegerVT(
10381 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
10382 if (NumElts <= LargestEltVT.getSizeInBits()) {
10383 assert(isPowerOf2_32(NumElts) &&
10384 "the number of elements should be power of 2");
10385 WideEltVT = MVT::getIntegerVT(NumElts);
10386 WidenVecLen = 1;
10387 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
10388 ExtractBitIdx = Idx;
10389 } else {
10390 WideEltVT = LargestEltVT;
10391 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
10392 // extract element index = index / element width
10393 ExtractElementIdx = DAG.getNode(
10394 ISD::SRL, DL, XLenVT, Idx,
10395 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
10396 // mask bit index = index % element width
10397 ExtractBitIdx = DAG.getNode(
10398 ISD::AND, DL, XLenVT, Idx,
10399 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
10400 }
10401 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
10402 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
10403 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
10404 Vec, ExtractElementIdx);
10405 // Extract the bit from GPR.
10406 SDValue ShiftRight =
10407 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
10408 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
10409 DAG.getConstant(1, DL, XLenVT));
10410 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
10411 }
10412 }
10413 // Otherwise, promote to an i8 vector and extract from that.
10414 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10415 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
10416 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
10417 }
10418
10419 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
10420 EltVT == MVT::bf16) {
10421 // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
10422 MVT IntVT = VecVT.changeTypeToInteger();
10423 SDValue IntVec = DAG.getBitcast(IntVT, Vec);
10424 SDValue IntExtract =
10425 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx);
10426 return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
10427 }
10428
10429 // If this is a fixed vector, we need to convert it to a scalable vector.
10430 MVT ContainerVT = VecVT;
10431 if (VecVT.isFixedLengthVector()) {
10432 ContainerVT = getContainerForFixedLengthVector(VecVT);
10433 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10434 }
10435
10436 // If we're compiling for an exact VLEN value and we have a known
10437 // constant index, we can always perform the extract in m1 (or
10438 // smaller) as we can determine the register corresponding to
10439 // the index in the register group.
10440 const auto VLen = Subtarget.getRealVLen();
10441 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10442 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
10443 MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
10444 unsigned OrigIdx = IdxC->getZExtValue();
10445 EVT ElemVT = VecVT.getVectorElementType();
10446 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
10447 unsigned RemIdx = OrigIdx % ElemsPerVReg;
10448 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
10449 unsigned ExtractIdx =
10450 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
10451 Vec = DAG.getExtractSubvector(DL, M1VT, Vec, ExtractIdx);
10452 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
10453 ContainerVT = M1VT;
10454 }
10455
10456 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
10457 // contains our index.
10458 std::optional<uint64_t> MaxIdx;
10459 if (VecVT.isFixedLengthVector())
10460 MaxIdx = VecVT.getVectorNumElements() - 1;
10461 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
10462 MaxIdx = IdxC->getZExtValue();
10463 if (MaxIdx) {
10464 if (auto SmallerVT =
10465 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
10466 ContainerVT = *SmallerVT;
10467 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
10468 }
10469 }
10470
10471 // Use ri.vextract.x.v if available.
10472 // TODO: Avoid index 0 and just use the vmv.x.s
10473 if (Subtarget.hasVendorXRivosVisni() && EltVT.isInteger() &&
10474 isValidVisniInsertExtractIndex(Idx)) {
10475 SDValue Elt = DAG.getNode(RISCVISD::RI_VEXTRACT, DL, XLenVT, Vec, Idx);
10476 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt);
10477 }
10478
10479 // If after narrowing, the required slide is still greater than LMUL2,
10480 // fallback to generic expansion and go through the stack. This is done
10481 // for a subtle reason: extracting *all* elements out of a vector is
10482 // widely expected to be linear in vector size, but because vslidedown
10483 // is linear in LMUL, performing N extracts using vslidedown becomes
10484 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
10485 // seems to have the same problem (the store is linear in LMUL), but the
10486 // generic expansion *memoizes* the store, and thus for many extracts of
10487 // the same vector we end up with one store and a bunch of loads.
10488 // TODO: We don't have the same code for insert_vector_elt because we
10489 // have BUILD_VECTOR and handle the degenerate case there. Should we
10490 // consider adding an inverse BUILD_VECTOR node?
10491 MVT LMUL2VT =
10492 RISCVTargetLowering::getM1VT(ContainerVT).getDoubleNumVectorElementsVT();
10493 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
10494 return SDValue();
10495
10496 // If the index is 0, the vector is already in the right position.
10497 if (!isNullConstant(Idx)) {
10498 // Use a VL of 1 to avoid processing more elements than we need.
10499 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
10500 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10501 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
10502 }
10503
10504 if (!EltVT.isInteger()) {
10505 // Floating-point extracts are handled in TableGen.
10506 return DAG.getExtractVectorElt(DL, EltVT, Vec, 0);
10507 }
10508
10509 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
10510 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
10511}
10512
10513// Some RVV intrinsics may claim that they want an integer operand to be
10514// promoted or expanded.
10515static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
10516 const RISCVSubtarget &Subtarget) {
10517 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
10518 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
10519 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
10520 "Unexpected opcode");
10521
10522 if (!Subtarget.hasVInstructions())
10523 return SDValue();
10524
10525 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
10526 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
10527 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
10528
10529 SDLoc DL(Op);
10530
10531 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
10532 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
10533 if (!II || !II->hasScalarOperand())
10534 return SDValue();
10535
10536 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
10537 assert(SplatOp < Op.getNumOperands());
10538
10539 SmallVector<SDValue, 8> Operands(Op->ops());
10540 SDValue &ScalarOp = Operands[SplatOp];
10541 MVT OpVT = ScalarOp.getSimpleValueType();
10542 MVT XLenVT = Subtarget.getXLenVT();
10543
10544 // If this isn't a scalar, or its type is XLenVT we're done.
10545 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
10546 return SDValue();
10547
10548 // Simplest case is that the operand needs to be promoted to XLenVT.
10549 if (OpVT.bitsLT(XLenVT)) {
10550 // If the operand is a constant, sign extend to increase our chances
10551 // of being able to use a .vi instruction. ANY_EXTEND would become a
10552 // zero extend and the simm5 check in isel would fail.
10553 // FIXME: Should we ignore the upper bits in isel instead?
10554 unsigned ExtOpc =
10555 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
10556 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
10557 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10558 }
10559
10560 // Use the previous operand to get the vXi64 VT. The result might be a mask
10561 // VT for compares. Using the previous operand assumes that the previous
10562 // operand will never have a smaller element size than a scalar operand and
10563 // that a widening operation never uses SEW=64.
10564 // NOTE: If this fails the below assert, we can probably just find the
10565 // element count from any operand or result and use it to construct the VT.
10566 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
10567 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
10568
10569 // The more complex case is when the scalar is larger than XLenVT.
10570 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
10571 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
10572
10573 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
10574 // instruction to sign-extend since SEW>XLEN.
10575 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
10576 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
10577 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10578 }
10579
10580 switch (IntNo) {
10581 case Intrinsic::riscv_vslide1up:
10582 case Intrinsic::riscv_vslide1down:
10583 case Intrinsic::riscv_vslide1up_mask:
10584 case Intrinsic::riscv_vslide1down_mask: {
10585 // We need to special case these when the scalar is larger than XLen.
10586 unsigned NumOps = Op.getNumOperands();
10587 bool IsMasked = NumOps == 7;
10588
10589 // Convert the vector source to the equivalent nxvXi32 vector.
10590 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
10591 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
10592 SDValue ScalarLo, ScalarHi;
10593 std::tie(ScalarLo, ScalarHi) =
10594 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
10595
10596 // Double the VL since we halved SEW.
10597 SDValue AVL = getVLOperand(Op);
10598 SDValue I32VL;
10599
10600 // Optimize for constant AVL
10601 if (isa<ConstantSDNode>(AVL)) {
10602 const auto [MinVLMAX, MaxVLMAX] =
10603 RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
10604
10605 uint64_t AVLInt = AVL->getAsZExtVal();
10606 if (AVLInt <= MinVLMAX) {
10607 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
10608 } else if (AVLInt >= 2 * MaxVLMAX) {
10609 // Just set vl to VLMAX in this situation
10610 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
10611 } else {
10612 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
10613 // is related to the hardware implementation.
10614 // So let the following code handle it.
10615 }
10616 }
10617 if (!I32VL) {
10618 RISCVVType::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
10619 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
10620 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
10621 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
10622 SDValue SETVL =
10623 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
10624 // Use the vsetvli instruction to get the actually used length, which is
10625 // related to the hardware implementation.
10626 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
10627 SEW, LMUL);
10628 I32VL =
10629 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
10630 }
10631
10632 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
10633
10634 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
10635 // instructions.
10636 SDValue Passthru;
10637 if (IsMasked)
10638 Passthru = DAG.getUNDEF(I32VT);
10639 else
10640 Passthru = DAG.getBitcast(I32VT, Operands[1]);
10641
10642 if (IntNo == Intrinsic::riscv_vslide1up ||
10643 IntNo == Intrinsic::riscv_vslide1up_mask) {
10644 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
10645 ScalarHi, I32Mask, I32VL);
10646 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
10647 ScalarLo, I32Mask, I32VL);
10648 } else {
10649 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
10650 ScalarLo, I32Mask, I32VL);
10651 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
10652 ScalarHi, I32Mask, I32VL);
10653 }
10654
10655 // Convert back to nxvXi64.
10656 Vec = DAG.getBitcast(VT, Vec);
10657
10658 if (!IsMasked)
10659 return Vec;
10660 // Apply mask after the operation.
10661 SDValue Mask = Operands[NumOps - 3];
10662 SDValue MaskedOff = Operands[1];
10663 // Assume Policy operand is the last operand.
10664 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
10665 // We don't need to select maskedoff if it's undef.
10666 if (MaskedOff.isUndef())
10667 return Vec;
10668 // TAMU
10669 if (Policy == RISCVVType::TAIL_AGNOSTIC)
10670 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
10671 DAG.getUNDEF(VT), AVL);
10672 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
10673 // It's fine because vmerge does not care about the mask policy.
10674 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
10675 MaskedOff, AVL);
10676 }
10677 }
10678
10679 // We need to convert the scalar to a splat vector.
10680 SDValue VL = getVLOperand(Op);
10681 assert(VL.getValueType() == XLenVT);
10682 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
10683 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10684}
10685
10686// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
10687// scalable vector llvm.get.vector.length for now.
10688//
10689// We need to convert from a scalable VF to a vsetvli with VLMax equal to
10690// (vscale * VF). The vscale and VF are independent of element width. We use
10691// SEW=8 for the vsetvli because it is the only element width that supports all
10692// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
10693// (vscale * VF). Where vscale is defined as VLEN/RVVBitsPerBlock. The
10694// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
10695// SEW and LMUL are better for the surrounding vector instructions.
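// For illustration, with RVVBitsPerBlock == 64 a call such as
// llvm.experimental.get.vector.length(%avl, 2, /*scalable=*/true) picks
// LMUL == 1/4 (8 / 2), so the vsetvli uses e8/mf4 and its VLMAX is
// VLEN/32 == vscale * 2, i.e. exactly vscale * VF.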
10696static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
10697 const RISCVSubtarget &Subtarget) {
10698 MVT XLenVT = Subtarget.getXLenVT();
10699
10700 // The smallest LMUL is only valid for the smallest element width.
10701 const unsigned ElementWidth = 8;
10702
10703 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
10704 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
10705 // We don't support VF==1 with ELEN==32.
10706 [[maybe_unused]] unsigned MinVF =
10707 RISCV::RVVBitsPerBlock / Subtarget.getELen();
10708
10709 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
10710 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
10711 "Unexpected VF");
10712
10713 bool Fractional = VF < LMul1VF;
10714 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
10715 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
10716 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
10717
10718 SDLoc DL(N);
10719
10720 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
10721 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
10722
10723 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
10724
10725 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
10726 SDValue Res =
10727 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
10728 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
10729}
10730
10731static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
10732 const RISCVSubtarget &Subtarget) {
10733 SDValue Op0 = N->getOperand(1);
10734 MVT OpVT = Op0.getSimpleValueType();
10735 MVT ContainerVT = OpVT;
10736 if (OpVT.isFixedLengthVector()) {
10737 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
10738 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
10739 }
10740 MVT XLenVT = Subtarget.getXLenVT();
10741 SDLoc DL(N);
10742 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
10743 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
10744 if (isOneConstant(N->getOperand(2)))
10745 return Res;
10746
10747 // Convert -1 to VL.
10748 SDValue Setcc =
10749 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
10750 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
10751 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
10752}
10753
10754static inline void promoteVCIXScalar(SDValue Op,
10755 MutableArrayRef<SDValue> Operands,
10756 SelectionDAG &DAG) {
10757 const RISCVSubtarget &Subtarget =
10758 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
10759
10760 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
10761 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
10762 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
10763 SDLoc DL(Op);
10764
10765 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
10766 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
10767 if (!II || !II->hasScalarOperand())
10768 return;
10769
10770 unsigned SplatOp = II->ScalarOperand + 1;
10771 assert(SplatOp < Op.getNumOperands());
10772
10773 SDValue &ScalarOp = Operands[SplatOp];
10774 MVT OpVT = ScalarOp.getSimpleValueType();
10775 MVT XLenVT = Subtarget.getXLenVT();
10776
10777 // The code below is partially copied from lowerVectorIntrinsicScalars.
10778 // If this isn't a scalar, or its type is XLenVT we're done.
10779 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
10780 return;
10781
10782 // Manually emit promote operation for scalar operation.
10783 if (OpVT.bitsLT(XLenVT)) {
10784 unsigned ExtOpc =
10785 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
10786 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
10787 }
10788}
10789
10790static void processVCIXOperands(SDValue OrigOp,
10791 MutableArrayRef<SDValue> Operands,
10792 SelectionDAG &DAG) {
10793 promoteVCIXScalar(OrigOp, Operands, DAG);
10794 const RISCVSubtarget &Subtarget =
10795 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
10796 for (SDValue &V : Operands) {
10797 EVT ValType = V.getValueType();
10798 if (ValType.isVector() && ValType.isFloatingPoint()) {
10799 MVT InterimIVT =
10800 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
10801 ValType.getVectorElementCount());
10802 V = DAG.getBitcast(InterimIVT, V);
10803 }
10804 if (ValType.isFixedLengthVector()) {
10805 MVT OpContainerVT = getContainerForFixedLengthVector(
10806 DAG, V.getSimpleValueType(), Subtarget);
10807 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
10808 }
10809 }
10810}
10811
10812// LMUL * VLEN should be greater than or equal to EGS * SEW
10813static inline bool isValidEGW(int EGS, EVT VT,
10814 const RISCVSubtarget &Subtarget) {
10815 return (Subtarget.getRealMinVLen() *
10816 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
10817 EGS * VT.getScalarSizeInBits();
10818}
10819
10820SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10821 SelectionDAG &DAG) const {
10822 unsigned IntNo = Op.getConstantOperandVal(0);
10823 SDLoc DL(Op);
10824 MVT XLenVT = Subtarget.getXLenVT();
10825
10826 switch (IntNo) {
10827 default:
10828 break; // Don't custom lower most intrinsics.
10829 case Intrinsic::riscv_tuple_insert: {
10830 SDValue Vec = Op.getOperand(1);
10831 SDValue SubVec = Op.getOperand(2);
10832 SDValue Index = Op.getOperand(3);
10833
10834 return DAG.getNode(RISCVISD::TUPLE_INSERT, DL, Op.getValueType(), Vec,
10835 SubVec, Index);
10836 }
10837 case Intrinsic::riscv_tuple_extract: {
10838 SDValue Vec = Op.getOperand(1);
10839 SDValue Index = Op.getOperand(2);
10840
10841 return DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, Op.getValueType(), Vec,
10842 Index);
10843 }
10844 case Intrinsic::thread_pointer: {
10845 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10846 return DAG.getRegister(RISCV::X4, PtrVT);
10847 }
10848 case Intrinsic::riscv_orc_b:
10849 case Intrinsic::riscv_brev8:
10850 case Intrinsic::riscv_sha256sig0:
10851 case Intrinsic::riscv_sha256sig1:
10852 case Intrinsic::riscv_sha256sum0:
10853 case Intrinsic::riscv_sha256sum1:
10854 case Intrinsic::riscv_sm3p0:
10855 case Intrinsic::riscv_sm3p1: {
10856 unsigned Opc;
10857 switch (IntNo) {
10858 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
10859 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
10860 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
10861 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
10862 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
10863 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
10864 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
10865 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
10866 }
10867
10868 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
10869 }
10870 case Intrinsic::riscv_sm4ks:
10871 case Intrinsic::riscv_sm4ed: {
10872 unsigned Opc =
10873 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
10874
10875 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
10876 Op.getOperand(3));
10877 }
10878 case Intrinsic::riscv_zip:
10879 case Intrinsic::riscv_unzip: {
10880 unsigned Opc =
10881 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
10882 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
10883 }
10884 case Intrinsic::riscv_mopr:
10885 return DAG.getNode(RISCVISD::MOP_R, DL, XLenVT, Op.getOperand(1),
10886 Op.getOperand(2));
10887
10888 case Intrinsic::riscv_moprr: {
10889 return DAG.getNode(RISCVISD::MOP_RR, DL, XLenVT, Op.getOperand(1),
10890 Op.getOperand(2), Op.getOperand(3));
10891 }
10892 case Intrinsic::riscv_clmul:
10893 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
10894 Op.getOperand(2));
10895 case Intrinsic::riscv_clmulh:
10896 case Intrinsic::riscv_clmulr: {
10897 unsigned Opc =
10898 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
10899 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
10900 }
10901 case Intrinsic::experimental_get_vector_length:
10902 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
10903 case Intrinsic::experimental_cttz_elts:
10904 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
10905 case Intrinsic::riscv_vmv_x_s: {
10906 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
10907 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
10908 }
10909 case Intrinsic::riscv_vfmv_f_s:
10910 return DAG.getExtractVectorElt(DL, Op.getValueType(), Op.getOperand(1), 0);
10911 case Intrinsic::riscv_vmv_v_x:
10912 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
10913 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
10914 Subtarget);
10915 case Intrinsic::riscv_vfmv_v_f:
10916 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
10917 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
10918 case Intrinsic::riscv_vmv_s_x: {
10919 SDValue Scalar = Op.getOperand(2);
10920
10921 if (Scalar.getValueType().bitsLE(XLenVT)) {
10922 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
10923 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
10924 Op.getOperand(1), Scalar, Op.getOperand(3));
10925 }
10926
10927 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
10928
10929 // This is an i64 value that lives in two scalar registers. We have to
10930 // insert this in a convoluted way. First we build a vXi64 splat containing
10931 // the two values that we assemble using some bit math. Next we'll use
10932 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
10933 // to merge element 0 from our splat into the source vector.
10934 // FIXME: This is probably not the best way to do this, but it is
10935 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
10936 // point.
10937 // sw lo, (a0)
10938 // sw hi, 4(a0)
10939 // vlse vX, (a0)
10940 //
10941 // vid.v vVid
10942 // vmseq.vx mMask, vVid, 0
10943 // vmerge.vvm vDest, vSrc, vVal, mMask
10944 MVT VT = Op.getSimpleValueType();
10945 SDValue Vec = Op.getOperand(1);
10946 SDValue VL = getVLOperand(Op);
10947
10948 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
10949 if (Op.getOperand(1).isUndef())
10950 return SplattedVal;
10951 SDValue SplattedIdx =
10952 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
10953 DAG.getConstant(0, DL, MVT::i32), VL);
10954
10955 MVT MaskVT = getMaskTypeFor(VT);
10956 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
10957 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
10958 SDValue SelectCond =
10959 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
10960 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
10961 DAG.getUNDEF(MaskVT), Mask, VL});
10962 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
10963 Vec, DAG.getUNDEF(VT), VL);
10964 }
10965 case Intrinsic::riscv_vfmv_s_f:
10966 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
10967 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
10968 // EGS * EEW >= 128 bits
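 // (the Zvkned/Zvksed instructions below use EGS=4 with SEW=32, i.e. 128-bit
 // element groups)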
10969 case Intrinsic::riscv_vaesdf_vv:
10970 case Intrinsic::riscv_vaesdf_vs:
10971 case Intrinsic::riscv_vaesdm_vv:
10972 case Intrinsic::riscv_vaesdm_vs:
10973 case Intrinsic::riscv_vaesef_vv:
10974 case Intrinsic::riscv_vaesef_vs:
10975 case Intrinsic::riscv_vaesem_vv:
10976 case Intrinsic::riscv_vaesem_vs:
10977 case Intrinsic::riscv_vaeskf1:
10978 case Intrinsic::riscv_vaeskf2:
10979 case Intrinsic::riscv_vaesz_vs:
10980 case Intrinsic::riscv_vsm4k:
10981 case Intrinsic::riscv_vsm4r_vv:
10982 case Intrinsic::riscv_vsm4r_vs: {
10983 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
10984 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
10985 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
10986 reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
10987 return Op;
10988 }
10989 // EGS * EEW >= 256 bits
10990 case Intrinsic::riscv_vsm3c:
10991 case Intrinsic::riscv_vsm3me: {
10992 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
10993 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
10994 reportFatalUsageError("EGW should be greater than or equal to 8 * SEW.");
10995 return Op;
10996 }
10997 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
10998 case Intrinsic::riscv_vsha2ch:
10999 case Intrinsic::riscv_vsha2cl:
11000 case Intrinsic::riscv_vsha2ms: {
11001 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
11002 !Subtarget.hasStdExtZvknhb())
11003 reportFatalUsageError("SEW=64 needs Zvknhb to be enabled.");
11004 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
11005 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
11006 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
11007 reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
11008 return Op;
11009 }
11010 case Intrinsic::riscv_sf_vc_v_x:
11011 case Intrinsic::riscv_sf_vc_v_i:
11012 case Intrinsic::riscv_sf_vc_v_xv:
11013 case Intrinsic::riscv_sf_vc_v_iv:
11014 case Intrinsic::riscv_sf_vc_v_vv:
11015 case Intrinsic::riscv_sf_vc_v_fv:
11016 case Intrinsic::riscv_sf_vc_v_xvv:
11017 case Intrinsic::riscv_sf_vc_v_ivv:
11018 case Intrinsic::riscv_sf_vc_v_vvv:
11019 case Intrinsic::riscv_sf_vc_v_fvv:
11020 case Intrinsic::riscv_sf_vc_v_xvw:
11021 case Intrinsic::riscv_sf_vc_v_ivw:
11022 case Intrinsic::riscv_sf_vc_v_vvw:
11023 case Intrinsic::riscv_sf_vc_v_fvw: {
11024 MVT VT = Op.getSimpleValueType();
11025
11026 SmallVector<SDValue> Operands{Op->op_values()};
11027 processVCIXOperands(Op, Operands, DAG);
11028
11029 MVT RetVT = VT;
11030 if (VT.isFixedLengthVector())
11031 RetVT = getContainerForFixedLengthVector(VT);
11032 else if (VT.isFloatingPoint())
11033 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
11034 VT.getVectorElementCount());
11035
11036 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
11037
11038 if (VT.isFixedLengthVector())
11039 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
11040 else if (VT.isFloatingPoint())
11041 NewNode = DAG.getBitcast(VT, NewNode);
11042
11043 if (Op == NewNode)
11044 break;
11045
11046 return NewNode;
11047 }
11048 }
11049
11050 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11051}
11052
11053static SDValue getVCIXISDNodeWCHAIN(SDValue Op, SelectionDAG &DAG,
11054 unsigned Type) {
11055 SDLoc DL(Op);
11056 SmallVector<SDValue> Operands{Op->op_values()};
11057 Operands.erase(Operands.begin() + 1);
11058
11059 const RISCVSubtarget &Subtarget =
11060 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
11061 MVT VT = Op.getSimpleValueType();
11062 MVT RetVT = VT;
11063 MVT FloatVT = VT;
11064
11065 if (VT.isFloatingPoint()) {
11066 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
11067 VT.getVectorElementCount());
11068 FloatVT = RetVT;
11069 }
11070 if (VT.isFixedLengthVector())
11071 RetVT = getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), RetVT,
11072 Subtarget);
11073
11074 processVCIXOperands(Op, Operands, DAG);
11075
11076 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
11077 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
11078 SDValue Chain = NewNode.getValue(1);
11079
11080 if (VT.isFixedLengthVector())
11081 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
11082 if (VT.isFloatingPoint())
11083 NewNode = DAG.getBitcast(VT, NewNode);
11084
11085 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
11086
11087 return NewNode;
11088}
11089
11090static SDValue getVCIXISDNodeVOID(SDValue Op, SelectionDAG &DAG,
11091 unsigned Type) {
11092 SmallVector<SDValue> Operands{Op->op_values()};
11093 Operands.erase(Operands.begin() + 1);
11094 processVCIXOperands(Op, Operands, DAG);
11095
11096 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
11097}
11098
11099static SDValue
11100lowerFixedVectorSegLoadIntrinsics(unsigned IntNo, SDValue Op,
11101 const RISCVSubtarget &Subtarget,
11102 SelectionDAG &DAG) {
11103 bool IsStrided;
11104 switch (IntNo) {
11105 case Intrinsic::riscv_seg2_load_mask:
11106 case Intrinsic::riscv_seg3_load_mask:
11107 case Intrinsic::riscv_seg4_load_mask:
11108 case Intrinsic::riscv_seg5_load_mask:
11109 case Intrinsic::riscv_seg6_load_mask:
11110 case Intrinsic::riscv_seg7_load_mask:
11111 case Intrinsic::riscv_seg8_load_mask:
11112 IsStrided = false;
11113 break;
11114 case Intrinsic::riscv_sseg2_load_mask:
11115 case Intrinsic::riscv_sseg3_load_mask:
11116 case Intrinsic::riscv_sseg4_load_mask:
11117 case Intrinsic::riscv_sseg5_load_mask:
11118 case Intrinsic::riscv_sseg6_load_mask:
11119 case Intrinsic::riscv_sseg7_load_mask:
11120 case Intrinsic::riscv_sseg8_load_mask:
11121 IsStrided = true;
11122 break;
11123 default:
11124 llvm_unreachable("unexpected intrinsic ID");
11125 };
11126
11127 static const Intrinsic::ID VlsegInts[7] = {
11128 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
11129 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
11130 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
11131 Intrinsic::riscv_vlseg8_mask};
11132 static const Intrinsic::ID VlssegInts[7] = {
11133 Intrinsic::riscv_vlsseg2_mask, Intrinsic::riscv_vlsseg3_mask,
11134 Intrinsic::riscv_vlsseg4_mask, Intrinsic::riscv_vlsseg5_mask,
11135 Intrinsic::riscv_vlsseg6_mask, Intrinsic::riscv_vlsseg7_mask,
11136 Intrinsic::riscv_vlsseg8_mask};
11137
11138 SDLoc DL(Op);
11139 unsigned NF = Op->getNumValues() - 1;
11140 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
11141 MVT XLenVT = Subtarget.getXLenVT();
11142 MVT VT = Op->getSimpleValueType(0);
11143 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
11144 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
11145 ContainerVT.getScalarSizeInBits();
11146 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
11147
11148 // Operands: (chain, int_id, pointer, mask, vl) or
11149 // (chain, int_id, pointer, offset, mask, vl)
11150 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
11151 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
11152 MVT MaskVT = Mask.getSimpleValueType();
11153 MVT MaskContainerVT =
11154 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
11155 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
11156
11157 SDValue IntID = DAG.getTargetConstant(
11158 IsStrided ? VlssegInts[NF - 2] : VlsegInts[NF - 2], DL, XLenVT);
11159 auto *Load = cast<MemIntrinsicSDNode>(Op);
11160
11161 SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
11162 SmallVector<SDValue, 9> Ops = {
11163 Load->getChain(),
11164 IntID,
11165 DAG.getUNDEF(VecTupTy),
11166 Op.getOperand(2),
11167 Mask,
11168 VL,
11169 DAG.getTargetConstant(
11170 RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, XLenVT),
11171 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
11172 // Insert the stride operand.
11173 if (IsStrided)
11174 Ops.insert(std::next(Ops.begin(), 4), Op.getOperand(3));
11175
11176 SDValue Result =
11177 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11178 Load->getMemoryVT(), Load->getMemOperand());
11179 SmallVector<SDValue, 9> Results;
11180 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
11181 SDValue SubVec = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
11182 Result.getValue(0),
11183 DAG.getTargetConstant(RetIdx, DL, MVT::i32));
11184 Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
11185 }
11186 Results.push_back(Result.getValue(1));
11187 return DAG.getMergeValues(Results, DL);
11188}
11189
11190SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
11191 SelectionDAG &DAG) const {
11192 unsigned IntNo = Op.getConstantOperandVal(1);
11193 switch (IntNo) {
11194 default:
11195 break;
11196 case Intrinsic::riscv_seg2_load_mask:
11197 case Intrinsic::riscv_seg3_load_mask:
11198 case Intrinsic::riscv_seg4_load_mask:
11199 case Intrinsic::riscv_seg5_load_mask:
11200 case Intrinsic::riscv_seg6_load_mask:
11201 case Intrinsic::riscv_seg7_load_mask:
11202 case Intrinsic::riscv_seg8_load_mask:
11203 case Intrinsic::riscv_sseg2_load_mask:
11204 case Intrinsic::riscv_sseg3_load_mask:
11205 case Intrinsic::riscv_sseg4_load_mask:
11206 case Intrinsic::riscv_sseg5_load_mask:
11207 case Intrinsic::riscv_sseg6_load_mask:
11208 case Intrinsic::riscv_sseg7_load_mask:
11209 case Intrinsic::riscv_sseg8_load_mask:
11210 return lowerFixedVectorSegLoadIntrinsics(IntNo, Op, Subtarget, DAG);
11211
11212 case Intrinsic::riscv_sf_vc_v_x_se:
11213 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
11214 case Intrinsic::riscv_sf_vc_v_i_se:
11215 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
11216 case Intrinsic::riscv_sf_vc_v_xv_se:
11217 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
11218 case Intrinsic::riscv_sf_vc_v_iv_se:
11219 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
11220 case Intrinsic::riscv_sf_vc_v_vv_se:
11221 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
11222 case Intrinsic::riscv_sf_vc_v_fv_se:
11223 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
11224 case Intrinsic::riscv_sf_vc_v_xvv_se:
11225 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
11226 case Intrinsic::riscv_sf_vc_v_ivv_se:
11227 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
11228 case Intrinsic::riscv_sf_vc_v_vvv_se:
11229 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
11230 case Intrinsic::riscv_sf_vc_v_fvv_se:
11231 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
11232 case Intrinsic::riscv_sf_vc_v_xvw_se:
11233 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
11234 case Intrinsic::riscv_sf_vc_v_ivw_se:
11235 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
11236 case Intrinsic::riscv_sf_vc_v_vvw_se:
11237 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
11238 case Intrinsic::riscv_sf_vc_v_fvw_se:
11239 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
11240 }
11241
11242 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11243}
11244
11245static SDValue
11246lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op,
11247 const RISCVSubtarget &Subtarget,
11248 SelectionDAG &DAG) {
11249 bool IsStrided;
11250 switch (IntNo) {
11251 case Intrinsic::riscv_seg2_store_mask:
11252 case Intrinsic::riscv_seg3_store_mask:
11253 case Intrinsic::riscv_seg4_store_mask:
11254 case Intrinsic::riscv_seg5_store_mask:
11255 case Intrinsic::riscv_seg6_store_mask:
11256 case Intrinsic::riscv_seg7_store_mask:
11257 case Intrinsic::riscv_seg8_store_mask:
11258 IsStrided = false;
11259 break;
11260 case Intrinsic::riscv_sseg2_store_mask:
11261 case Intrinsic::riscv_sseg3_store_mask:
11262 case Intrinsic::riscv_sseg4_store_mask:
11263 case Intrinsic::riscv_sseg5_store_mask:
11264 case Intrinsic::riscv_sseg6_store_mask:
11265 case Intrinsic::riscv_sseg7_store_mask:
11266 case Intrinsic::riscv_sseg8_store_mask:
11267 IsStrided = true;
11268 break;
11269 default:
11270 llvm_unreachable("unexpected intrinsic ID");
11271 }
11272
11273 SDLoc DL(Op);
11274 static const Intrinsic::ID VssegInts[] = {
11275 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
11276 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
11277 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
11278 Intrinsic::riscv_vsseg8_mask};
11279 static const Intrinsic::ID VsssegInts[] = {
11280 Intrinsic::riscv_vssseg2_mask, Intrinsic::riscv_vssseg3_mask,
11281 Intrinsic::riscv_vssseg4_mask, Intrinsic::riscv_vssseg5_mask,
11282 Intrinsic::riscv_vssseg6_mask, Intrinsic::riscv_vssseg7_mask,
11283 Intrinsic::riscv_vssseg8_mask};
11284
11285 // Operands: (chain, int_id, vec*, ptr, mask, vl) or
11286 // (chain, int_id, vec*, ptr, stride, mask, vl)
11287 unsigned NF = Op->getNumOperands() - (IsStrided ? 6 : 5);
11288 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
11289 MVT XLenVT = Subtarget.getXLenVT();
11290 MVT VT = Op->getOperand(2).getSimpleValueType();
11291 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
11292 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
11293 ContainerVT.getScalarSizeInBits();
11294 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
11295
11296 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
11297 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
11298 MVT MaskVT = Mask.getSimpleValueType();
11299 MVT MaskContainerVT =
11300 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
11301 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
11302
11303 SDValue IntID = DAG.getTargetConstant(
11304 IsStrided ? VsssegInts[NF - 2] : VssegInts[NF - 2], DL, XLenVT);
11305 SDValue Ptr = Op->getOperand(NF + 2);
11306
11307 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
11308
11309 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
11310 for (unsigned i = 0; i < NF; i++)
11311 StoredVal = DAG.getNode(
11312 RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
11313 convertToScalableVector(ContainerVT, FixedIntrinsic->getOperand(2 + i),
11314 DAG, Subtarget),
11315 DAG.getTargetConstant(i, DL, MVT::i32));
11316
11317 SmallVector<SDValue, 8> Ops = {
11318 FixedIntrinsic->getChain(),
11319 IntID,
11320 StoredVal,
11321 Ptr,
11322 Mask,
11323 VL,
11324 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
11325 // Insert the stride operand.
11326 if (IsStrided)
11327 Ops.insert(std::next(Ops.begin(), 4),
11328 Op.getOperand(Op.getNumOperands() - 3));
11329
11330 return DAG.getMemIntrinsicNode(
11331 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
11332 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
11333}
11334
11335SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11336 SelectionDAG &DAG) const {
11337 unsigned IntNo = Op.getConstantOperandVal(1);
11338 switch (IntNo) {
11339 default:
11340 break;
11341 case Intrinsic::riscv_seg2_store_mask:
11342 case Intrinsic::riscv_seg3_store_mask:
11343 case Intrinsic::riscv_seg4_store_mask:
11344 case Intrinsic::riscv_seg5_store_mask:
11345 case Intrinsic::riscv_seg6_store_mask:
11346 case Intrinsic::riscv_seg7_store_mask:
11347 case Intrinsic::riscv_seg8_store_mask:
11348 case Intrinsic::riscv_sseg2_store_mask:
11349 case Intrinsic::riscv_sseg3_store_mask:
11350 case Intrinsic::riscv_sseg4_store_mask:
11351 case Intrinsic::riscv_sseg5_store_mask:
11352 case Intrinsic::riscv_sseg6_store_mask:
11353 case Intrinsic::riscv_sseg7_store_mask:
11354 case Intrinsic::riscv_sseg8_store_mask:
11355 return lowerFixedVectorSegStoreIntrinsics(IntNo, Op, Subtarget, DAG);
11356
11357 case Intrinsic::riscv_sf_vc_xv_se:
11358 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
11359 case Intrinsic::riscv_sf_vc_iv_se:
11360 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
11361 case Intrinsic::riscv_sf_vc_vv_se:
11362 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
11363 case Intrinsic::riscv_sf_vc_fv_se:
11364 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
11365 case Intrinsic::riscv_sf_vc_xvv_se:
11366 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
11367 case Intrinsic::riscv_sf_vc_ivv_se:
11368 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
11369 case Intrinsic::riscv_sf_vc_vvv_se:
11370 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
11371 case Intrinsic::riscv_sf_vc_fvv_se:
11372 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
11373 case Intrinsic::riscv_sf_vc_xvw_se:
11374 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
11375 case Intrinsic::riscv_sf_vc_ivw_se:
11376 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
11377 case Intrinsic::riscv_sf_vc_vvw_se:
11378 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
11379 case Intrinsic::riscv_sf_vc_fvw_se:
11380 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
11381 }
11382
11383 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11384}
11385
11386static unsigned getRVVReductionOp(unsigned ISDOpcode) {
11387 switch (ISDOpcode) {
11388 default:
11389 llvm_unreachable("Unhandled reduction");
11390 case ISD::VP_REDUCE_ADD:
11391 case ISD::VECREDUCE_ADD:
11392 return RISCVISD::VECREDUCE_ADD_VL;
11393 case ISD::VP_REDUCE_UMAX:
11394 case ISD::VECREDUCE_UMAX:
11395 return RISCVISD::VECREDUCE_UMAX_VL;
11396 case ISD::VP_REDUCE_SMAX:
11397 case ISD::VECREDUCE_SMAX:
11398 return RISCVISD::VECREDUCE_SMAX_VL;
11399 case ISD::VP_REDUCE_UMIN:
11400 case ISD::VECREDUCE_UMIN:
11401 return RISCVISD::VECREDUCE_UMIN_VL;
11402 case ISD::VP_REDUCE_SMIN:
11403 case ISD::VECREDUCE_SMIN:
11404 return RISCVISD::VECREDUCE_SMIN_VL;
11405 case ISD::VP_REDUCE_AND:
11406 case ISD::VECREDUCE_AND:
11407 return RISCVISD::VECREDUCE_AND_VL;
11408 case ISD::VP_REDUCE_OR:
11409 case ISD::VECREDUCE_OR:
11410 return RISCVISD::VECREDUCE_OR_VL;
11411 case ISD::VP_REDUCE_XOR:
11412 case ISD::VECREDUCE_XOR:
11413 return RISCVISD::VECREDUCE_XOR_VL;
11414 case ISD::VP_REDUCE_FADD:
11415 return RISCVISD::VECREDUCE_FADD_VL;
11416 case ISD::VP_REDUCE_SEQ_FADD:
11417 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
11418 case ISD::VP_REDUCE_FMAX:
11419 case ISD::VP_REDUCE_FMAXIMUM:
11420 return RISCVISD::VECREDUCE_FMAX_VL;
11421 case ISD::VP_REDUCE_FMIN:
11422 case ISD::VP_REDUCE_FMINIMUM:
11423 return RISCVISD::VECREDUCE_FMIN_VL;
11424 }
11425
11426}
11427
11428SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
11429 SelectionDAG &DAG,
11430 bool IsVP) const {
11431 SDLoc DL(Op);
11432 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
11433 MVT VecVT = Vec.getSimpleValueType();
11434 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
11435 Op.getOpcode() == ISD::VECREDUCE_OR ||
11436 Op.getOpcode() == ISD::VECREDUCE_XOR ||
11437 Op.getOpcode() == ISD::VP_REDUCE_AND ||
11438 Op.getOpcode() == ISD::VP_REDUCE_OR ||
11439 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
11440 "Unexpected reduction lowering");
11441
11442 MVT XLenVT = Subtarget.getXLenVT();
11443
11444 MVT ContainerVT = VecVT;
11445 if (VecVT.isFixedLengthVector()) {
11446 ContainerVT = getContainerForFixedLengthVector(VecVT);
11447 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11448 }
11449
11450 SDValue Mask, VL;
11451 if (IsVP) {
11452 Mask = Op.getOperand(2);
11453 VL = Op.getOperand(3);
11454 } else {
11455 std::tie(Mask, VL) =
11456 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11457 }
11458
11459 ISD::CondCode CC;
11460 switch (Op.getOpcode()) {
11461 default:
11462 llvm_unreachable("Unhandled reduction");
11463 case ISD::VECREDUCE_AND:
11464 case ISD::VP_REDUCE_AND: {
11465 // vcpop ~x == 0
11466 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11467 if (IsVP || VecVT.isFixedLengthVector())
11468 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
11469 else
11470 Vec = DAG.getNode(ISD::XOR, DL, ContainerVT, Vec, TrueMask);
11471 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11472 CC = ISD::SETEQ;
11473 break;
11474 }
11475 case ISD::VECREDUCE_OR:
11476 case ISD::VP_REDUCE_OR:
11477 // vcpop x != 0
11478 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11479 CC = ISD::SETNE;
11480 break;
11481 case ISD::VECREDUCE_XOR:
11482 case ISD::VP_REDUCE_XOR: {
11483 // ((vcpop x) & 1) != 0
11484 SDValue One = DAG.getConstant(1, DL, XLenVT);
11485 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11486 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
11487 CC = ISD::SETNE;
11488 break;
11489 }
11490 }
11491
11492 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11493 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
11494 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
11495
11496 if (!IsVP)
11497 return SetCC;
11498
11499 // Now include the start value in the operation.
11500 // Note that we must return the start value when no elements are operated
11501 // upon. The vcpop instructions we've emitted in each case above will return
11502 // 0 for an inactive vector, and so we've already received the neutral value:
11503 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
11504 // can simply include the start value.
11505 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
11506 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
11507}
11508
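// An AVL of the X0 register requests VLMAX and is therefore non-zero; an
// immediate AVL is non-zero iff it is at least 1.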
11509static bool isNonZeroAVL(SDValue AVL) {
11510 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
11511 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
11512 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
11513 (ImmAVL && ImmAVL->getZExtValue() >= 1);
11514}
11515
11516/// Helper to lower a reduction sequence of the form:
11517/// scalar = reduce_op vec, scalar_start
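/// The start value is seeded into element 0 of an LMUL<=1 vector so the RVV
/// .vs reduction form can consume it, and the scalar result is read back out
/// of element 0 of the destination.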
11518static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
11519 SDValue StartValue, SDValue Vec, SDValue Mask,
11520 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
11521 const RISCVSubtarget &Subtarget) {
11522 const MVT VecVT = Vec.getSimpleValueType();
11523 const MVT M1VT = RISCVTargetLowering::getM1VT(VecVT);
11524 const MVT XLenVT = Subtarget.getXLenVT();
11525 const bool NonZeroAVL = isNonZeroAVL(VL);
11526
11527 // The reduction needs an LMUL1 input; do the splat at either LMUL1
11528 // or the original VT if fractional.
11529 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
11530 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
11531 // prove it is non-zero. For the AVL=0 case, we need the scalar to
11532 // be the result of the reduction operation.
11533 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
11534 SDValue InitialValue =
11535 lowerScalarInsert(StartValue, InnerVL, InnerVT, DL, DAG, Subtarget);
11536 if (M1VT != InnerVT)
11537 InitialValue =
11538 DAG.getInsertSubvector(DL, DAG.getUNDEF(M1VT), InitialValue, 0);
11539 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
11540 SDValue Policy = DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
11541 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
11542 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
11543 return DAG.getExtractVectorElt(DL, ResVT, Reduction, 0);
11544}
11545
11546SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
11547 SelectionDAG &DAG) const {
11548 SDLoc DL(Op);
11549 SDValue Vec = Op.getOperand(0);
11550 EVT VecEVT = Vec.getValueType();
11551
11552 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
11553
11554 // Due to ordering in legalize types we may have a vector type that needs to
11555 // be split. Do that manually so we can get down to a legal type.
11556 while (getTypeAction(*DAG.getContext(), VecEVT) ==
11557 TargetLowering::TypeSplitVector) {
11558 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
11559 VecEVT = Lo.getValueType();
11560 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
11561 }
11562
11563 // TODO: The type may need to be widened rather than split. Or widened before
11564 // it can be split.
11565 if (!isTypeLegal(VecEVT))
11566 return SDValue();
11567
11568 MVT VecVT = VecEVT.getSimpleVT();
11569 MVT VecEltVT = VecVT.getVectorElementType();
11570 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
11571
11572 MVT ContainerVT = VecVT;
11573 if (VecVT.isFixedLengthVector()) {
11574 ContainerVT = getContainerForFixedLengthVector(VecVT);
11575 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11576 }
11577
11578 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11579
11580 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
11581 switch (BaseOpc) {
11582 case ISD::AND:
11583 case ISD::OR:
11584 case ISD::UMAX:
11585 case ISD::UMIN:
11586 case ISD::SMAX:
11587 case ISD::SMIN:
11588 StartV = DAG.getExtractVectorElt(DL, VecEltVT, Vec, 0);
11589 }
11590 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
11591 Mask, VL, DL, DAG, Subtarget);
11592}
11593
11594// Given a reduction op, this function returns the matching reduction opcode,
11595// the vector SDValue and the scalar SDValue required to lower this to a
11596// RISCVISD node.
11597static std::tuple<unsigned, SDValue, SDValue>
11599 const RISCVSubtarget &Subtarget) {
11600 SDLoc DL(Op);
11601 auto Flags = Op->getFlags();
11602 unsigned Opcode = Op.getOpcode();
11603 switch (Opcode) {
11604 default:
11605 llvm_unreachable("Unhandled reduction");
11606 case ISD::VECREDUCE_FADD: {
11607 // Use positive zero if we can. It is cheaper to materialize.
11608 SDValue Zero =
11609 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
11610 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
11611 }
11612 case ISD::VECREDUCE_SEQ_FADD:
11613 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
11614 Op.getOperand(0));
11615 case ISD::VECREDUCE_FMINIMUM:
11616 case ISD::VECREDUCE_FMAXIMUM:
11617 case ISD::VECREDUCE_FMIN:
11618 case ISD::VECREDUCE_FMAX: {
11619 SDValue Front = DAG.getExtractVectorElt(DL, EltVT, Op.getOperand(0), 0);
11620 unsigned RVVOpc =
11621 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
11622 ? RISCVISD::VECREDUCE_FMIN_VL
11623 : RISCVISD::VECREDUCE_FMAX_VL;
11624 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
11625 }
11626 }
11627}
11628
11629SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
11630 SelectionDAG &DAG) const {
11631 SDLoc DL(Op);
11632 MVT VecEltVT = Op.getSimpleValueType();
11633
11634 unsigned RVVOpcode;
11635 SDValue VectorVal, ScalarVal;
11636 std::tie(RVVOpcode, VectorVal, ScalarVal) =
11637 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
11638 MVT VecVT = VectorVal.getSimpleValueType();
11639
11640 MVT ContainerVT = VecVT;
11641 if (VecVT.isFixedLengthVector()) {
11642 ContainerVT = getContainerForFixedLengthVector(VecVT);
11643 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
11644 }
11645
11646 MVT ResVT = Op.getSimpleValueType();
11647 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11648 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
11649 VL, DL, DAG, Subtarget);
11650 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
11651 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
11652 return Res;
11653
11654 if (Op->getFlags().hasNoNaNs())
11655 return Res;
11656
11657 // Force output to NaN if any element is Nan.
11658 SDValue IsNan =
11659 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
11660 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
11661 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
11662 MVT XLenVT = Subtarget.getXLenVT();
11663 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
11664 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
11665 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
11666 return DAG.getSelect(
11667 DL, ResVT, NoNaNs, Res,
11668 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
11669}
11670
11671SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
11672 SelectionDAG &DAG) const {
11673 SDLoc DL(Op);
11674 unsigned Opc = Op.getOpcode();
11675 SDValue Start = Op.getOperand(0);
11676 SDValue Vec = Op.getOperand(1);
11677 EVT VecEVT = Vec.getValueType();
11678 MVT XLenVT = Subtarget.getXLenVT();
11679
11680 // TODO: The type may need to be widened rather than split. Or widened before
11681 // it can be split.
11682 if (!isTypeLegal(VecEVT))
11683 return SDValue();
11684
11685 MVT VecVT = VecEVT.getSimpleVT();
11686 unsigned RVVOpcode = getRVVReductionOp(Opc);
11687
11688 if (VecVT.isFixedLengthVector()) {
11689 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
11690 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11691 }
11692
11693 SDValue VL = Op.getOperand(3);
11694 SDValue Mask = Op.getOperand(2);
11695 SDValue Res =
11696 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
11697 Vec, Mask, VL, DL, DAG, Subtarget);
11698 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
11699 Op->getFlags().hasNoNaNs())
11700 return Res;
11701
11702 // Propagate NaNs.
11703 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
11704 // Check if any of the elements in Vec is NaN.
11705 SDValue IsNaN = DAG.getNode(
11706 RISCVISD::SETCC_VL, DL, PredVT,
11707 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
11708 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
11709 // Check if the start value is NaN.
11710 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
11711 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
11712 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
11713 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
11714 MVT ResVT = Res.getSimpleValueType();
11715 return DAG.getSelect(
11716 DL, ResVT, NoNaNs, Res,
11717 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
11718}
11719
11720SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
11721 SelectionDAG &DAG) const {
11722 SDValue Vec = Op.getOperand(0);
11723 SDValue SubVec = Op.getOperand(1);
11724 MVT VecVT = Vec.getSimpleValueType();
11725 MVT SubVecVT = SubVec.getSimpleValueType();
11726
11727 SDLoc DL(Op);
11728 MVT XLenVT = Subtarget.getXLenVT();
11729 unsigned OrigIdx = Op.getConstantOperandVal(2);
11730 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
11731
11732 if (OrigIdx == 0 && Vec.isUndef())
11733 return Op;
11734
11735 // We don't have the ability to slide mask vectors up indexed by their i1
11736 // elements; the smallest we can do is i8. Often we are able to bitcast to
11737 // equivalent i8 vectors. Note that when inserting a fixed-length vector
11738 // into a scalable one, we might not necessarily have enough scalable
11739 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
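 // For example, inserting v8i1 at index 8 into nxv8i1 can be rewritten as
 // inserting v1i8 at index 1 into nxv1i8.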
11740 if (SubVecVT.getVectorElementType() == MVT::i1) {
11741 if (VecVT.getVectorMinNumElements() >= 8 &&
11742 SubVecVT.getVectorMinNumElements() >= 8) {
11743 assert(OrigIdx % 8 == 0 && "Invalid index");
11744 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
11745 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
11746 "Unexpected mask vector lowering");
11747 OrigIdx /= 8;
11748 SubVecVT =
11749 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
11750 SubVecVT.isScalableVector());
11751 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
11752 VecVT.isScalableVector());
11753 Vec = DAG.getBitcast(VecVT, Vec);
11754 SubVec = DAG.getBitcast(SubVecVT, SubVec);
11755 } else {
11756 // We can't slide this mask vector up indexed by its i1 elements.
11757 // This poses a problem when we wish to insert a scalable vector which
11758 // can't be re-expressed as a larger type. Just choose the slow path and
11759 // extend to a larger type, then truncate back down.
11760 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
11761 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
11762 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
11763 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
11764 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
11765 Op.getOperand(2));
11766 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
11767 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
11768 }
11769 }
11770
11771 // If the subvector is a fixed-length type and we don't know VLEN
11772 // exactly, we cannot use subregister manipulation to simplify the codegen; we
11773 // don't know which register of a LMUL group contains the specific subvector
11774 // as we only know the minimum register size. Therefore we must slide the
11775 // vector group up the full amount.
11776 const auto VLen = Subtarget.getRealVLen();
11777 if (SubVecVT.isFixedLengthVector() && !VLen) {
11778 MVT ContainerVT = VecVT;
11779 if (VecVT.isFixedLengthVector()) {
11780 ContainerVT = getContainerForFixedLengthVector(VecVT);
11781 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11782 }
11783
11784 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), SubVec, 0);
11785
11786 SDValue Mask =
11787 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
11788 // Set the vector length to only the number of elements we care about. Note
11789 // that for slideup this includes the offset.
11790 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
11791 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
11792
11793 // Use tail agnostic policy if we're inserting over Vec's tail.
11794 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
11795 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
11796 Policy = RISCVVType::TAIL_AGNOSTIC;
11797
11798 // If we're inserting into the lowest elements, use a tail undisturbed
11799 // vmv.v.v.
11800 if (OrigIdx == 0) {
11801 SubVec =
11802 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
11803 } else {
11804 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
11805 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
11806 SlideupAmt, Mask, VL, Policy);
11807 }
11808
11809 if (VecVT.isFixedLengthVector())
11810 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
11811 return DAG.getBitcast(Op.getValueType(), SubVec);
11812 }
11813
11814 MVT ContainerVecVT = VecVT;
11815 if (VecVT.isFixedLengthVector()) {
11816 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
11817 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
11818 }
11819
11820 MVT ContainerSubVecVT = SubVecVT;
11821 if (SubVecVT.isFixedLengthVector()) {
11822 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
11823 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
11824 }
11825
11826 unsigned SubRegIdx;
11827 ElementCount RemIdx;
11828 // insert_subvector scales the index by vscale if the subvector is scalable,
11829 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
11830 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
11831 if (SubVecVT.isFixedLengthVector()) {
11832 assert(VLen);
11833 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11834 auto Decompose =
11835 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
11836 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
11837 SubRegIdx = Decompose.first;
11838 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
11839 (OrigIdx % Vscale));
11840 } else {
11841 auto Decompose =
11842 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
11843 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
11844 SubRegIdx = Decompose.first;
11845 RemIdx = ElementCount::getScalable(Decompose.second);
11846 }
11847
11848 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
11849 assert(isPowerOf2_64(
11850 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
11851 bool ExactlyVecRegSized =
11852 Subtarget.expandVScale(SubVecVT.getSizeInBits())
11853 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
11854
11855 // 1. If the Idx has been completely eliminated and this subvector's size is
11856 // a vector register or a multiple thereof, or the surrounding elements are
11857 // undef, then this is a subvector insert which naturally aligns to a vector
11858 // register. These can easily be handled using subregister manipulation.
11859 // 2. If the subvector isn't an exact multiple of a valid register group size,
11860 // then the insertion must preserve the undisturbed elements of the register.
11861 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
11862 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
11863 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
11864 // of that LMUL=1 type back into the larger vector (resolving to another
11865 // subregister operation). See below for how our VSLIDEUP works. We go via a
11866 // LMUL=1 type to avoid allocating a large register group to hold our
11867 // subvector.
11868 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
11869 if (SubVecVT.isFixedLengthVector()) {
11870 // We may get NoSubRegister if inserting at index 0 and the subvec
11871 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
11872 if (SubRegIdx == RISCV::NoSubRegister) {
11873 assert(OrigIdx == 0);
11874 return Op;
11875 }
11876
11877 // Use an insert_subvector that will resolve to an insert subreg.
11878 assert(VLen);
11879 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11880 SDValue Insert =
11881 DAG.getInsertSubvector(DL, Vec, SubVec, OrigIdx / Vscale);
11882 if (VecVT.isFixedLengthVector())
11883 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
11884 return Insert;
11885 }
11886 return Op;
11887 }
11888
11889 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
11890 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
11891 // (in our case undisturbed). This means we can set up a subvector insertion
11892 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
11893 // size of the subvector.
11894 MVT InterSubVT = ContainerVecVT;
11895 SDValue AlignedExtract = Vec;
11896 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
11897 if (SubVecVT.isFixedLengthVector()) {
11898 assert(VLen);
11899 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
11900 }
11901 if (ContainerVecVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVecVT))) {
11902 InterSubVT = RISCVTargetLowering::getM1VT(ContainerVecVT);
11903 // Extract a subvector equal to the nearest full vector register type. This
11904 // should resolve to a EXTRACT_SUBREG instruction.
11905 AlignedExtract = DAG.getExtractSubvector(DL, InterSubVT, Vec, AlignedIdx);
11906 }
11907
11908 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(InterSubVT), SubVec, 0);
11909
11910 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
11911
11912 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
11913 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
11914
11915 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
11916 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
11917 if (Subtarget.expandVScale(EndIndex) ==
11918 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
11919 Policy = RISCVVType::TAIL_AGNOSTIC;
11920
11921 // If we're inserting into the lowest elements, use a tail undisturbed
11922 // vmv.v.v.
11923 if (RemIdx.isZero()) {
11924 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
11925 SubVec, VL);
11926 } else {
11927 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
11928
11929 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
11930 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
11931
11932 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
11933 SlideupAmt, Mask, VL, Policy);
11934 }
11935
11936 // If required, insert this subvector back into the correct vector register.
11937 // This should resolve to an INSERT_SUBREG instruction.
11938 if (ContainerVecVT.bitsGT(InterSubVT))
11939 SubVec = DAG.getInsertSubvector(DL, Vec, SubVec, AlignedIdx);
11940
11941 if (VecVT.isFixedLengthVector())
11942 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
11943
11944 // We might have bitcast from a mask type: cast back to the original type if
11945 // required.
11946 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
11947}
11948
11949SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
11950 SelectionDAG &DAG) const {
11951 SDValue Vec = Op.getOperand(0);
11952 MVT SubVecVT = Op.getSimpleValueType();
11953 MVT VecVT = Vec.getSimpleValueType();
11954
11955 SDLoc DL(Op);
11956 MVT XLenVT = Subtarget.getXLenVT();
11957 unsigned OrigIdx = Op.getConstantOperandVal(1);
11958 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
11959
11960 // With an index of 0 this is a cast-like subvector, which can be performed
11961 // with subregister operations.
11962 if (OrigIdx == 0)
11963 return Op;
11964
11965 // We don't have the ability to slide mask vectors down indexed by their i1
11966 // elements; the smallest we can do is i8. Often we are able to bitcast to
11967 // equivalent i8 vectors. Note that when extracting a fixed-length vector
11968 // from a scalable one, we might not necessarily have enough scalable
11969 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
11970 if (SubVecVT.getVectorElementType() == MVT::i1) {
11971 if (VecVT.getVectorMinNumElements() >= 8 &&
11972 SubVecVT.getVectorMinNumElements() >= 8) {
11973 assert(OrigIdx % 8 == 0 && "Invalid index");
11974 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
11975 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
11976 "Unexpected mask vector lowering");
11977 OrigIdx /= 8;
11978 SubVecVT =
11979 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
11980 SubVecVT.isScalableVector());
11981 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
11982 VecVT.isScalableVector());
11983 Vec = DAG.getBitcast(VecVT, Vec);
11984 } else {
11985 // We can't slide this mask vector down, indexed by its i1 elements.
11986 // This poses a problem when we wish to extract a scalable vector which
11987 // can't be re-expressed as a larger type. Just choose the slow path and
11988 // extend to a larger type, then truncate back down.
11989 // TODO: We could probably improve this when extracting certain fixed
11990 // from fixed, where we can extract as i8 and shift the correct element
11991 // right to reach the desired subvector?
11992 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
11993 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
11994 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
11995 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
11996 Op.getOperand(1));
11997 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
11998 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
11999 }
12000 }
12001
12002 const auto VLen = Subtarget.getRealVLen();
12003
12004 // If the subvector is a fixed-length type and we don't know VLEN
12005 // exactly, we cannot use subregister manipulation to simplify the codegen; we
12006 // don't know which register of a LMUL group contains the specific subvector
12007 // as we only know the minimum register size. Therefore we must slide the
12008 // vector group down the full amount.
12009 if (SubVecVT.isFixedLengthVector() && !VLen) {
12010 MVT ContainerVT = VecVT;
12011 if (VecVT.isFixedLengthVector()) {
12012 ContainerVT = getContainerForFixedLengthVector(VecVT);
12013 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12014 }
12015
12016 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
12017 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
12018 if (auto ShrunkVT =
12019 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
12020 ContainerVT = *ShrunkVT;
12021 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
12022 }
12023
12024 SDValue Mask =
12025 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
12026 // Set the vector length to only the number of elements we care about. This
12027 // avoids sliding down elements we're going to discard straight away.
12028 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
12029 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
12030 SDValue Slidedown =
12031 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12032 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
12033 // Now we can use a cast-like subvector extract to get the result.
12034 Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
12035 return DAG.getBitcast(Op.getValueType(), Slidedown);
12036 }
12037
12038 if (VecVT.isFixedLengthVector()) {
12039 VecVT = getContainerForFixedLengthVector(VecVT);
12040 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
12041 }
12042
12043 MVT ContainerSubVecVT = SubVecVT;
12044 if (SubVecVT.isFixedLengthVector())
12045 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
12046
12047 unsigned SubRegIdx;
12048 ElementCount RemIdx;
12049 // extract_subvector scales the index by vscale if the subvector is scalable,
12050 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
12051 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
12052 if (SubVecVT.isFixedLengthVector()) {
12053 assert(VLen);
12054 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12055 auto Decompose =
12056 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
12057 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
12058 SubRegIdx = Decompose.first;
12059 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
12060 (OrigIdx % Vscale));
12061 } else {
12062 auto Decompose =
12063 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
12064 VecVT, ContainerSubVecVT, OrigIdx, TRI);
12065 SubRegIdx = Decompose.first;
12066 RemIdx = ElementCount::getScalable(Decompose.second);
12067 }
12068
12069 // If the Idx has been completely eliminated then this is a subvector extract
12070 // which naturally aligns to a vector register. These can easily be handled
12071 // using subregister manipulation. We use an extract_subvector that will
12072 // resolve to an extract subreg.
12073 if (RemIdx.isZero()) {
12074 if (SubVecVT.isFixedLengthVector()) {
12075 assert(VLen);
12076 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12077 Vec =
12078 DAG.getExtractSubvector(DL, ContainerSubVecVT, Vec, OrigIdx / Vscale);
12079 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
12080 }
12081 return Op;
12082 }
12083
12084 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
12085 // was > M1 then the index would need to be a multiple of VLMAX, and so would
12086 // divide exactly.
12087 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
12088 getLMUL(ContainerSubVecVT) == RISCVVType::LMUL_1);
12089
12090 // If the vector type is an LMUL-group type, extract a subvector equal to the
12091 // nearest full vector register type.
12092 MVT InterSubVT = VecVT;
12093 if (VecVT.bitsGT(RISCVTargetLowering::getM1VT(VecVT))) {
12094 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
12095 // we should have successfully decomposed the extract into a subregister.
12096 // We use an extract_subvector that will resolve to a subreg extract.
12097 assert(SubRegIdx != RISCV::NoSubRegister);
12098 (void)SubRegIdx;
12099 unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
12100 if (SubVecVT.isFixedLengthVector()) {
12101 assert(VLen);
12102 Idx /= *VLen / RISCV::RVVBitsPerBlock;
12103 }
12104 InterSubVT = RISCVTargetLowering::getM1VT(VecVT);
12105 Vec = DAG.getExtractSubvector(DL, InterSubVT, Vec, Idx);
12106 }
12107
12108 // Slide this vector register down by the desired number of elements in order
12109 // to place the desired subvector starting at element 0.
12110 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
12111 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
12112 if (SubVecVT.isFixedLengthVector())
12113 VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
12114 SDValue Slidedown =
12115 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
12116 Vec, SlidedownAmt, Mask, VL);
12117
12118 // Now the vector is in the right position, extract our final subvector. This
12119 // should resolve to a COPY.
12120 Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
12121
12122 // We might have bitcast from a mask type: cast back to the original type if
12123 // required.
12124 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
12125}
12126
12127// Widen a vector's operands to i8, then truncate its results back to the
12128// original type, typically i1. All operand and result types must be the same.
12129static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
12130 SelectionDAG &DAG) {
12131 MVT VT = N.getSimpleValueType();
12132 MVT WideVT = VT.changeVectorElementType(MVT::i8);
12133 SmallVector<SDValue, 4> WideOps;
12134 for (SDValue Op : N->ops()) {
12135 assert(Op.getSimpleValueType() == VT &&
12136 "Operands and result must be same type");
12137 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
12138 }
12139
12140 unsigned NumVals = N->getNumValues();
12141
12142 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
12143 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
12144 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
12145 SmallVector<SDValue, 4> TruncVals;
12146 for (unsigned I = 0; I < NumVals; I++) {
12147 TruncVals.push_back(
12148 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
12149 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
12150 }
12151
12152 if (TruncVals.size() > 1)
12153 return DAG.getMergeValues(TruncVals, DL);
12154 return TruncVals.front();
12155}
12156
12157SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
12158 SelectionDAG &DAG) const {
12159 SDLoc DL(Op);
12160 MVT VecVT = Op.getSimpleValueType();
12161
12162 const unsigned Factor = Op->getNumValues();
12163 assert(Factor <= 8);
12164
12165 // 1 bit element vectors need to be widened to e8
12166 if (VecVT.getVectorElementType() == MVT::i1)
12167 return widenVectorOpsToi8(Op, DL, DAG);
12168
12169 // Convert to scalable vectors first.
12170 if (VecVT.isFixedLengthVector()) {
12171 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
12172 SmallVector<SDValue, 8> Ops(Factor);
12173 for (unsigned i = 0U; i < Factor; ++i)
12174 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
12175 Subtarget);
12176
12177 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
12178 SDValue NewDeinterleave =
12179 DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, DAG.getVTList(VTs), Ops);
12180
12181 SmallVector<SDValue, 8> Res(Factor);
12182 for (unsigned i = 0U; i < Factor; ++i)
12183 Res[i] = convertFromScalableVector(VecVT, NewDeinterleave.getValue(i),
12184 DAG, Subtarget);
12185 return DAG.getMergeValues(Res, DL);
12186 }
12187
12188 // If concatenating would exceed LMUL=8, we need to split.
12189 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
12190 (8 * RISCV::RVVBitsPerBlock)) {
12191 SmallVector<SDValue, 8> Ops(Factor * 2);
12192 for (unsigned i = 0; i != Factor; ++i) {
12193 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
12194 Ops[i * 2] = OpLo;
12195 Ops[i * 2 + 1] = OpHi;
12196 }
12197
12198 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
12199
12200 SDValue Lo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, DAG.getVTList(VTs),
12201 ArrayRef(Ops).slice(0, Factor));
12202 SDValue Hi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, DAG.getVTList(VTs),
12203 ArrayRef(Ops).slice(Factor, Factor));
12204
12205 SmallVector<SDValue, 8> Res(Factor);
12206 for (unsigned i = 0; i != Factor; ++i)
12207 Res[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo.getValue(i),
12208 Hi.getValue(i));
12209
12210 return DAG.getMergeValues(Res, DL);
12211 }
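// For example (assumed shapes): a Factor == 2 deinterleave of two nxv8i64
// operands would require an nxv16i64 concatenation, exceeding LMUL=8, so each
// operand is split into nxv4i64 halves, the low and high halves are
// deinterleaved independently, and the per-factor results are concatenated
// back together.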
12212
12213 if (Subtarget.hasVendorXRivosVizip() && Factor == 2) {
12214 MVT VT = Op->getSimpleValueType(0);
12215 SDValue V1 = Op->getOperand(0);
12216 SDValue V2 = Op->getOperand(1);
12217
12218 // For fractional LMUL, check if we can use a higher LMUL
12219 // instruction to avoid a vslidedown.
12220 if (SDValue Src = foldConcatVector(V1, V2);
12221 Src && RISCVTargetLowering::getM1VT(VT).bitsGT(VT)) {
12222 EVT NewVT = VT.getDoubleNumVectorElementsVT();
12223 Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
12224 // Freeze the source so we can increase its use count.
12225 Src = DAG.getFreeze(Src);
12226 SDValue Even = lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, Src,
12227 DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
12228 SDValue Odd = lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, Src,
12229 DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
12230 Even = DAG.getExtractSubvector(DL, VT, Even, 0);
12231 Odd = DAG.getExtractSubvector(DL, VT, Odd, 0);
12232 return DAG.getMergeValues({Even, Odd}, DL);
12233 }
12234
12235 // Freeze the sources so we can increase their use count.
12236 V1 = DAG.getFreeze(V1);
12237 V2 = DAG.getFreeze(V2);
12238 SDValue Even =
12239 lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, V1, V2, DL, DAG, Subtarget);
12240 SDValue Odd =
12241 lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, V1, V2, DL, DAG, Subtarget);
12242 return DAG.getMergeValues({Even, Odd}, DL);
12243 }
12244
12245 SmallVector<SDValue, 8> Ops(Op->op_values());
12246
12247 // Concatenate the vectors as one vector to deinterleave
12248 MVT ConcatVT =
12249 MVT::getVectorVT(VecVT.getVectorElementType(),
12250 VecVT.getVectorElementCount().multiplyCoefficientBy(
12251 PowerOf2Ceil(Factor)));
12252 if (Ops.size() < PowerOf2Ceil(Factor))
12253 Ops.append(PowerOf2Ceil(Factor) - Factor, DAG.getUNDEF(VecVT));
12254 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, Ops);
12255
12256 if (Factor == 2) {
12257 // We can deinterleave through vnsrl.wi if the element type is smaller than
12258 // ELEN
12259 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
12260 SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG);
12261 SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG);
12262 return DAG.getMergeValues({Even, Odd}, DL);
12263 }
12264
12265 // For the masks, use a vmv.v.x of an i8 constant to fill the largest
12266 // possible mask vector, then extract the required subvector. Doing this
12267 // (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
12268 // creation to be rematerialized during register allocation to reduce
12269 // register pressure if needed.
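// For example (illustrative only): splatting the i8 constant 0b01010101 and
// reinterpreting the nxv8i8 result as nxv64i1 yields a mask whose bit i is
// set exactly when (i % 2) == 0, i.e. the even-element mask; 0b10101010
// likewise yields the odd-element mask.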
12270
12271 MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
12272
12273 SDValue EvenSplat = DAG.getConstant(0b01010101, DL, MVT::nxv8i8);
12274 EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat);
12275 SDValue EvenMask = DAG.getExtractSubvector(DL, MaskVT, EvenSplat, 0);
12276
12277 SDValue OddSplat = DAG.getConstant(0b10101010, DL, MVT::nxv8i8);
12278 OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat);
12279 SDValue OddMask = DAG.getExtractSubvector(DL, MaskVT, OddSplat, 0);
12280
12281 // vcompress the even and odd elements into two separate vectors
12282 SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
12283 EvenMask, DAG.getUNDEF(ConcatVT));
12284 SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
12285 OddMask, DAG.getUNDEF(ConcatVT));
12286
12287 // Extract the low half of each compressed result for even and odd
12288 SDValue Even = DAG.getExtractSubvector(DL, VecVT, EvenWide, 0);
12289 SDValue Odd = DAG.getExtractSubvector(DL, VecVT, OddWide, 0);
12290
12291 return DAG.getMergeValues({Even, Odd}, DL);
12292 }
12293
12294 // Store with a unit-stride store and load it back with a segmented load.
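// Roughly, for Factor == 3 and SEW == 32 this becomes (an illustrative
// sketch, not necessarily the exact emitted sequence):
//   vse32.v     v8, (sp)     ; store the concatenated data contiguously
//   vlseg3e32.v v0, (sp)     ; the segmented load splits it back into 3 parts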
12295 MVT XLenVT = Subtarget.getXLenVT();
12296 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
12297 SDValue Passthru = DAG.getUNDEF(ConcatVT);
12298
12299 // Allocate a stack slot.
12300 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
12301 SDValue StackPtr =
12302 DAG.CreateStackTemporary(ConcatVT.getStoreSize(), Alignment);
12303 auto &MF = DAG.getMachineFunction();
12304 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12305 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12306
12307 SDValue StoreOps[] = {DAG.getEntryNode(),
12308 DAG.getTargetConstant(Intrinsic::riscv_vse, DL, XLenVT),
12309 Concat, StackPtr, VL};
12310
12311 SDValue Chain = DAG.getMemIntrinsicNode(
12312 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), StoreOps,
12313 ConcatVT.getVectorElementType(), PtrInfo, Alignment,
12314 MachineMemOperand::MOStore);
12315 
12316 static const Intrinsic::ID VlsegIntrinsicsIds[] = {
12317 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
12318 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
12319 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
12320 Intrinsic::riscv_vlseg8_mask};
12321
12322 SDValue LoadOps[] = {
12323 Chain,
12324 DAG.getTargetConstant(VlsegIntrinsicsIds[Factor - 2], DL, XLenVT),
12325 Passthru,
12326 StackPtr,
12327 Mask,
12328 VL,
12329 DAG.getTargetConstant(
12330 RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, XLenVT),
12331 DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()), DL, XLenVT)};
12332
12333 unsigned Sz =
12334 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
12335 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
12336
12337 SDValue Load = DAG.getMemIntrinsicNode(
12338 ISD::INTRINSIC_W_CHAIN, DL, DAG.getVTList({VecTupTy, MVT::Other}),
12339 LoadOps, ConcatVT.getVectorElementType(), PtrInfo, Alignment,
12340 MachineMemOperand::MOLoad);
12341 
12342 SmallVector<SDValue, 8> Res(Factor);
12343
12344 for (unsigned i = 0U; i < Factor; ++i)
12345 Res[i] = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, VecVT, Load,
12346 DAG.getTargetConstant(i, DL, MVT::i32));
12347
12348 return DAG.getMergeValues(Res, DL);
12349}
12350
12351SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
12352 SelectionDAG &DAG) const {
12353 SDLoc DL(Op);
12354 MVT VecVT = Op.getSimpleValueType();
12355
12356 const unsigned Factor = Op.getNumOperands();
12357 assert(Factor <= 8);
12358
12359 // i1 vectors need to be widened to i8
12360 if (VecVT.getVectorElementType() == MVT::i1)
12361 return widenVectorOpsToi8(Op, DL, DAG);
12362
12363 // Convert to scalable vectors first.
12364 if (VecVT.isFixedLengthVector()) {
12365 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
12366 SmallVector<SDValue, 8> Ops(Factor);
12367 for (unsigned i = 0U; i < Factor; ++i)
12368 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
12369 Subtarget);
12370
12371 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
12372 SDValue NewInterleave = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs, Ops);
12373
12374 SmallVector<SDValue, 8> Res(Factor);
12375 for (unsigned i = 0U; i < Factor; ++i)
12376 Res[i] = convertFromScalableVector(VecVT, NewInterleave.getValue(i), DAG,
12377 Subtarget);
12378 return DAG.getMergeValues(Res, DL);
12379 }
12380
12381 MVT XLenVT = Subtarget.getXLenVT();
12382 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
12383
12384 // If the VT is larger than LMUL=8, we need to split and reassemble.
12385 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
12386 (8 * RISCV::RVVBitsPerBlock)) {
12387 SmallVector<SDValue, 8> Ops(Factor * 2);
12388 for (unsigned i = 0; i != Factor; ++i) {
12389 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
12390 Ops[i] = OpLo;
12391 Ops[i + Factor] = OpHi;
12392 }
12393
12394 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
12395
12396 SDValue Res[] = {DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
12397 ArrayRef(Ops).take_front(Factor)),
12398 DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
12399 ArrayRef(Ops).drop_front(Factor))};
12400
12401 SmallVector<SDValue, 8> Concats(Factor);
12402 for (unsigned i = 0; i != Factor; ++i) {
12403 unsigned IdxLo = 2 * i;
12404 unsigned IdxHi = 2 * i + 1;
12405 Concats[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
12406 Res[IdxLo / Factor].getValue(IdxLo % Factor),
12407 Res[IdxHi / Factor].getValue(IdxHi % Factor));
12408 }
12409
12410 return DAG.getMergeValues(Concats, DL);
12411 }
12412
12413 SDValue Interleaved;
12414
12415 // Spill to the stack using a segment store for simplicity.
12416 if (Factor != 2) {
12417 EVT MemVT =
12418 EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(),
12419 VecVT.getVectorElementCount() * Factor);
12420
12421 // Allocate a stack slot.
12422 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
12423 SDValue StackPtr =
12424 DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
12425 EVT PtrVT = StackPtr.getValueType();
12426 auto &MF = DAG.getMachineFunction();
12427 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12428 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12429
12430 static const Intrinsic::ID IntrIds[] = {
12431 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
12432 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
12433 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
12434 Intrinsic::riscv_vsseg8_mask,
12435 };
12436
12437 unsigned Sz =
12438 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
12439 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
12440
12441 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
12442 for (unsigned i = 0; i < Factor; i++)
12443 StoredVal =
12444 DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
12445 Op.getOperand(i), DAG.getTargetConstant(i, DL, MVT::i32));
12446
12447 SDValue Ops[] = {DAG.getEntryNode(),
12448 DAG.getTargetConstant(IntrIds[Factor - 2], DL, XLenVT),
12449 StoredVal,
12450 StackPtr,
12451 Mask,
12452 VL,
12453 DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()),
12454 DL, XLenVT)};
12455
12456 SDValue Chain = DAG.getMemIntrinsicNode(
12457 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
12458 VecVT.getVectorElementType(), PtrInfo, Alignment,
12459 MachineMemOperand::MOStore);
12460 
12461 SmallVector<SDValue, 8> Loads(Factor);
12462
12463 SDValue Increment =
12464 DAG.getVScale(DL, PtrVT,
12465 APInt(PtrVT.getFixedSizeInBits(),
12466 VecVT.getStoreSize().getKnownMinValue()));
12467 for (unsigned i = 0; i != Factor; ++i) {
12468 if (i != 0)
12469 StackPtr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, Increment);
12470
12471 Loads[i] = DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
12472 }
12473
12474 return DAG.getMergeValues(Loads, DL);
12475 }
12476
12477 // Use ri.vzip2{a,b} if available
12478 // TODO: Figure out the best lowering for the spread variants
12479 if (Subtarget.hasVendorXRivosVizip() && !Op.getOperand(0).isUndef() &&
12480 !Op.getOperand(1).isUndef()) {
12481 // Freeze the sources so we can increase their use count.
12482 SDValue V1 = DAG.getFreeze(Op->getOperand(0));
12483 SDValue V2 = DAG.getFreeze(Op->getOperand(1));
12484 SDValue Lo = lowerVZIP(RISCVISD::RI_VZIP2A_VL, V1, V2, DL, DAG, Subtarget);
12485 SDValue Hi = lowerVZIP(RISCVISD::RI_VZIP2B_VL, V1, V2, DL, DAG, Subtarget);
12486 return DAG.getMergeValues({Lo, Hi}, DL);
12487 }
12488
12489 // If the element type is smaller than ELEN, then we can interleave with
12490 // vwaddu.vv and vwmaccu.vx
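// Illustrative identity behind the widening interleave (a sketch; the Zvbb
// vwsll-based variant is not shown): with W = 2^SEW,
//   vwaddu.vv  t, even, odd       ; t = zext(even) + zext(odd)
//   vwmaccu.vx t, (W - 1), odd    ; t += (W - 1) * zext(odd)
// so each 2*SEW-wide lane holds zext(even) + W * zext(odd), i.e. the
// odd:even pair packed together.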
12491 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
12492 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
12493 DAG, Subtarget);
12494 } else {
12495 // Otherwise, fall back to using vrgatherei16.vv
12496 MVT ConcatVT =
12497 MVT::getVectorVT(VecVT.getVectorElementType(),
12498 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
12499 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
12500 Op.getOperand(0), Op.getOperand(1));
12501
12502 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
12503
12504 // 0 1 2 3 4 5 6 7 ...
12505 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
12506
12507 // 1 1 1 1 1 1 1 1 ...
12508 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
12509
12510 // 1 0 1 0 1 0 1 0 ...
12511 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
12512 OddMask = DAG.getSetCC(
12513 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
12514 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
12515 ISD::SETNE);
12516 
12517 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
12518
12519 // Build up the index vector for interleaving the concatenated vector
12520 // 0 0 1 1 2 2 3 3 ...
12521 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
12522 // 0 n 1 n+1 2 n+2 3 n+3 ...
12523 Idx =
12524 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
12525
12526 // Then perform the interleave
12527 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
12528 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
12529 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
12530 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
12531 }
12532
12533 // Extract the two halves from the interleaved result
12534 SDValue Lo = DAG.getExtractSubvector(DL, VecVT, Interleaved, 0);
12535 SDValue Hi = DAG.getExtractSubvector(DL, VecVT, Interleaved,
12536 VecVT.getVectorMinNumElements());
12537
12538 return DAG.getMergeValues({Lo, Hi}, DL);
12539}
12540
12541// Lower step_vector to the vid instruction. Any non-identity step value must
12542 // be accounted for by manual expansion.
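// For example (assumed constant steps): a step of 4 becomes
//   vid.v  v8 ; vsll.vi v8, v8, 2
// while a non-power-of-two step such as 6 becomes
//   vid.v  v8 ; vmul.vx v8, v8, a0   (with a0 = 6)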
12543SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
12544 SelectionDAG &DAG) const {
12545 SDLoc DL(Op);
12546 MVT VT = Op.getSimpleValueType();
12547 assert(VT.isScalableVector() && "Expected scalable vector");
12548 MVT XLenVT = Subtarget.getXLenVT();
12549 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
12550 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
12551 uint64_t StepValImm = Op.getConstantOperandVal(0);
12552 if (StepValImm != 1) {
12553 if (isPowerOf2_64(StepValImm)) {
12554 SDValue StepVal =
12555 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
12556 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
12557 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
12558 } else {
12559 SDValue StepVal = lowerScalarSplat(
12560 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
12561 VL, VT, DL, DAG, Subtarget);
12562 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
12563 }
12564 }
12565 return StepVec;
12566}
12567
12568// Implement vector_reverse using vrgather.vv with indices determined by
12569// subtracting the id of each element from (VLMAX-1). This will convert
12570// the indices like so:
12571// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
12572// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
12573SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
12574 SelectionDAG &DAG) const {
12575 SDLoc DL(Op);
12576 MVT VecVT = Op.getSimpleValueType();
12577 if (VecVT.getVectorElementType() == MVT::i1) {
12578 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
12579 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
12580 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
12581 return DAG.getSetCC(DL, VecVT, Op2,
12582 DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE);
12583 }
12584
12585 MVT ContainerVT = VecVT;
12586 SDValue Vec = Op.getOperand(0);
12587 if (VecVT.isFixedLengthVector()) {
12588 ContainerVT = getContainerForFixedLengthVector(VecVT);
12589 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12590 }
12591
12592 MVT XLenVT = Subtarget.getXLenVT();
12593 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
12594
12595 // On some uarchs vrgather.vv will read from every input register for each
12596 // output register, regardless of the indices. However, to reverse a vector
12597 // each output register only needs to read from one input register. So
12598 // decompose it into LMUL * M1 vrgather.vvs to get O(LMUL) performance
12599 // instead of O(LMUL^2).
12600 //
12601 // vsetvli a1, zero, e64, m4, ta, ma
12602 // vrgatherei16.vv v12, v8, v16
12603 // ->
12604 // vsetvli a1, zero, e64, m1, ta, ma
12605 // vrgather.vv v15, v8, v16
12606 // vrgather.vv v14, v9, v16
12607 // vrgather.vv v13, v10, v16
12608 // vrgather.vv v12, v11, v16
12609 if (ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
12610 ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
12611 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
12612 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, Lo.getSimpleValueType(), Lo);
12613 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, Hi.getSimpleValueType(), Hi);
12614 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ContainerVT, Hi, Lo);
12615
12616 // Fixed length vectors might not fit exactly into their container, and so
12617 // leave a gap in the front of the vector after being reversed. Slide this
12618 // away.
12619 //
12620 // x x x x 3 2 1 0 <- v4i16 @ vlen=128
12621 // 0 1 2 3 x x x x <- reverse
12622 // x x x x 0 1 2 3 <- vslidedown.vx
12623 if (VecVT.isFixedLengthVector()) {
12624 SDValue Offset = DAG.getNode(
12625 ISD::SUB, DL, XLenVT,
12626 DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
12627 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
12628 Concat =
12629 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12630 DAG.getUNDEF(ContainerVT), Concat, Offset, Mask, VL);
12631 Concat = convertFromScalableVector(VecVT, Concat, DAG, Subtarget);
12632 }
12633 return Concat;
12634 }
12635
12636 unsigned EltSize = ContainerVT.getScalarSizeInBits();
12637 unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
12638 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
12639 unsigned MaxVLMAX =
12640 VecVT.isFixedLengthVector()
12641 ? VecVT.getVectorNumElements()
12642 : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
12643
12644 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
12645 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
12646
12647 // If this is SEW=8 and VLMAX is potentially more than 256, we need
12648 // to use vrgatherei16.vv.
12649 if (MaxVLMAX > 256 && EltSize == 8) {
12650 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
12651 // Reverse each half, then reassemble them in reverse order.
12652 // NOTE: It's also possible that after splitting that VLMAX no longer
12653 // requires vrgatherei16.vv.
12654 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
12655 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
12656 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
12657 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
12658 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
12659 // Reassemble the low and high pieces reversed.
12660 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Hi, Lo);
12661 }
12662
12663 // Just promote the int type to i16 which will double the LMUL.
12664 IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
12665 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12666 }
12667
12668 // At LMUL > 1, do the index computation in 16 bits to reduce register
12669 // pressure.
12670 if (IntVT.getScalarType().bitsGT(MVT::i16) &&
12671 IntVT.bitsGT(RISCVTargetLowering::getM1VT(IntVT))) {
12672 assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
12673 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12674 IntVT = IntVT.changeVectorElementType(MVT::i16);
12675 }
12676
12677 // Calculate VLMAX-1 for the desired SEW.
12678 SDValue VLMinus1 = DAG.getNode(
12679 ISD::SUB, DL, XLenVT,
12680 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
12681 DAG.getConstant(1, DL, XLenVT));
12682
12683 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
12684 bool IsRV32E64 =
12685 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
12686 SDValue SplatVL;
12687 if (!IsRV32E64)
12688 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
12689 else
12690 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
12691 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
12692
12693 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
12694 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
12695 DAG.getUNDEF(IntVT), Mask, VL);
12696
12697 SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
12698 DAG.getUNDEF(ContainerVT), Mask, VL);
12699 if (VecVT.isFixedLengthVector())
12700 Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
12701 return Gather;
12702}
12703
12704SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
12705 SelectionDAG &DAG) const {
12706 SDLoc DL(Op);
12707 SDValue V1 = Op.getOperand(0);
12708 SDValue V2 = Op.getOperand(1);
12709 MVT XLenVT = Subtarget.getXLenVT();
12710 MVT VecVT = Op.getSimpleValueType();
12711
12712 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
12713
12714 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
12715 SDValue DownOffset, UpOffset;
12716 if (ImmValue >= 0) {
12717 // The operand is a TargetConstant, we need to rebuild it as a regular
12718 // constant.
12719 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
12720 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
12721 } else {
12722 // The operand is a TargetConstant, we need to rebuild it as a regular
12723 // constant rather than negating the original operand.
12724 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
12725 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
12726 }
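// Illustrative sketch for a non-negative offset I (so DownOffset == I and
// UpOffset == VLMAX - I):
//   tmp = vslidedown(v1, I)            ; lanes [0, VLMAX-I) = v1[I..]
//   res = vslideup(tmp, v2, VLMAX - I) ; lanes [VLMAX-I, VLMAX) = v2[0..I)
// which matches the vector_splice semantics.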
12727
12728 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
12729
12730 SDValue SlideDown = getVSlidedown(
12731 DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1, DownOffset, TrueMask,
12732 Subtarget.hasVLDependentLatency() ? UpOffset
12733 : DAG.getRegister(RISCV::X0, XLenVT));
12734 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
12735 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
12736 RISCVVType::TAIL_AGNOSTIC);
12737}
12738
12739SDValue
12740RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
12741 SelectionDAG &DAG) const {
12742 SDLoc DL(Op);
12743 auto *Load = cast<LoadSDNode>(Op);
12744
12745 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
12746 Load->getMemoryVT(),
12747 *Load->getMemOperand()) &&
12748 "Expecting a correctly-aligned load");
12749
12750 MVT VT = Op.getSimpleValueType();
12751 MVT XLenVT = Subtarget.getXLenVT();
12752 MVT ContainerVT = getContainerForFixedLengthVector(VT);
12753
12754 // If we know the exact VLEN and our fixed length vector completely fills
12755 // the container, use a whole register load instead.
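// For example (assuming the exact VLEN is known to be 128): a v4i32 load
// exactly fills its nxv2i32 container, so an ordinary whole-register load is
// emitted instead of a VL-constrained vle32.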
12756 const auto [MinVLMAX, MaxVLMAX] =
12757 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
12758 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
12759 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
12760 MachineMemOperand *MMO = Load->getMemOperand();
12761 SDValue NewLoad =
12762 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
12763 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
12764 MMO->getAAInfo(), MMO->getRanges());
12765 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
12766 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
12767 }
12768
12769 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
12770
12771 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
12772 SDValue IntID = DAG.getTargetConstant(
12773 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
12774 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
12775 if (!IsMaskOp)
12776 Ops.push_back(DAG.getUNDEF(ContainerVT));
12777 Ops.push_back(Load->getBasePtr());
12778 Ops.push_back(VL);
12779 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12780 SDValue NewLoad =
12781 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
12782 Load->getMemoryVT(), Load->getMemOperand());
12783
12784 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
12785 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
12786}
12787
12788SDValue
12789RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
12790 SelectionDAG &DAG) const {
12791 SDLoc DL(Op);
12792 auto *Store = cast<StoreSDNode>(Op);
12793
12794 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
12795 Store->getMemoryVT(),
12796 *Store->getMemOperand()) &&
12797 "Expecting a correctly-aligned store");
12798
12799 SDValue StoreVal = Store->getValue();
12800 MVT VT = StoreVal.getSimpleValueType();
12801 MVT XLenVT = Subtarget.getXLenVT();
12802
12803 // If the size is less than a byte, we need to pad with zeros to make a byte.
12804 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
12805 VT = MVT::v8i1;
12806 StoreVal =
12807 DAG.getInsertSubvector(DL, DAG.getConstant(0, DL, VT), StoreVal, 0);
12808 }
12809
12810 MVT ContainerVT = getContainerForFixedLengthVector(VT);
12811
12812 SDValue NewValue =
12813 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
12814
12815 // If we know the exact VLEN and our fixed length vector completely fills
12816 // the container, use a whole register store instead.
12817 const auto [MinVLMAX, MaxVLMAX] =
12818 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
12819 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
12820 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
12821 MachineMemOperand *MMO = Store->getMemOperand();
12822 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
12823 MMO->getPointerInfo(), MMO->getBaseAlign(),
12824 MMO->getFlags(), MMO->getAAInfo());
12825 }
12826
12827 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
12828
12829 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
12830 SDValue IntID = DAG.getTargetConstant(
12831 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
12832 return DAG.getMemIntrinsicNode(
12833 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
12834 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
12835 Store->getMemoryVT(), Store->getMemOperand());
12836}
12837
12838SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
12839 SelectionDAG &DAG) const {
12840 SDLoc DL(Op);
12841 MVT VT = Op.getSimpleValueType();
12842
12843 const auto *MemSD = cast<MemSDNode>(Op);
12844 EVT MemVT = MemSD->getMemoryVT();
12845 MachineMemOperand *MMO = MemSD->getMemOperand();
12846 SDValue Chain = MemSD->getChain();
12847 SDValue BasePtr = MemSD->getBasePtr();
12848
12849 SDValue Mask, PassThru, VL;
12850 bool IsExpandingLoad = false;
12851 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
12852 Mask = VPLoad->getMask();
12853 PassThru = DAG.getUNDEF(VT);
12854 VL = VPLoad->getVectorLength();
12855 } else {
12856 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
12857 Mask = MLoad->getMask();
12858 PassThru = MLoad->getPassThru();
12859 IsExpandingLoad = MLoad->isExpandingLoad();
12860 }
12861
12862 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12863
12864 MVT XLenVT = Subtarget.getXLenVT();
12865
12866 MVT ContainerVT = VT;
12867 if (VT.isFixedLengthVector()) {
12868 ContainerVT = getContainerForFixedLengthVector(VT);
12869 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
12870 if (!IsUnmasked) {
12871 MVT MaskVT = getMaskTypeFor(ContainerVT);
12872 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12873 }
12874 }
12875
12876 if (!VL)
12877 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12878
12879 SDValue ExpandingVL;
12880 if (!IsUnmasked && IsExpandingLoad) {
12881 ExpandingVL = VL;
12882 VL =
12883 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
12884 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
12885 }
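// Sketch of the expanding-load lowering (illustrative register assignment):
// the vle emitted below reads vcpop(mask) contiguous elements into v9, and
// the gather emitted afterwards scatters them back to the active lanes:
//   viota.m     v10, v0           ; v10[i] = number of set mask bits below i
//   vrgather.vv v8, v9, v10, v0.t ; active lane i receives v9[viota(i)]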
12886
12887 unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
12888 : Intrinsic::riscv_vle_mask;
12889 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12890 if (IntID == Intrinsic::riscv_vle)
12891 Ops.push_back(DAG.getUNDEF(ContainerVT));
12892 else
12893 Ops.push_back(PassThru);
12894 Ops.push_back(BasePtr);
12895 if (IntID == Intrinsic::riscv_vle_mask)
12896 Ops.push_back(Mask);
12897 Ops.push_back(VL);
12898 if (IntID == Intrinsic::riscv_vle_mask)
12899 Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
12900
12901 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12902
12903 SDValue Result =
12904 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12905 Chain = Result.getValue(1);
12906 if (ExpandingVL) {
12907 MVT IndexVT = ContainerVT;
12908 if (ContainerVT.isFloatingPoint())
12909 IndexVT = ContainerVT.changeVectorElementTypeToInteger();
12910
12911 MVT IndexEltVT = IndexVT.getVectorElementType();
12912 bool UseVRGATHEREI16 = false;
12913 // If the index vector is an i8 vector and the element count exceeds 256, we
12914 // should change the element type of the index vector to i16 to avoid
12915 // overflow.
12916 if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
12917 // FIXME: We need to do vector splitting manually for LMUL=8 cases.
12918 assert(getLMUL(IndexVT) != RISCVVType::LMUL_8);
12919 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
12920 UseVRGATHEREI16 = true;
12921 }
12922
12923 SDValue Iota =
12924 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
12925 DAG.getConstant(Intrinsic::riscv_viota, DL, XLenVT),
12926 DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
12927 Result =
12928 DAG.getNode(UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
12929 : RISCVISD::VRGATHER_VV_VL,
12930 DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
12931 }
12932
12933 if (VT.isFixedLengthVector())
12934 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12935
12936 return DAG.getMergeValues({Result, Chain}, DL);
12937}
12938
12939SDValue RISCVTargetLowering::lowerLoadFF(SDValue Op, SelectionDAG &DAG) const {
12940 SDLoc DL(Op);
12941 MVT VT = Op->getSimpleValueType(0);
12942
12943 const auto *VPLoadFF = cast<VPLoadFFSDNode>(Op);
12944 EVT MemVT = VPLoadFF->getMemoryVT();
12945 MachineMemOperand *MMO = VPLoadFF->getMemOperand();
12946 SDValue Chain = VPLoadFF->getChain();
12947 SDValue BasePtr = VPLoadFF->getBasePtr();
12948
12949 SDValue Mask = VPLoadFF->getMask();
12950 SDValue VL = VPLoadFF->getVectorLength();
12951
12952 MVT XLenVT = Subtarget.getXLenVT();
12953
12954 MVT ContainerVT = VT;
12955 if (VT.isFixedLengthVector()) {
12956 ContainerVT = getContainerForFixedLengthVector(VT);
12957 MVT MaskVT = getMaskTypeFor(ContainerVT);
12958 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12959 }
12960
12961 unsigned IntID = Intrinsic::riscv_vleff_mask;
12962 SDValue Ops[] = {
12963 Chain,
12964 DAG.getTargetConstant(IntID, DL, XLenVT),
12965 DAG.getUNDEF(ContainerVT),
12966 BasePtr,
12967 Mask,
12968 VL,
12969 DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT)};
12970 
12971 SDVTList VTs = DAG.getVTList({ContainerVT, Op->getValueType(1), MVT::Other});
12972
12973 SDValue Result =
12974 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12975 SDValue OutVL = Result.getValue(1);
12976 Chain = Result.getValue(2);
12977
12978 if (VT.isFixedLengthVector())
12979 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12980
12981 return DAG.getMergeValues({Result, OutVL, Chain}, DL);
12982}
12983
12984SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
12985 SelectionDAG &DAG) const {
12986 SDLoc DL(Op);
12987
12988 const auto *MemSD = cast<MemSDNode>(Op);
12989 EVT MemVT = MemSD->getMemoryVT();
12990 MachineMemOperand *MMO = MemSD->getMemOperand();
12991 SDValue Chain = MemSD->getChain();
12992 SDValue BasePtr = MemSD->getBasePtr();
12993 SDValue Val, Mask, VL;
12994
12995 bool IsCompressingStore = false;
12996 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
12997 Val = VPStore->getValue();
12998 Mask = VPStore->getMask();
12999 VL = VPStore->getVectorLength();
13000 } else {
13001 const auto *MStore = cast<MaskedStoreSDNode>(Op);
13002 Val = MStore->getValue();
13003 Mask = MStore->getMask();
13004 IsCompressingStore = MStore->isCompressingStore();
13005 }
13006
13007 bool IsUnmasked =
13008 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
13009
13010 MVT VT = Val.getSimpleValueType();
13011 MVT XLenVT = Subtarget.getXLenVT();
13012
13013 MVT ContainerVT = VT;
13014 if (VT.isFixedLengthVector()) {
13015 ContainerVT = getContainerForFixedLengthVector(VT);
13016
13017 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
13018 if (!IsUnmasked || IsCompressingStore) {
13019 MVT MaskVT = getMaskTypeFor(ContainerVT);
13020 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13021 }
13022 }
13023
13024 if (!VL)
13025 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
13026
13027 if (IsCompressingStore) {
13028 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
13029 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
13030 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
13031 VL =
13032 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
13033 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
13034 }
13035
13036 unsigned IntID =
13037 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
13038 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
13039 Ops.push_back(Val);
13040 Ops.push_back(BasePtr);
13041 if (!IsUnmasked)
13042 Ops.push_back(Mask);
13043 Ops.push_back(VL);
13044
13045 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
13046 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
13047}
13048
13049SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
13050 SelectionDAG &DAG) const {
13051 SDLoc DL(Op);
13052 SDValue Val = Op.getOperand(0);
13053 SDValue Mask = Op.getOperand(1);
13054 SDValue Passthru = Op.getOperand(2);
13055
13056 MVT VT = Val.getSimpleValueType();
13057 MVT XLenVT = Subtarget.getXLenVT();
13058 MVT ContainerVT = VT;
13059 if (VT.isFixedLengthVector()) {
13060 ContainerVT = getContainerForFixedLengthVector(VT);
13061 MVT MaskVT = getMaskTypeFor(ContainerVT);
13062 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
13063 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13064 Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
13065 }
13066
13067 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
13068 SDValue Res =
13069 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
13070 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
13071 Passthru, Val, Mask, VL);
13072
13073 if (VT.isFixedLengthVector())
13074 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
13075
13076 return Res;
13077}
13078
13079SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
13080 SelectionDAG &DAG) const {
13081 unsigned Opc = Op.getOpcode();
13082 SDLoc DL(Op);
13083 SDValue Chain = Op.getOperand(0);
13084 SDValue Op1 = Op.getOperand(1);
13085 SDValue Op2 = Op.getOperand(2);
13086 SDValue CC = Op.getOperand(3);
13087 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
13088 MVT VT = Op.getSimpleValueType();
13089 MVT InVT = Op1.getSimpleValueType();
13090
13091 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
13092 // condition codes.
13093 if (Opc == ISD::STRICT_FSETCCS) {
13094 // Expand strict_fsetccs(x, y, oeq) to
13095 // (and strict_fsetccs(x, y, ole), strict_fsetccs(y, x, ole))
13096 SDVTList VTList = Op->getVTList();
13097 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
13098 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
13099 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
13100 Op2, OLECCVal);
13101 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
13102 Op1, OLECCVal);
13103 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
13104 Tmp1.getValue(1), Tmp2.getValue(1));
13105 // Tmp1 and Tmp2 might be the same node.
13106 if (Tmp1 != Tmp2)
13107 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
13108 return DAG.getMergeValues({Tmp1, OutChain}, DL);
13109 }
13110
13111 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
13112 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
13113 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
13114 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
13115 Op2, OEQCCVal);
13116 SDValue Res = DAG.getNOT(DL, OEQ, VT);
13117 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
13118 }
13119 }
13120
13121 MVT ContainerInVT = InVT;
13122 if (InVT.isFixedLengthVector()) {
13123 ContainerInVT = getContainerForFixedLengthVector(InVT);
13124 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
13125 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
13126 }
13127 MVT MaskVT = getMaskTypeFor(ContainerInVT);
13128
13129 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
13130
13131 SDValue Res;
13132 if (Opc == ISD::STRICT_FSETCC &&
13133 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
13134 CCVal == ISD::SETOLE)) {
13135 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
13136 // is only active when both input elements are ordered.
13137 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
13138 SDValue OrderMask1 = DAG.getNode(
13139 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
13140 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
13141 True, VL});
13142 SDValue OrderMask2 = DAG.getNode(
13143 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
13144 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
13145 True, VL});
13146 Mask =
13147 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
13148 // Use Mask as the passthru operand to let the result be 0 if either of the
13149 // inputs is unordered.
13150 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
13151 DAG.getVTList(MaskVT, MVT::Other),
13152 {Chain, Op1, Op2, CC, Mask, Mask, VL});
13153 } else {
13154 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
13155 : RISCVISD::STRICT_FSETCCS_VL;
13156 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
13157 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
13158 }
13159
13160 if (VT.isFixedLengthVector()) {
13161 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
13162 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
13163 }
13164 return Res;
13165}
13166
13167// Lower vector ABS to smax(X, sub(0, X)).
13168SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
13169 SDLoc DL(Op);
13170 MVT VT = Op.getSimpleValueType();
13171 SDValue X = Op.getOperand(0);
13172
13173 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
13174 "Unexpected type for ISD::ABS");
13175
13176 MVT ContainerVT = VT;
13177 if (VT.isFixedLengthVector()) {
13178 ContainerVT = getContainerForFixedLengthVector(VT);
13179 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
13180 }
13181
13182 SDValue Mask, VL;
13183 if (Op->getOpcode() == ISD::VP_ABS) {
13184 Mask = Op->getOperand(1);
13185 if (VT.isFixedLengthVector())
13186 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
13187 Subtarget);
13188 VL = Op->getOperand(2);
13189 } else
13190 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
13191
13192 SDValue SplatZero = DAG.getNode(
13193 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
13194 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
13195 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
13196 DAG.getUNDEF(ContainerVT), Mask, VL);
13197 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
13198 DAG.getUNDEF(ContainerVT), Mask, VL);
13199
13200 if (VT.isFixedLengthVector())
13201 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
13202 return Max;
13203}
13204
13205SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
13206 SelectionDAG &DAG) const {
13207 const auto &TSInfo =
13208 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
13209
13210 unsigned NewOpc = getRISCVVLOp(Op);
13211 bool HasPassthruOp = TSInfo.hasPassthruOp(NewOpc);
13212 bool HasMask = TSInfo.hasMaskOp(NewOpc);
13213
13214 MVT VT = Op.getSimpleValueType();
13215 MVT ContainerVT = getContainerForFixedLengthVector(VT);
13216
13217 // Create list of operands by converting existing ones to scalable types.
13218 SmallVector<SDValue, 6> Ops;
13219 for (const SDValue &V : Op->op_values()) {
13220 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
13221
13222 // Pass through non-vector operands.
13223 if (!V.getValueType().isVector()) {
13224 Ops.push_back(V);
13225 continue;
13226 }
13227
13228 // "cast" fixed length vector to a scalable vector.
13229 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
13230 "Only fixed length vectors are supported!");
13231 MVT VContainerVT = ContainerVT.changeVectorElementType(
13232 V.getSimpleValueType().getVectorElementType());
13233 Ops.push_back(convertToScalableVector(VContainerVT, V, DAG, Subtarget));
13234 }
13235
13236 SDLoc DL(Op);
13237 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
13238 if (HasPassthruOp)
13239 Ops.push_back(DAG.getUNDEF(ContainerVT));
13240 if (HasMask)
13241 Ops.push_back(Mask);
13242 Ops.push_back(VL);
13243
13244 // StrictFP operations have two result values. Their lowered result should
13245 // have the same result count.
13246 if (Op->isStrictFPOpcode()) {
13247 SDValue ScalableRes =
13248 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
13249 Op->getFlags());
13250 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
13251 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
13252 }
13253
13254 SDValue ScalableRes =
13255 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
13256 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
13257}
13258
13259// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
13260// * Operands of each node are assumed to be in the same order.
13261// * The EVL operand is promoted from i32 to i64 on RV64.
13262// * Fixed-length vectors are converted to their scalable-vector container
13263// types.
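// For example (an illustrative sketch; container types assume Zvl128b):
//   ISD::VP_ADD v4i32 %a, %b, %mask, %evl
// becomes
//   RISCVISD::ADD_VL nxv2i32 %a', %b', undef (passthru), %mask', %evl'
// where %a', %b' and %mask' are the scalable-container forms of the
// fixed-length operands, %evl' is the EVL promoted to XLenVT, and the
// nxv2i32 result is converted back to v4i32 afterwards.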
13264SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
13265 const auto &TSInfo =
13266 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
13267
13268 unsigned RISCVISDOpc = getRISCVVLOp(Op);
13269 bool HasPassthruOp = TSInfo.hasPassthruOp(RISCVISDOpc);
13270
13271 SDLoc DL(Op);
13272 MVT VT = Op.getSimpleValueType();
13273 SmallVector<SDValue, 16> Ops;
13274 
13275 MVT ContainerVT = VT;
13276 if (VT.isFixedLengthVector())
13277 ContainerVT = getContainerForFixedLengthVector(VT);
13278
13279 for (const auto &OpIdx : enumerate(Op->ops())) {
13280 SDValue V = OpIdx.value();
13281 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
13282 // Add a dummy passthru value before the mask, or before the EVL if there
13283 // isn't a mask.
13284 if (HasPassthruOp) {
13285 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
13286 if (MaskIdx) {
13287 if (*MaskIdx == OpIdx.index())
13288 Ops.push_back(DAG.getUNDEF(ContainerVT));
13289 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
13290 OpIdx.index()) {
13291 if (Op.getOpcode() == ISD::VP_MERGE) {
13292 // For VP_MERGE, copy the false operand instead of an undef value.
13293 Ops.push_back(Ops.back());
13294 } else {
13295 assert(Op.getOpcode() == ISD::VP_SELECT);
13296 // For VP_SELECT, add an undef value.
13297 Ops.push_back(DAG.getUNDEF(ContainerVT));
13298 }
13299 }
13300 }
13301 // VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL.
13302 if (RISCVISDOpc == RISCVISD::VFCVT_RM_X_F_VL &&
13303 ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index())
13304 Ops.push_back(DAG.getTargetConstant(RISCVFPRndMode::DYN, DL,
13305 Subtarget.getXLenVT()));
13306 // Pass through operands which aren't fixed-length vectors.
13307 if (!V.getValueType().isFixedLengthVector()) {
13308 Ops.push_back(V);
13309 continue;
13310 }
13311 // "cast" fixed length vector to a scalable vector.
13312 MVT OpVT = V.getSimpleValueType();
13313 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
13314 assert(useRVVForFixedLengthVectorVT(OpVT) &&
13315 "Only fixed length vectors are supported!");
13316 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
13317 }
13318
13319 if (!VT.isFixedLengthVector())
13320 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
13321
13322 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
13323
13324 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
13325}
13326
13327SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
13328 SelectionDAG &DAG) const {
13329 SDLoc DL(Op);
13330 MVT VT = Op.getSimpleValueType();
13331
13332 SDValue Src = Op.getOperand(0);
13333 // NOTE: Mask is dropped.
13334 SDValue VL = Op.getOperand(2);
13335
13336 MVT ContainerVT = VT;
13337 if (VT.isFixedLengthVector()) {
13338 ContainerVT = getContainerForFixedLengthVector(VT);
13339 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
13340 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
13341 }
13342
13343 MVT XLenVT = Subtarget.getXLenVT();
13344 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
13345 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13346 DAG.getUNDEF(ContainerVT), Zero, VL);
13347
13348 SDValue SplatValue = DAG.getSignedConstant(
13349 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
13350 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13351 DAG.getUNDEF(ContainerVT), SplatValue, VL);
13352
13353 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
13354 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
13355 if (!VT.isFixedLengthVector())
13356 return Result;
13357 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13358}
13359
13360SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
13361 SelectionDAG &DAG) const {
13362 SDLoc DL(Op);
13363 MVT VT = Op.getSimpleValueType();
13364
13365 SDValue Op1 = Op.getOperand(0);
13366 SDValue Op2 = Op.getOperand(1);
13367 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
13368 // NOTE: Mask is dropped.
13369 SDValue VL = Op.getOperand(4);
13370
13371 MVT ContainerVT = VT;
13372 if (VT.isFixedLengthVector()) {
13373 ContainerVT = getContainerForFixedLengthVector(VT);
13374 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13375 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13376 }
13377
13379 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
13380
13381 switch (Condition) {
13382 default:
13383 break;
13384 // X != Y --> (X^Y)
13385 case ISD::SETNE:
13386 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
13387 break;
13388 // X == Y --> ~(X^Y)
13389 case ISD::SETEQ: {
13390 SDValue Temp =
13391 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
13392 Result =
13393 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
13394 break;
13395 }
13396 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
13397 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
13398 case ISD::SETGT:
13399 case ISD::SETULT: {
13400 SDValue Temp =
13401 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
13402 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
13403 break;
13404 }
13405 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
13406 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
13407 case ISD::SETLT:
13408 case ISD::SETUGT: {
13409 SDValue Temp =
13410 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
13411 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
13412 break;
13413 }
13414 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
13415 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
13416 case ISD::SETGE:
13417 case ISD::SETULE: {
13418 SDValue Temp =
13419 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
13420 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
13421 break;
13422 }
13423 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
13424 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
13425 case ISD::SETLE:
13426 case ISD::SETUGE: {
13427 SDValue Temp =
13428 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
13429 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
13430 break;
13431 }
13432 }
13433
13434 if (!VT.isFixedLengthVector())
13435 return Result;
13436 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13437}
13438
13439// Lower Floating-Point/Integer Type-Convert VP SDNodes
13440SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
13441 SelectionDAG &DAG) const {
13442 SDLoc DL(Op);
13443
13444 SDValue Src = Op.getOperand(0);
13445 SDValue Mask = Op.getOperand(1);
13446 SDValue VL = Op.getOperand(2);
13447 unsigned RISCVISDOpc = getRISCVVLOp(Op);
13448
13449 MVT DstVT = Op.getSimpleValueType();
13450 MVT SrcVT = Src.getSimpleValueType();
13451 if (DstVT.isFixedLengthVector()) {
13452 DstVT = getContainerForFixedLengthVector(DstVT);
13453 SrcVT = getContainerForFixedLengthVector(SrcVT);
13454 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
13455 MVT MaskVT = getMaskTypeFor(DstVT);
13456 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13457 }
13458
13459 unsigned DstEltSize = DstVT.getScalarSizeInBits();
13460 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
13461
13463 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
13464 if (SrcVT.isInteger()) {
13465 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
13466
13467 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
13468 ? RISCVISD::VSEXT_VL
13469 : RISCVISD::VZEXT_VL;
13470
13471 // Do we need to do any pre-widening before converting?
13472 if (SrcEltSize == 1) {
13473 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
13474 MVT XLenVT = Subtarget.getXLenVT();
13475 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
13476 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
13477 DAG.getUNDEF(IntVT), Zero, VL);
13478 SDValue One = DAG.getSignedConstant(
13479 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
13480 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
13481 DAG.getUNDEF(IntVT), One, VL);
13482 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
13483 ZeroSplat, DAG.getUNDEF(IntVT), VL);
13484 } else if (DstEltSize > (2 * SrcEltSize)) {
13485 // Widen before converting.
13486 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
13487 DstVT.getVectorElementCount());
13488 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
13489 }
13490
13491 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
13492 } else {
13493 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
13494 "Wrong input/output vector types");
13495
13496 // Convert f16 to f32 then convert f32 to i64.
13497 if (DstEltSize > (2 * SrcEltSize)) {
13498 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
13499 MVT InterimFVT =
13500 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
13501 Src =
13502 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
13503 }
13504
13505 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
13506 }
13507 } else { // Narrowing + Conversion
13508 if (SrcVT.isInteger()) {
13509 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
13510 // First do a narrowing conversion to an FP type half the size, then round
13511 // the result to a smaller FP type if needed.
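// For example (assumed i64 -> f16 conversion): the i64 source is first
// narrowed to an f32 vector (half the source width), and that result is then
// rounded to f16 with FP_ROUND_VL.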
13512
13513 MVT InterimFVT = DstVT;
13514 if (SrcEltSize > (2 * DstEltSize)) {
13515 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
13516 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
13517 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
13518 }
13519
13520 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
13521
13522 if (InterimFVT != DstVT) {
13523 Src = Result;
13524 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
13525 }
13526 } else {
13527 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
13528 "Wrong input/output vector types");
13529 // First do a narrowing conversion to an integer half the size, then
13530 // truncate if needed.
13531
13532 if (DstEltSize == 1) {
13533 // First convert to the same size integer, then convert to mask using
13534 // setcc.
13535 assert(SrcEltSize >= 16 && "Unexpected FP type!");
13536 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
13537 DstVT.getVectorElementCount());
13538 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
13539
13540 // Compare the integer result to 0. The integer should be 0 or 1/-1,
13541 // otherwise the conversion was undefined.
13542 MVT XLenVT = Subtarget.getXLenVT();
13543 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
13544 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
13545 DAG.getUNDEF(InterimIVT), SplatZero, VL);
13546 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
13547 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
13548 DAG.getUNDEF(DstVT), Mask, VL});
13549 } else {
13550 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
13551 DstVT.getVectorElementCount());
13552
13553 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
13554
13555 while (InterimIVT != DstVT) {
13556 SrcEltSize /= 2;
13557 Src = Result;
13558 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
13559 DstVT.getVectorElementCount());
13560 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
13561 Src, Mask, VL);
13562 }
13563 }
13564 }
13565 }
13566
13567 MVT VT = Op.getSimpleValueType();
13568 if (!VT.isFixedLengthVector())
13569 return Result;
13570 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13571}
13572
13573SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
13574 SelectionDAG &DAG) const {
13575 SDLoc DL(Op);
13576 MVT VT = Op.getSimpleValueType();
13577 MVT XLenVT = Subtarget.getXLenVT();
13578
13579 SDValue Mask = Op.getOperand(0);
13580 SDValue TrueVal = Op.getOperand(1);
13581 SDValue FalseVal = Op.getOperand(2);
13582 SDValue VL = Op.getOperand(3);
13583
13584 // Use default legalization if a vector of EVL type would be legal.
13585 EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), VL.getValueType(),
13586 VT.getVectorElementCount());
13587 if (isTypeLegal(EVLVecVT))
13588 return SDValue();
13589
13590 MVT ContainerVT = VT;
13591 if (VT.isFixedLengthVector()) {
13592 ContainerVT = getContainerForFixedLengthVector(VT);
13593 Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
13594 TrueVal = convertToScalableVector(ContainerVT, TrueVal, DAG, Subtarget);
13595 FalseVal = convertToScalableVector(ContainerVT, FalseVal, DAG, Subtarget);
13596 }
13597
13598 // Promote to a vector of i8.
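// Sketch of the approach: the true/false mask operands are promoted to i8
// 0/1 vectors by merging splat(1)/splat(0), the merge is performed on those
// i8 vectors under the condition mask, and the result is converted back to a
// mask with a setcc against 0.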
13599 MVT PromotedVT = ContainerVT.changeVectorElementType(MVT::i8);
13600
13601 // Promote TrueVal and FalseVal using VLMax.
13602 // FIXME: Is there a better way to do this?
13603 SDValue VLMax = DAG.getRegister(RISCV::X0, XLenVT);
13604 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
13605 DAG.getUNDEF(PromotedVT),
13606 DAG.getConstant(1, DL, XLenVT), VLMax);
13607 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
13608 DAG.getUNDEF(PromotedVT),
13609 DAG.getConstant(0, DL, XLenVT), VLMax);
13610 TrueVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, TrueVal, SplatOne,
13611 SplatZero, DAG.getUNDEF(PromotedVT), VL);
13612 // Any element past VL uses FalseVal, so use VLMax
13613 FalseVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, FalseVal,
13614 SplatOne, SplatZero, DAG.getUNDEF(PromotedVT), VLMax);
13615
13616 // VP_MERGE the two promoted values.
13617 SDValue VPMerge = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, Mask,
13618 TrueVal, FalseVal, FalseVal, VL);
13619
13620 // Convert back to mask.
13621 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
13622 SDValue Result = DAG.getNode(
13623 RISCVISD::SETCC_VL, DL, ContainerVT,
13624 {VPMerge, DAG.getConstant(0, DL, PromotedVT), DAG.getCondCode(ISD::SETNE),
13625 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), TrueMask, VLMax});
13626
13627 if (VT.isFixedLengthVector())
13628 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13629 return Result;
13630}
13631
13632SDValue
13633RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
13634 SelectionDAG &DAG) const {
13635 using namespace SDPatternMatch;
13636
13637 SDLoc DL(Op);
13638
13639 SDValue Op1 = Op.getOperand(0);
13640 SDValue Op2 = Op.getOperand(1);
13641 SDValue Offset = Op.getOperand(2);
13642 SDValue Mask = Op.getOperand(3);
13643 SDValue EVL1 = Op.getOperand(4);
13644 SDValue EVL2 = Op.getOperand(5);
13645
13646 const MVT XLenVT = Subtarget.getXLenVT();
13647 MVT VT = Op.getSimpleValueType();
13648 MVT ContainerVT = VT;
13649 if (VT.isFixedLengthVector()) {
13650 ContainerVT = getContainerForFixedLengthVector(VT);
13651 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13652 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13653 MVT MaskVT = getMaskTypeFor(ContainerVT);
13654 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13655 }
13656
13657 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
13658 if (IsMaskVector) {
13659 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
13660
13661 // Expand input operands
13662 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13663 DAG.getUNDEF(ContainerVT),
13664 DAG.getConstant(1, DL, XLenVT), EVL1);
13665 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13666 DAG.getUNDEF(ContainerVT),
13667 DAG.getConstant(0, DL, XLenVT), EVL1);
13668 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
13669 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
13670
13671 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13672 DAG.getUNDEF(ContainerVT),
13673 DAG.getConstant(1, DL, XLenVT), EVL2);
13674 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13675 DAG.getUNDEF(ContainerVT),
13676 DAG.getConstant(0, DL, XLenVT), EVL2);
13677 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
13678 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
13679 }
13680
13681 auto getVectorFirstEle = [](SDValue Vec) {
13682 SDValue FirstEle;
13683 if (sd_match(Vec, m_InsertElt(m_Value(), m_Value(FirstEle), m_Zero())))
13684 return FirstEle;
13685
13686 if (Vec.getOpcode() == ISD::SPLAT_VECTOR ||
13687 Vec.getOpcode() == ISD::BUILD_VECTOR)
13688 return Vec.getOperand(0);
13689
13690 return SDValue();
13691 };
13692
13693 if (!IsMaskVector && isNullConstant(Offset) && isOneConstant(EVL1))
13694 if (auto FirstEle = getVectorFirstEle(Op->getOperand(0))) {
13695 MVT EltVT = ContainerVT.getVectorElementType();
13696 SDValue Result;
13697 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
13698 EltVT == MVT::bf16) {
13699 EltVT = EltVT.changeTypeToInteger();
13700 ContainerVT = ContainerVT.changeVectorElementType(EltVT);
13701 Op2 = DAG.getBitcast(ContainerVT, Op2);
13702 FirstEle =
13703 DAG.getAnyExtOrTrunc(DAG.getBitcast(EltVT, FirstEle), DL, XLenVT);
13704 }
13705 Result = DAG.getNode(EltVT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL
13706 : RISCVISD::VSLIDE1UP_VL,
13707 DL, ContainerVT, DAG.getUNDEF(ContainerVT), Op2,
13708 FirstEle, Mask, EVL2);
13709 Result = DAG.getBitcast(
13710 VT.isFixedLengthVector() ? getContainerForFixedLengthVector(VT) : VT,
13711 Result);
13712 return VT.isFixedLengthVector()
13713 ? convertFromScalableVector(VT, Result, DAG, Subtarget)
13714 : Result;
13715 }
13716
13717 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
13718 SDValue DownOffset, UpOffset;
13719 if (ImmValue >= 0) {
13720 // The operand is a TargetConstant, we need to rebuild it as a regular
13721 // constant.
13722 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
13723 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
13724 } else {
13725 // The operand is a TargetConstant, we need to rebuild it as a regular
13726 // constant rather than negating the original operand.
13727 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
13728 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
13729 }
13730
13731 if (ImmValue != 0)
13732 Op1 = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
13733 DAG.getUNDEF(ContainerVT), Op1, DownOffset, Mask,
13734 Subtarget.hasVLDependentLatency() ? UpOffset : EVL2);
13735 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, Op1, Op2,
13736 UpOffset, Mask, EVL2, RISCVVType::TAIL_AGNOSTIC);
13737
13738 if (IsMaskVector) {
13739 // Truncate Result back to a mask vector (Result has same EVL as Op2)
13740 Result = DAG.getNode(
13741 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
13742 {Result, DAG.getConstant(0, DL, ContainerVT),
13743 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
13744 Mask, EVL2});
13745 }
13746
13747 if (!VT.isFixedLengthVector())
13748 return Result;
13749 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13750}
13751
13752SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
13753 SelectionDAG &DAG) const {
13754 SDLoc DL(Op);
13755 SDValue Val = Op.getOperand(0);
13756 SDValue Mask = Op.getOperand(1);
13757 SDValue VL = Op.getOperand(2);
13758 MVT VT = Op.getSimpleValueType();
13759
13760 MVT ContainerVT = VT;
13761 if (VT.isFixedLengthVector()) {
13762 ContainerVT = getContainerForFixedLengthVector(VT);
13763 MVT MaskVT = getMaskTypeFor(ContainerVT);
13764 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13765 }
13766
13767 SDValue Result;
13768 if (VT.getScalarType() == MVT::i1) {
13769 if (auto *C = dyn_cast<ConstantSDNode>(Val)) {
13770 Result =
13771 DAG.getNode(C->isZero() ? RISCVISD::VMCLR_VL : RISCVISD::VMSET_VL, DL,
13772 ContainerVT, VL);
13773 } else {
13774 MVT WidenVT = ContainerVT.changeVectorElementType(MVT::i8);
13775 SDValue LHS =
13776 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, WidenVT, DAG.getUNDEF(WidenVT),
13777 DAG.getZExtOrTrunc(Val, DL, Subtarget.getXLenVT()), VL);
13778 SDValue RHS = DAG.getConstant(0, DL, WidenVT);
13779 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
13780 {LHS, RHS, DAG.getCondCode(ISD::SETNE),
13781 DAG.getUNDEF(ContainerVT), Mask, VL});
13782 }
13783 } else {
13784 Result =
13785 lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
13786 }
13787
13788 if (!VT.isFixedLengthVector())
13789 return Result;
13790 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13791}
13792
13793SDValue
13794RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
13795 SelectionDAG &DAG) const {
13796 SDLoc DL(Op);
13797 MVT VT = Op.getSimpleValueType();
13798 MVT XLenVT = Subtarget.getXLenVT();
13799
13800 SDValue Op1 = Op.getOperand(0);
13801 SDValue Mask = Op.getOperand(1);
13802 SDValue EVL = Op.getOperand(2);
13803
13804 MVT ContainerVT = VT;
13805 if (VT.isFixedLengthVector()) {
13806 ContainerVT = getContainerForFixedLengthVector(VT);
13807 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13808 MVT MaskVT = getMaskTypeFor(ContainerVT);
13809 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13810 }
13811
13812 MVT GatherVT = ContainerVT;
13813 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
13814 // Check if we are working with mask vectors
13815 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
13816 if (IsMaskVector) {
13817 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
13818
13819 // Expand input operand
13820 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13821 DAG.getUNDEF(IndicesVT),
13822 DAG.getConstant(1, DL, XLenVT), EVL);
13823 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13824 DAG.getUNDEF(IndicesVT),
13825 DAG.getConstant(0, DL, XLenVT), EVL);
13826 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
13827 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
13828 }
13829
13830 unsigned EltSize = GatherVT.getScalarSizeInBits();
13831 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
13832 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
13833 unsigned MaxVLMAX =
13834 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
13835
13836 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
13837 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
13838 // to use vrgatherei16.vv.
13839 // TODO: It's also possible to use vrgatherei16.vv for other types to
13840 // decrease register width for the index calculation.
13841 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
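// (With SEW=8 the vrgather.vv index elements are also only 8 bits wide and
// can therefore only address 256 source elements, hence the switch to 16-bit
// indices below once VLMAX may exceed 256.)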
13842 if (MaxVLMAX > 256 && EltSize == 8) {
13843 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
13844 // Split the vector in half and reverse each half using a full register
13845 // reverse.
13846 // Swap the halves and concatenate them.
13847 // Slide the concatenated result by (VLMax - VL).
13848 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
13849 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
13850 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
13851
13852 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
13853 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
13854
13855 // Reassemble the low and high pieces reversed.
13856 // NOTE: this Result is unmasked (because we do not need masks for
13857 // shuffles). If in the future this has to change, we can use a SELECT_VL
13858 // between Result and UNDEF using the mask originally passed to VP_REVERSE
13859 SDValue Result =
13860 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
13861
13862 // Slide off any elements from past EVL that were reversed into the low
13863 // elements.
13864 unsigned MinElts = GatherVT.getVectorMinNumElements();
13865 SDValue VLMax =
13866 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
13867 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
13868
13869 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
13870 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
13871
13872 if (IsMaskVector) {
13873 // Truncate Result back to a mask vector
13874 Result =
13875 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
13876 {Result, DAG.getConstant(0, DL, GatherVT),
13877 DAG.getCondCode(ISD::SETNE),
13878 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
13879 }
13880
13881 if (!VT.isFixedLengthVector())
13882 return Result;
13883 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13884 }
13885
13886 // Just promote the int type to i16 which will double the LMUL.
13887 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
13888 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
13889 }
13890
13891 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
13892 SDValue VecLen =
13893 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
13894 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13895 DAG.getUNDEF(IndicesVT), VecLen, EVL);
13896 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
13897 DAG.getUNDEF(IndicesVT), Mask, EVL);
13898 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
13899 DAG.getUNDEF(GatherVT), Mask, EVL);
13900
13901 if (IsMaskVector) {
13902 // Truncate Result back to a mask vector
13903 Result = DAG.getNode(
13904 RISCVISD::SETCC_VL, DL, ContainerVT,
13905 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
13906 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
13907 }
13908
13909 if (!VT.isFixedLengthVector())
13910 return Result;
13911 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13912}
13913
13914SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
13915 SelectionDAG &DAG) const {
13916 MVT VT = Op.getSimpleValueType();
13917 if (VT.getVectorElementType() != MVT::i1)
13918 return lowerVPOp(Op, DAG);
13919
13920 // It is safe to drop the mask parameter, as masked-off elements are undef.
13921 SDValue Op1 = Op->getOperand(0);
13922 SDValue Op2 = Op->getOperand(1);
13923 SDValue VL = Op->getOperand(3);
13924
13925 MVT ContainerVT = VT;
13926 const bool IsFixed = VT.isFixedLengthVector();
13927 if (IsFixed) {
13928 ContainerVT = getContainerForFixedLengthVector(VT);
13929 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13930 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13931 }
13932
13933 SDLoc DL(Op);
13934 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
13935 if (!IsFixed)
13936 return Val;
13937 return convertFromScalableVector(VT, Val, DAG, Subtarget);
13938}
13939
13940SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
13941 SelectionDAG &DAG) const {
13942 SDLoc DL(Op);
13943 MVT XLenVT = Subtarget.getXLenVT();
13944 MVT VT = Op.getSimpleValueType();
13945 MVT ContainerVT = VT;
13946 if (VT.isFixedLengthVector())
13947 ContainerVT = getContainerForFixedLengthVector(VT);
13948
13949 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
13950
13951 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
13952 // Check if the mask is known to be all ones
13953 SDValue Mask = VPNode->getMask();
13954 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
13955
13956 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
13957 : Intrinsic::riscv_vlse_mask,
13958 DL, XLenVT);
13959 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
13960 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
13961 VPNode->getStride()};
13962 if (!IsUnmasked) {
13963 if (VT.isFixedLengthVector()) {
13964 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
13965 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13966 }
13967 Ops.push_back(Mask);
13968 }
13969 Ops.push_back(VPNode->getVectorLength());
13970 if (!IsUnmasked) {
13971 SDValue Policy =
13972 DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
13973 Ops.push_back(Policy);
13974 }
13975
13976 SDValue Result =
13977 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
13978 VPNode->getMemoryVT(), VPNode->getMemOperand());
13979 SDValue Chain = Result.getValue(1);
13980
13981 if (VT.isFixedLengthVector())
13982 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13983
13984 return DAG.getMergeValues({Result, Chain}, DL);
13985}
13986
13987SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
13988 SelectionDAG &DAG) const {
13989 SDLoc DL(Op);
13990 MVT XLenVT = Subtarget.getXLenVT();
13991
13992 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
13993 SDValue StoreVal = VPNode->getValue();
13994 MVT VT = StoreVal.getSimpleValueType();
13995 MVT ContainerVT = VT;
13996 if (VT.isFixedLengthVector()) {
13997 ContainerVT = getContainerForFixedLengthVector(VT);
13998 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
13999 }
14000
14001 // Check if the mask is known to be all ones
14002 SDValue Mask = VPNode->getMask();
14003 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14004
14005 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
14006 : Intrinsic::riscv_vsse_mask,
14007 DL, XLenVT);
14008 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
14009 VPNode->getBasePtr(), VPNode->getStride()};
14010 if (!IsUnmasked) {
14011 if (VT.isFixedLengthVector()) {
14012 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
14013 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14014 }
14015 Ops.push_back(Mask);
14016 }
14017 Ops.push_back(VPNode->getVectorLength());
14018
14019 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
14020 Ops, VPNode->getMemoryVT(),
14021 VPNode->getMemOperand());
14022}
14023
14024// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
14025// matched to a RVV indexed load. The RVV indexed load instructions only
14026// support the "unsigned unscaled" addressing mode; indices are implicitly
14027// zero-extended or truncated to XLEN and are treated as byte offsets. Any
14028// signed or scaled indexing is extended to the XLEN value type and scaled
14029// accordingly.
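// For instance, a gather with i8 indices and a scale of 4 ends up with
// indices extended to XLEN and multiplied by 4, since vluxei* interprets each
// index simply as an unsigned byte offset from the base pointer.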
14030SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
14031 SelectionDAG &DAG) const {
14032 SDLoc DL(Op);
14033 MVT VT = Op.getSimpleValueType();
14034
14035 const auto *MemSD = cast<MemSDNode>(Op.getNode());
14036 EVT MemVT = MemSD->getMemoryVT();
14037 MachineMemOperand *MMO = MemSD->getMemOperand();
14038 SDValue Chain = MemSD->getChain();
14039 SDValue BasePtr = MemSD->getBasePtr();
14040
14041 [[maybe_unused]] ISD::LoadExtType LoadExtType;
14042 SDValue Index, Mask, PassThru, VL;
14043
14044 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
14045 Index = VPGN->getIndex();
14046 Mask = VPGN->getMask();
14047 PassThru = DAG.getUNDEF(VT);
14048 VL = VPGN->getVectorLength();
14049 // VP doesn't support extending loads.
14050 LoadExtType = ISD::NON_EXTLOAD;
14051 } else {
14052 // Else it must be a MGATHER.
14053 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
14054 Index = MGN->getIndex();
14055 Mask = MGN->getMask();
14056 PassThru = MGN->getPassThru();
14057 LoadExtType = MGN->getExtensionType();
14058 }
14059
14060 MVT IndexVT = Index.getSimpleValueType();
14061 MVT XLenVT = Subtarget.getXLenVT();
14062
14064 "Unexpected VTs!");
14065 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
14066 // Targets have to explicitly opt-in for extending vector loads.
14067 assert(LoadExtType == ISD::NON_EXTLOAD &&
14068 "Unexpected extending MGATHER/VP_GATHER");
14069
14070 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
14071 // the selection of the masked intrinsics doesn't do this for us.
14072 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14073
14074 MVT ContainerVT = VT;
14075 if (VT.isFixedLengthVector()) {
14076 ContainerVT = getContainerForFixedLengthVector(VT);
14077 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
14078 ContainerVT.getVectorElementCount());
14079
14080 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
14081
14082 if (!IsUnmasked) {
14083 MVT MaskVT = getMaskTypeFor(ContainerVT);
14084 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14085 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
14086 }
14087 }
14088
14089 if (!VL)
14090 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
14091
14092 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
14093 IndexVT = IndexVT.changeVectorElementType(XLenVT);
14094 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
14095 }
14096
14097 unsigned IntID =
14098 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
14099 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
14100 if (IsUnmasked)
14101 Ops.push_back(DAG.getUNDEF(ContainerVT));
14102 else
14103 Ops.push_back(PassThru);
14104 Ops.push_back(BasePtr);
14105 Ops.push_back(Index);
14106 if (!IsUnmasked)
14107 Ops.push_back(Mask);
14108 Ops.push_back(VL);
14109 if (!IsUnmasked)
14110 Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
14111
14112 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
14113 SDValue Result =
14114 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
14115 Chain = Result.getValue(1);
14116
14117 if (VT.isFixedLengthVector())
14118 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
14119
14120 return DAG.getMergeValues({Result, Chain}, DL);
14121}
14122
14123// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
14124// matched to a RVV indexed store. The RVV indexed store instructions only
14125// support the "unsigned unscaled" addressing mode; indices are implicitly
14126// zero-extended or truncated to XLEN and are treated as byte offsets. Any
14127// signed or scaled indexing is extended to the XLEN value type and scaled
14128// accordingly.
14129SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
14130 SelectionDAG &DAG) const {
14131 SDLoc DL(Op);
14132 const auto *MemSD = cast<MemSDNode>(Op.getNode());
14133 EVT MemVT = MemSD->getMemoryVT();
14134 MachineMemOperand *MMO = MemSD->getMemOperand();
14135 SDValue Chain = MemSD->getChain();
14136 SDValue BasePtr = MemSD->getBasePtr();
14137
14138 [[maybe_unused]] bool IsTruncatingStore = false;
14139 SDValue Index, Mask, Val, VL;
14140
14141 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
14142 Index = VPSN->getIndex();
14143 Mask = VPSN->getMask();
14144 Val = VPSN->getValue();
14145 VL = VPSN->getVectorLength();
14146 // VP doesn't support truncating stores.
14147 IsTruncatingStore = false;
14148 } else {
14149 // Else it must be a MSCATTER.
14150 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
14151 Index = MSN->getIndex();
14152 Mask = MSN->getMask();
14153 Val = MSN->getValue();
14154 IsTruncatingStore = MSN->isTruncatingStore();
14155 }
14156
14157 MVT VT = Val.getSimpleValueType();
14158 MVT IndexVT = Index.getSimpleValueType();
14159 MVT XLenVT = Subtarget.getXLenVT();
14160
14162 "Unexpected VTs!");
14163 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
14164 // Targets have to explicitly opt-in for extending vector loads and
14165 // truncating vector stores.
14166 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
14167
14168 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
14169 // the selection of the masked intrinsics doesn't do this for us.
14170 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14171
14172 MVT ContainerVT = VT;
14173 if (VT.isFixedLengthVector()) {
14174 ContainerVT = getContainerForFixedLengthVector(VT);
14175 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
14176 ContainerVT.getVectorElementCount());
14177
14178 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
14179 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
14180
14181 if (!IsUnmasked) {
14182 MVT MaskVT = getMaskTypeFor(ContainerVT);
14183 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14184 }
14185 }
14186
14187 if (!VL)
14188 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
14189
14190 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
14191 IndexVT = IndexVT.changeVectorElementType(XLenVT);
14192 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
14193 }
14194
14195 unsigned IntID =
14196 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
14197 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
14198 Ops.push_back(Val);
14199 Ops.push_back(BasePtr);
14200 Ops.push_back(Index);
14201 if (!IsUnmasked)
14202 Ops.push_back(Mask);
14203 Ops.push_back(VL);
14204
14205 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
14206 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
14207}
14208
14209SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
14210 SelectionDAG &DAG) const {
14211 const MVT XLenVT = Subtarget.getXLenVT();
14212 SDLoc DL(Op);
14213 SDValue Chain = Op->getOperand(0);
14214 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
14215 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14216 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14217
14218 // The encoding used for the rounding mode in RISC-V differs from that used by
14219 // FLT_ROUNDS. To convert between them, the RISC-V rounding mode is used as an
14220 // index into a table consisting of a sequence of 4-bit fields, each holding
14221 // the corresponding FLT_ROUNDS mode.
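// For example, with frm = RTZ (encoded as 1) the shift below selects the
// 4-bit field at bit position 4, which holds the FLT_ROUNDS value for
// round-toward-zero (0).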
14222 static const int Table =
14223 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
14224 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
14225 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
14226 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
14227 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
14228
14229 SDValue Shift =
14230 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
14231 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
14232 DAG.getConstant(Table, DL, XLenVT), Shift);
14233 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
14234 DAG.getConstant(7, DL, XLenVT));
14235
14236 return DAG.getMergeValues({Masked, Chain}, DL);
14237}
14238
14239SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
14240 SelectionDAG &DAG) const {
14241 const MVT XLenVT = Subtarget.getXLenVT();
14242 SDLoc DL(Op);
14243 SDValue Chain = Op->getOperand(0);
14244 SDValue RMValue = Op->getOperand(1);
14245 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
14246
14247 // The encoding used for the rounding mode in RISC-V differs from that used by
14248 // FLT_ROUNDS. To convert between them, the C rounding mode is used as an
14249 // index into a table consisting of a sequence of 4-bit fields, each holding
14250 // the corresponding RISC-V mode.
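// For example, a FLT_ROUNDS value of 1 (round to nearest) selects the 4-bit
// field at bit position 4, which holds RISCVFPRndMode::RNE (0), the value
// written to frm below.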
14251 static const unsigned Table =
14252 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
14253 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
14254 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
14255 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
14256 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
14257
14258 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
14259
14260 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
14261 DAG.getConstant(2, DL, XLenVT));
14262 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
14263 DAG.getConstant(Table, DL, XLenVT), Shift);
14264 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
14265 DAG.getConstant(0x7, DL, XLenVT));
14266 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14267 RMValue);
14268}
14269
14270SDValue RISCVTargetLowering::lowerGET_FPENV(SDValue Op,
14271 SelectionDAG &DAG) const {
14272 const MVT XLenVT = Subtarget.getXLenVT();
14273 SDLoc DL(Op);
14274 SDValue Chain = Op->getOperand(0);
14275 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14276 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14277 return DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14278}
14279
14280SDValue RISCVTargetLowering::lowerSET_FPENV(SDValue Op,
14281 SelectionDAG &DAG) const {
14282 const MVT XLenVT = Subtarget.getXLenVT();
14283 SDLoc DL(Op);
14284 SDValue Chain = Op->getOperand(0);
14285 SDValue EnvValue = Op->getOperand(1);
14286 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14287
14288 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
14289 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14290 EnvValue);
14291}
14292
14293SDValue RISCVTargetLowering::lowerRESET_FPENV(SDValue Op,
14294 SelectionDAG &DAG) const {
14295 const MVT XLenVT = Subtarget.getXLenVT();
14296 SDLoc DL(Op);
14297 SDValue Chain = Op->getOperand(0);
14298 SDValue EnvValue = DAG.getRegister(RISCV::X0, XLenVT);
14299 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14300
14301 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14302 EnvValue);
14303}
14304
14307
14308SDValue RISCVTargetLowering::lowerGET_FPMODE(SDValue Op,
14309 SelectionDAG &DAG) const {
14310 const MVT XLenVT = Subtarget.getXLenVT();
14311 SDLoc DL(Op);
14312 SDValue Chain = Op->getOperand(0);
14313 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14314 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14315 SDValue Result = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14316 Chain = Result.getValue(1);
14317 return DAG.getMergeValues({Result, Chain}, DL);
14318}
14319
14320SDValue RISCVTargetLowering::lowerSET_FPMODE(SDValue Op,
14321 SelectionDAG &DAG) const {
14322 const MVT XLenVT = Subtarget.getXLenVT();
14323 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
14324 SDLoc DL(Op);
14325 SDValue Chain = Op->getOperand(0);
14326 SDValue EnvValue = Op->getOperand(1);
14327 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14328 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
14329
14330 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
14331 EnvValue = DAG.getNode(ISD::AND, DL, XLenVT, EnvValue, ModeMask);
14332 Chain = DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
14333 ModeMask);
14334 return DAG.getNode(RISCVISD::SET_CSR, DL, MVT::Other, Chain, SysRegNo,
14335 EnvValue);
14336}
14337
14338SDValue RISCVTargetLowering::lowerRESET_FPMODE(SDValue Op,
14339 SelectionDAG &DAG) const {
14340 const MVT XLenVT = Subtarget.getXLenVT();
14341 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
14342 SDLoc DL(Op);
14343 SDValue Chain = Op->getOperand(0);
14344 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14345 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
14346
14347 return DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
14348 ModeMask);
14349}
14350
14351SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
14352 SelectionDAG &DAG) const {
14353 MachineFunction &MF = DAG.getMachineFunction();
14354
14355 bool isRISCV64 = Subtarget.is64Bit();
14356 EVT PtrVT = getPointerTy(DAG.getDataLayout());
14357
14358 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
14359 return DAG.getFrameIndex(FI, PtrVT);
14360}
14361
14362// Returns the opcode of the target-specific SDNode that implements the 32-bit
14363// form of the given Opcode.
14364static unsigned getRISCVWOpcode(unsigned Opcode) {
14365 switch (Opcode) {
14366 default:
14367 llvm_unreachable("Unexpected opcode");
14368 case ISD::SHL:
14369 return RISCVISD::SLLW;
14370 case ISD::SRA:
14371 return RISCVISD::SRAW;
14372 case ISD::SRL:
14373 return RISCVISD::SRLW;
14374 case ISD::SDIV:
14375 return RISCVISD::DIVW;
14376 case ISD::UDIV:
14377 return RISCVISD::DIVUW;
14378 case ISD::UREM:
14379 return RISCVISD::REMUW;
14380 case ISD::ROTL:
14381 return RISCVISD::ROLW;
14382 case ISD::ROTR:
14383 return RISCVISD::RORW;
14384 }
14385}
14386
14387// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
14388// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
14389// otherwise be promoted to i64, making it difficult to select the
14390 // SLLW/DIVUW/.../*W later on, because the fact that the operation was
14391 // originally of type i8/i16/i32 is lost.
14392 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
14393 unsigned ExtOpc = ISD::ANY_EXTEND) {
14394 SDLoc DL(N);
14395 unsigned WOpcode = getRISCVWOpcode(N->getOpcode());
14396 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
14397 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
14398 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
14399 // ReplaceNodeResults requires we maintain the same type for the return value.
14400 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
14401}
14402
14403 // Converts the given 32-bit operation to an i64 operation with sign-extension
14404 // semantics, to reduce the number of sign-extension instructions needed.
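// For example, an illegal (i32 add x, y) on RV64 becomes
//   trunc(sext_inreg(add(anyext x, anyext y), i32))
// which later matches a single ADDW.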
14405 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
14406 SDLoc DL(N);
14407 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14408 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14409 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
14410 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
14411 DAG.getValueType(MVT::i32));
14412 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
14413}
14414
14415 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
14416 SmallVectorImpl<SDValue> &Results,
14417 SelectionDAG &DAG) const {
14418 SDLoc DL(N);
14419 switch (N->getOpcode()) {
14420 default:
14421 llvm_unreachable("Don't know how to custom type legalize this operation!");
14422 case ISD::STRICT_FP_TO_SINT:
14423 case ISD::STRICT_FP_TO_UINT:
14424 case ISD::FP_TO_SINT:
14425 case ISD::FP_TO_UINT: {
14426 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14427 "Unexpected custom legalisation");
14428 bool IsStrict = N->isStrictFPOpcode();
14429 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
14430 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
14431 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
14432 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
14433 TargetLowering::TypeSoftenFloat) {
14434 if (!isTypeLegal(Op0.getValueType()))
14435 return;
14436 if (IsStrict) {
14437 SDValue Chain = N->getOperand(0);
14438 // In absence of Zfh, promote f16 to f32, then convert.
14439 if (Op0.getValueType() == MVT::f16 &&
14440 !Subtarget.hasStdExtZfhOrZhinx()) {
14441 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
14442 {Chain, Op0});
14443 Chain = Op0.getValue(1);
14444 }
14445 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
14446 : RISCVISD::STRICT_FCVT_WU_RV64;
14447 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
14448 SDValue Res = DAG.getNode(
14449 Opc, DL, VTs, Chain, Op0,
14450 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
14451 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14452 Results.push_back(Res.getValue(1));
14453 return;
14454 }
14455 // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
14456 // convert.
14457 if ((Op0.getValueType() == MVT::f16 &&
14458 !Subtarget.hasStdExtZfhOrZhinx()) ||
14459 Op0.getValueType() == MVT::bf16)
14460 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
14461
14462 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14463 SDValue Res =
14464 DAG.getNode(Opc, DL, MVT::i64, Op0,
14465 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
14466 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14467 return;
14468 }
14469 // If the FP type needs to be softened, emit a library call using the 'si'
14470 // version. If we left it to default legalization we'd end up with 'di'. If
14471 // the FP type doesn't need to be softened just let generic type
14472 // legalization promote the result type.
14473 RTLIB::Libcall LC;
14474 if (IsSigned)
14475 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
14476 else
14477 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
14478 MakeLibCallOptions CallOptions;
14479 EVT OpVT = Op0.getValueType();
14480 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0));
14481 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
14482 SDValue Result;
14483 std::tie(Result, Chain) =
14484 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
14485 Results.push_back(Result);
14486 if (IsStrict)
14487 Results.push_back(Chain);
14488 break;
14489 }
14490 case ISD::LROUND: {
14491 SDValue Op0 = N->getOperand(0);
14492 EVT Op0VT = Op0.getValueType();
14493 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
14494 TargetLowering::TypeSoftenFloat) {
14495 if (!isTypeLegal(Op0VT))
14496 return;
14497
14498 // In absence of Zfh, promote f16 to f32, then convert.
14499 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
14500 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
14501
14502 SDValue Res =
14503 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
14504 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
14505 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14506 return;
14507 }
14508 // If the FP type needs to be softened, emit a library call to lround. We'll
14509 // need to truncate the result. We assume any value that doesn't fit in i32
14510 // is allowed to return an unspecified value.
14511 RTLIB::Libcall LC =
14512 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
14513 MakeLibCallOptions CallOptions;
14514 EVT OpVT = Op0.getValueType();
14515 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
14516 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
14517 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
14518 Results.push_back(Result);
14519 break;
14520 }
14521 case ISD::READCYCLECOUNTER:
14522 case ISD::READSTEADYCOUNTER: {
14523 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
14524 "has custom type legalization on riscv32");
14525
14526 SDValue LoCounter, HiCounter;
14527 MVT XLenVT = Subtarget.getXLenVT();
14528 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
14529 LoCounter = DAG.getTargetConstant(RISCVSysReg::cycle, DL, XLenVT);
14530 HiCounter = DAG.getTargetConstant(RISCVSysReg::cycleh, DL, XLenVT);
14531 } else {
14532 LoCounter = DAG.getTargetConstant(RISCVSysReg::time, DL, XLenVT);
14533 HiCounter = DAG.getTargetConstant(RISCVSysReg::timeh, DL, XLenVT);
14534 }
14535 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
14536 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
14537 N->getOperand(0), LoCounter, HiCounter);
14538
14539 Results.push_back(
14540 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
14541 Results.push_back(RCW.getValue(2));
14542 break;
14543 }
14544 case ISD::LOAD: {
14545 if (!ISD::isNON_EXTLoad(N))
14546 return;
14547
14548 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
14549 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
14550 LoadSDNode *Ld = cast<LoadSDNode>(N);
14551
14552 if (N->getValueType(0) == MVT::i64) {
14553 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
14554 "Unexpected custom legalisation");
14555
14556 if (!Subtarget.enableUnalignedScalarMem() && Ld->getAlign() < 8)
14557 return;
14558
14559 SDLoc DL(N);
14560 SDValue Result = DAG.getMemIntrinsicNode(
14561 RISCVISD::LD_RV32, DL,
14562 DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
14563 {Ld->getChain(), Ld->getBasePtr()}, MVT::i64, Ld->getMemOperand());
14564 SDValue Lo = Result.getValue(0);
14565 SDValue Hi = Result.getValue(1);
14566 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
14567 Results.append({Pair, Result.getValue(2)});
14568 return;
14569 }
14570
14571 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14572 "Unexpected custom legalisation");
14573
14574 SDLoc dl(N);
14575 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
14576 Ld->getBasePtr(), Ld->getMemoryVT(),
14577 Ld->getMemOperand());
14578 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
14579 Results.push_back(Res.getValue(1));
14580 return;
14581 }
14582 case ISD::MUL: {
14583 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
14584 unsigned XLen = Subtarget.getXLen();
14585 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
14586 if (Size > XLen) {
14587 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
14588 SDValue LHS = N->getOperand(0);
14589 SDValue RHS = N->getOperand(1);
14590 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
14591
14592 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
14593 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
14594 // We need exactly one side to be unsigned.
14595 if (LHSIsU == RHSIsU)
14596 return;
14597
14598 auto MakeMULPair = [&](SDValue S, SDValue U) {
14599 MVT XLenVT = Subtarget.getXLenVT();
14600 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
14601 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
14602 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
14603 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
14604 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
14605 };
14606
14607 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
14608 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
14609
14610 // The other operand should be signed, but still prefer MULH when
14611 // possible.
14612 if (RHSIsU && LHSIsS && !RHSIsS)
14613 Results.push_back(MakeMULPair(LHS, RHS));
14614 else if (LHSIsU && RHSIsS && !LHSIsS)
14615 Results.push_back(MakeMULPair(RHS, LHS));
14616
14617 return;
14618 }
14619 [[fallthrough]];
14620 }
14621 case ISD::ADD:
14622 case ISD::SUB:
14623 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14624 "Unexpected custom legalisation");
14625 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
14626 break;
14627 case ISD::SHL:
14628 case ISD::SRA:
14629 case ISD::SRL:
14630 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14631 "Unexpected custom legalisation");
14632 if (N->getOperand(1).getOpcode() != ISD::Constant) {
14633 // If we can use a BSET instruction, allow default promotion to apply.
14634 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
14635 isOneConstant(N->getOperand(0)))
14636 break;
14637 Results.push_back(customLegalizeToWOp(N, DAG));
14638 break;
14639 }
14640
14641 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
14642 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
14643 // shift amount.
14644 if (N->getOpcode() == ISD::SHL) {
14645 SDLoc DL(N);
14646 SDValue NewOp0 =
14647 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14648 SDValue NewOp1 =
14649 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
14650 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
14651 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
14652 DAG.getValueType(MVT::i32));
14653 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
14654 }
14655
14656 break;
14657 case ISD::ROTL:
14658 case ISD::ROTR:
14659 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14660 "Unexpected custom legalisation");
14661 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
14662 Subtarget.hasVendorXTHeadBb()) &&
14663 "Unexpected custom legalization");
14664 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
14665 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
14666 return;
14667 Results.push_back(customLegalizeToWOp(N, DAG));
14668 break;
14669 case ISD::CTTZ:
14670 case ISD::CTTZ_ZERO_UNDEF:
14671 case ISD::CTLZ:
14672 case ISD::CTLZ_ZERO_UNDEF: {
14673 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14674 "Unexpected custom legalisation");
14675
14676 SDValue NewOp0 =
14677 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14678 bool IsCTZ =
14679 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
14680
14681 // Without Zbb, lower as 32 - clzw(~X & (X-1))
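// For example, X = 0b1000: ~X & (X-1) = 0b0111, clzw of that is 29, and
// 32 - 29 = 3 = cttz(X).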
14682 if (IsCTZ && !Subtarget.hasStdExtZbb()) {
14683 assert(Subtarget.hasStdExtP());
14684
14685 NewOp0 = DAG.getFreeze(NewOp0);
14686 SDValue Not = DAG.getNOT(DL, NewOp0, MVT::i64);
14687 SDValue Minus1 = DAG.getNode(ISD::SUB, DL, MVT::i64, NewOp0,
14688 DAG.getConstant(1, DL, MVT::i64));
14689 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Not, Minus1);
14690 SDValue CLZW = DAG.getNode(RISCVISD::CLZW, DL, MVT::i64, And);
14691 SDValue Sub = DAG.getNode(ISD::SUB, DL, MVT::i64,
14692 DAG.getConstant(32, DL, MVT::i64), CLZW);
14693 SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Sub,
14694 DAG.getValueType(MVT::i32));
14695 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14696 return;
14697 }
14698
14699 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
14700 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
14701 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14702 return;
14703 }
14704 case ISD::SDIV:
14705 case ISD::UDIV:
14706 case ISD::UREM: {
14707 MVT VT = N->getSimpleValueType(0);
14708 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
14709 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
14710 "Unexpected custom legalisation");
14711 // Don't promote division/remainder by constant since we should expand those
14712 // to a multiply by a magic constant.
14713 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
14714 if (N->getOperand(1).getOpcode() == ISD::Constant &&
14715 !isIntDivCheap(N->getValueType(0), Attr))
14716 return;
14717
14718 // If the input is i32, use ANY_EXTEND since the W instructions don't read
14719 // the upper 32 bits. For other types we need to sign or zero extend
14720 // based on the opcode.
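// For example, an i16 sdiv must sign extend both operands so that DIVW sees
// the correct values in the low 32 bits, whereas an i32 sdiv can use
// ANY_EXTEND because DIVW only reads the low 32 bits anyway.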
14721 unsigned ExtOpc = ISD::ANY_EXTEND;
14722 if (VT != MVT::i32)
14723 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
14724 : ISD::ZERO_EXTEND;
14725
14726 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
14727 break;
14728 }
14729 case ISD::SADDO: {
14730 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14731 "Unexpected custom legalisation");
14732
14733 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
14734 // use the default legalization.
14735 if (!isa<ConstantSDNode>(N->getOperand(1)))
14736 return;
14737
14738 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
14739 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
14740 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
14741 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
14742 DAG.getValueType(MVT::i32));
14743
14744 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
14745
14746 // For an addition, the result should be less than one of the operands (LHS)
14747 // if and only if the other operand (RHS) is negative, otherwise there will
14748 // be overflow.
14749 // For a subtraction, the result should be less than one of the operands
14750 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
14751 // otherwise there will be overflow.
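// Worked example: LHS = INT32_MAX, RHS = 1. Res wraps to INT32_MIN, so
// Res < LHS is true while RHS < 0 is false; the XOR below is therefore 1,
// signalling overflow.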
14752 EVT OType = N->getValueType(1);
14753 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
14754 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
14755
14756 SDValue Overflow =
14757 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
14758 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14759 Results.push_back(Overflow);
14760 return;
14761 }
14762 case ISD::UADDO:
14763 case ISD::USUBO: {
14764 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14765 "Unexpected custom legalisation");
14766 bool IsAdd = N->getOpcode() == ISD::UADDO;
14767 // Create an ADDW or SUBW.
14768 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14769 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14770 SDValue Res =
14771 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
14772 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
14773 DAG.getValueType(MVT::i32));
14774
14775 SDValue Overflow;
14776 if (IsAdd && isOneConstant(RHS)) {
14777 // Special case uaddo X, 1 overflowed if the addition result is 0.
14778 // The general case (X + C) < C is not necessarily beneficial. Although we
14779 // reduce the live range of X, we may introduce the materialization of
14780 // constant C, especially when the setcc result is used by a branch. We
14781 // have no compare-with-constant-and-branch instructions.
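// For example, uaddo(X, 1) produces a zero sum exactly when X was
// 0xffffffff, the only input that overflows, so a single SETEQ against zero
// suffices.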
14782 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
14783 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
14784 } else if (IsAdd && isAllOnesConstant(RHS)) {
14785 // Special case uaddo X, -1 overflowed if X != 0.
14786 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
14787 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
14788 } else {
14789 // Sign extend the LHS and perform an unsigned compare with the ADDW
14790 // result. Since the inputs are sign extended from i32, this is equivalent
14791 // to comparing the lower 32 bits.
14792 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
14793 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
14794 IsAdd ? ISD::SETULT : ISD::SETUGT);
14795 }
14796
14797 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14798 Results.push_back(Overflow);
14799 return;
14800 }
14801 case ISD::UADDSAT:
14802 case ISD::USUBSAT: {
14803 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14804 !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
14805 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
14806 // promotion for UADDO/USUBO.
14807 Results.push_back(expandAddSubSat(N, DAG));
14808 return;
14809 }
14810 case ISD::SADDSAT:
14811 case ISD::SSUBSAT: {
14812 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14813 "Unexpected custom legalisation");
14814 Results.push_back(expandAddSubSat(N, DAG));
14815 return;
14816 }
14817 case ISD::ABS: {
14818 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14819 "Unexpected custom legalisation");
14820
14821 if (Subtarget.hasStdExtP()) {
14822 SDValue Src =
14823 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14824 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
14825 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
14826 return;
14827 }
14828
14829 if (Subtarget.hasStdExtZbb()) {
14830 // Emit a special node that will be expanded to NEGW+MAX at isel.
14831 // This allows us to remember that the result is sign extended. Expanding
14832 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
14833 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
14834 N->getOperand(0));
14835 SDValue Abs = DAG.getNode(RISCVISD::NEGW_MAX, DL, MVT::i64, Src);
14836 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
14837 return;
14838 }
14839
14840 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
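// For example, X = -5: Y = -1 (all sign bits), xor(X, Y) = 4, and
// subw(4, -1) = 5 = abs(X).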
14841 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14842
14843 // Freeze the source so we can increase its use count.
14844 Src = DAG.getFreeze(Src);
14845
14846 // Copy sign bit to all bits using the sraiw pattern.
14847 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
14848 DAG.getValueType(MVT::i32));
14849 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
14850 DAG.getConstant(31, DL, MVT::i64));
14851
14852 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
14853 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
14854
14855 // NOTE: The result is only required to be anyextended, but sext is
14856 // consistent with type legalization of sub.
14857 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
14858 DAG.getValueType(MVT::i32));
14859 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
14860 return;
14861 }
14862 case ISD::BITCAST: {
14863 EVT VT = N->getValueType(0);
14864 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
14865 SDValue Op0 = N->getOperand(0);
14866 EVT Op0VT = Op0.getValueType();
14867 MVT XLenVT = Subtarget.getXLenVT();
14868 if (VT == MVT::i16 &&
14869 ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
14870 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
14871 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
14872 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
14873 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
14874 Subtarget.hasStdExtFOrZfinx()) {
14875 SDValue FPConv =
14876 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
14877 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
14878 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() &&
14879 Subtarget.hasStdExtDOrZdinx()) {
14880 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
14881 DAG.getVTList(MVT::i32, MVT::i32), Op0);
14882 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
14883 NewReg.getValue(0), NewReg.getValue(1));
14884 Results.push_back(RetReg);
14885 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
14886 isTypeLegal(Op0VT)) {
14887 // Custom-legalize bitcasts from fixed-length vector types to illegal
14888 // scalar types in order to improve codegen. Bitcast the vector to a
14889 // one-element vector type whose element type is the same as the result
14890 // type, and extract the first element.
14891 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
14892 if (isTypeLegal(BVT)) {
14893 SDValue BVec = DAG.getBitcast(BVT, Op0);
14894 Results.push_back(DAG.getExtractVectorElt(DL, VT, BVec, 0));
14895 }
14896 }
14897 break;
14898 }
14899 case ISD::BITREVERSE: {
14900 assert(N->getValueType(0) == MVT::i8 && Subtarget.hasStdExtZbkb() &&
14901 "Unexpected custom legalisation");
14902 MVT XLenVT = Subtarget.getXLenVT();
14903 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
14904 SDValue NewRes = DAG.getNode(RISCVISD::BREV8, DL, XLenVT, NewOp);
14905 // ReplaceNodeResults requires we maintain the same type for the return
14906 // value.
14907 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, NewRes));
14908 break;
14909 }
14910 case RISCVISD::BREV8:
14911 case RISCVISD::ORC_B: {
14912 MVT VT = N->getSimpleValueType(0);
14913 MVT XLenVT = Subtarget.getXLenVT();
14914 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
14915 "Unexpected custom legalisation");
14916 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
14917 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
14918 "Unexpected extension");
14919 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
14920 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
14921 // ReplaceNodeResults requires we maintain the same type for the return
14922 // value.
14923 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
14924 break;
14925 }
14926 case ISD::EXTRACT_VECTOR_ELT: {
14927 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
14928 // type is illegal (currently only vXi64 RV32).
14929 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
14930 // transferred to the destination register. We issue two of these from the
14931 // upper- and lower- halves of the SEW-bit vector element, slid down to the
14932 // first element.
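// For example, extracting element 2 of an nxv2i64 vector on RV32: slide the
// vector down by 2, vmv.x.s the low 32 bits, shift the element right by 32
// with vsrl and vmv.x.s again for the high 32 bits, then pair the two GPRs
// with BUILD_PAIR.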
14933 SDValue Vec = N->getOperand(0);
14934 SDValue Idx = N->getOperand(1);
14935
14936 // The vector type hasn't been legalized yet so we can't issue target
14937 // specific nodes if it needs legalization.
14938 // FIXME: We would manually legalize if it's important.
14939 if (!isTypeLegal(Vec.getValueType()))
14940 return;
14941
14942 MVT VecVT = Vec.getSimpleValueType();
14943
14944 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
14945 VecVT.getVectorElementType() == MVT::i64 &&
14946 "Unexpected EXTRACT_VECTOR_ELT legalization");
14947
14948 // If this is a fixed vector, we need to convert it to a scalable vector.
14949 MVT ContainerVT = VecVT;
14950 if (VecVT.isFixedLengthVector()) {
14951 ContainerVT = getContainerForFixedLengthVector(VecVT);
14952 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
14953 }
14954
14955 MVT XLenVT = Subtarget.getXLenVT();
14956
14957 // Use a VL of 1 to avoid processing more elements than we need.
14958 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
14959
14960 // Unless the index is known to be 0, we must slide the vector down to get
14961 // the desired element into index 0.
14962 if (!isNullConstant(Idx)) {
14963 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
14964 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
14965 }
14966
14967 // Extract the lower XLEN bits of the correct vector element.
14968 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
14969
14970 // To extract the upper XLEN bits of the vector element, shift the first
14971 // element right by 32 bits and re-extract the lower XLEN bits.
14972 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14973 DAG.getUNDEF(ContainerVT),
14974 DAG.getConstant(32, DL, XLenVT), VL);
14975 SDValue LShr32 =
14976 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
14977 DAG.getUNDEF(ContainerVT), Mask, VL);
14978
14979 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
14980
14981 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
14982 break;
14983 }
14984 case ISD::INTRINSIC_WO_CHAIN: {
14985 unsigned IntNo = N->getConstantOperandVal(0);
14986 switch (IntNo) {
14987 default:
14989 "Don't know how to custom type legalize this intrinsic!");
14990 case Intrinsic::experimental_get_vector_length: {
14991 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
14992 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14993 return;
14994 }
14995 case Intrinsic::experimental_cttz_elts: {
14996 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
14997 Results.push_back(
14998 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
14999 return;
15000 }
15001 case Intrinsic::riscv_orc_b:
15002 case Intrinsic::riscv_brev8:
15003 case Intrinsic::riscv_sha256sig0:
15004 case Intrinsic::riscv_sha256sig1:
15005 case Intrinsic::riscv_sha256sum0:
15006 case Intrinsic::riscv_sha256sum1:
15007 case Intrinsic::riscv_sm3p0:
15008 case Intrinsic::riscv_sm3p1: {
15009 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15010 return;
15011 unsigned Opc;
15012 switch (IntNo) {
15013 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
15014 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
15015 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
15016 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
15017 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
15018 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
15019 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
15020 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
15021 }
15022
15023 SDValue NewOp =
15024 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15025 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
15026 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15027 return;
15028 }
15029 case Intrinsic::riscv_sm4ks:
15030 case Intrinsic::riscv_sm4ed: {
15031 unsigned Opc =
15032 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
15033 SDValue NewOp0 =
15034 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15035 SDValue NewOp1 =
15036 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15037 SDValue Res =
15038 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
15039 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15040 return;
15041 }
15042 case Intrinsic::riscv_mopr: {
15043 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15044 return;
15045 SDValue NewOp =
15046 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15047 SDValue Res = DAG.getNode(
15048 RISCVISD::MOP_R, DL, MVT::i64, NewOp,
15049 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
15050 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15051 return;
15052 }
15053 case Intrinsic::riscv_moprr: {
15054 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15055 return;
15056 SDValue NewOp0 =
15057 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15058 SDValue NewOp1 =
15059 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15060 SDValue Res = DAG.getNode(
15061 RISCVISD::MOP_RR, DL, MVT::i64, NewOp0, NewOp1,
15062 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
15063 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15064 return;
15065 }
15066 case Intrinsic::riscv_clmul: {
15067 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15068 return;
15069
15070 SDValue NewOp0 =
15071 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15072 SDValue NewOp1 =
15073 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15074 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
15075 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15076 return;
15077 }
15078 case Intrinsic::riscv_clmulh:
15079 case Intrinsic::riscv_clmulr: {
15080 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15081 return;
15082
15083 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
15084 // to the full 128-bit clmul result of multiplying two xlen values.
15085 // Perform clmulr or clmulh on the shifted values. Finally, extract the
15086 // upper 32 bits.
15087 //
15088 // The alternative is to mask the inputs to 32 bits and use clmul, but
15089 // that requires two shifts to mask each input without zext.w.
15090 // FIXME: If the inputs are known zero extended or could be freely
15091 // zero extended, the mask form would be better.
15092 SDValue NewOp0 =
15093 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15094 SDValue NewOp1 =
15095 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15096 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
15097 DAG.getConstant(32, DL, MVT::i64));
15098 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
15099 DAG.getConstant(32, DL, MVT::i64));
15100 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
15101 : RISCVISD::CLMULR;
15102 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
15103 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
15104 DAG.getConstant(32, DL, MVT::i64));
15105 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15106 return;
15107 }
15108 case Intrinsic::riscv_vmv_x_s: {
15109 EVT VT = N->getValueType(0);
15110 MVT XLenVT = Subtarget.getXLenVT();
15111 if (VT.bitsLT(XLenVT)) {
15112 // Simple case just extract using vmv.x.s and truncate.
15113 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
15114 Subtarget.getXLenVT(), N->getOperand(1));
15115 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
15116 return;
15117 }
15118
15119 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
15120 "Unexpected custom legalization");
15121
15122 // We need to do the move in two steps.
15123 SDValue Vec = N->getOperand(1);
15124 MVT VecVT = Vec.getSimpleValueType();
15125
15126 // First extract the lower XLEN bits of the element.
15127 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
15128
15129 // To extract the upper XLEN bits of the vector element, shift the first
15130 // element right by 32 bits and re-extract the lower XLEN bits.
15131 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
15132
15133 SDValue ThirtyTwoV =
15134 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
15135 DAG.getConstant(32, DL, XLenVT), VL);
15136 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
15137 DAG.getUNDEF(VecVT), Mask, VL);
15138 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
15139
15140 Results.push_back(
15141 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
15142 break;
15143 }
15144 }
15145 break;
15146 }
15147 case ISD::VECREDUCE_ADD:
15148 case ISD::VECREDUCE_AND:
15149 case ISD::VECREDUCE_OR:
15150 case ISD::VECREDUCE_XOR:
15151 case ISD::VECREDUCE_SMAX:
15152 case ISD::VECREDUCE_UMAX:
15153 case ISD::VECREDUCE_SMIN:
15154 case ISD::VECREDUCE_UMIN:
15155 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
15156 Results.push_back(V);
15157 break;
15158 case ISD::VP_REDUCE_ADD:
15159 case ISD::VP_REDUCE_AND:
15160 case ISD::VP_REDUCE_OR:
15161 case ISD::VP_REDUCE_XOR:
15162 case ISD::VP_REDUCE_SMAX:
15163 case ISD::VP_REDUCE_UMAX:
15164 case ISD::VP_REDUCE_SMIN:
15165 case ISD::VP_REDUCE_UMIN:
15166 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
15167 Results.push_back(V);
15168 break;
15169 case ISD::GET_ROUNDING: {
15170 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
15171 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
15172 Results.push_back(Res.getValue(0));
15173 Results.push_back(Res.getValue(1));
15174 break;
15175 }
15176 }
15177}
15178
15179/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
15180/// which corresponds to it.
15181static unsigned getVecReduceOpcode(unsigned Opc) {
15182 switch (Opc) {
15183 default:
15184 llvm_unreachable("Unhandled binary to transform reduction");
15185 case ISD::ADD:
15186 return ISD::VECREDUCE_ADD;
15187 case ISD::UMAX:
15188 return ISD::VECREDUCE_UMAX;
15189 case ISD::SMAX:
15190 return ISD::VECREDUCE_SMAX;
15191 case ISD::UMIN:
15192 return ISD::VECREDUCE_UMIN;
15193 case ISD::SMIN:
15194 return ISD::VECREDUCE_SMIN;
15195 case ISD::AND:
15196 return ISD::VECREDUCE_AND;
15197 case ISD::OR:
15198 return ISD::VECREDUCE_OR;
15199 case ISD::XOR:
15200 return ISD::VECREDUCE_XOR;
15201 case ISD::FADD:
15202 // Note: This is the associative form of the generic reduction opcode.
15203 return ISD::VECREDUCE_FADD;
15204 case ISD::FMAXNUM:
15205 return ISD::VECREDUCE_FMAX;
15206 case ISD::FMINNUM:
15207 return ISD::VECREDUCE_FMIN;
15208 }
15209}
15210
15211/// Perform two related transforms whose purpose is to incrementally recognize
15212/// an explode_vector followed by scalar reduction as a vector reduction node.
15213/// This exists to recover from a deficiency in SLP which can't handle
15214/// forests with multiple roots sharing common nodes. In some cases, one
15215/// of the trees will be vectorized, and the other will remain (unprofitably)
15216/// scalarized.
15217static SDValue
15218 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
15219 const RISCVSubtarget &Subtarget) {
15220
15221 // This transform needs to run before all integer types have been legalized
15222 // to i64 (so that the vector element type matches the add type), and while
15223 // it's safe to introduce odd sized vector types.
15224 if (DAG.NewNodesMustHaveLegalTypes)
15225 return SDValue();
15226
15227 // Without V, this transform isn't useful. We could form the (illegal)
15228 // operations and let them be scalarized again, but there's really no point.
15229 if (!Subtarget.hasVInstructions())
15230 return SDValue();
15231
15232 const SDLoc DL(N);
15233 const EVT VT = N->getValueType(0);
15234 const unsigned Opc = N->getOpcode();
15235
15236 if (!VT.isInteger()) {
15237 switch (Opc) {
15238 default:
15239 return SDValue();
15240 case ISD::FADD:
15241 // For FADD, we only handle the case with reassociation allowed. We
15242 // could handle strict reduction order, but at the moment, there's no
15243 // known reason to, and the complexity isn't worth it.
15244 if (!N->getFlags().hasAllowReassociation())
15245 return SDValue();
15246 break;
15247 case ISD::FMAXNUM:
15248 case ISD::FMINNUM:
15249 break;
15250 }
15251 }
15252
15253 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
15254 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
15255 "Inconsistent mappings");
15256 SDValue LHS = N->getOperand(0);
15257 SDValue RHS = N->getOperand(1);
15258
15259 if (!LHS.hasOneUse() || !RHS.hasOneUse())
15260 return SDValue();
15261
15262 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15263 std::swap(LHS, RHS);
15264
15265 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15266 !isa<ConstantSDNode>(RHS.getOperand(1)))
15267 return SDValue();
15268
15269 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
15270 SDValue SrcVec = RHS.getOperand(0);
15271 EVT SrcVecVT = SrcVec.getValueType();
15272 assert(SrcVecVT.getVectorElementType() == VT);
15273 if (SrcVecVT.isScalableVector())
15274 return SDValue();
15275
15276 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
15277 return SDValue();
15278
15279 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
15280 // reduce_op (extract_subvector [2 x VT] from V). This will form the
15281 // root of our reduction tree. TODO: We could extend this to any two
15282 // adjacent aligned constant indices if desired.
15283 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15284 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
15285 uint64_t LHSIdx =
15286 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
15287 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
15288 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
15289 SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
15290 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
15291 }
15292 }
15293
15294 // Match (binop (reduce (extract_subvector V, 0),
15295 // (extract_vector_elt V, sizeof(SubVec))))
15296 // into a reduction of one more element from the original vector V.
15297 if (LHS.getOpcode() != ReduceOpc)
15298 return SDValue();
15299
15300 SDValue ReduceVec = LHS.getOperand(0);
15301 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
15302 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
15303 isNullConstant(ReduceVec.getOperand(1)) &&
15304 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
15305 // For illegal types (e.g. 3xi32), most will be combined again into a
15306 // wider (hopefully legal) type. If this is a terminal state, we are
15307 // relying on type legalization here to produce something reasonable
15308 // and this lowering quality could probably be improved. (TODO)
15309 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
15310 SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
15311 return DAG.getNode(ReduceOpc, DL, VT, Vec,
15312 ReduceVec->getFlags() & N->getFlags());
15313 }
15314
15315 return SDValue();
15316}
15317
15318
15319// Try to fold (<bop> x, (reduction.<bop> vec, start))
15320 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
15321 const RISCVSubtarget &Subtarget) {
15322 auto BinOpToRVVReduce = [](unsigned Opc) {
15323 switch (Opc) {
15324 default:
15325 llvm_unreachable("Unhandled binary to transform reduction");
15326 case ISD::ADD:
15327 return RISCVISD::VECREDUCE_ADD_VL;
15328 case ISD::UMAX:
15329 return RISCVISD::VECREDUCE_UMAX_VL;
15330 case ISD::SMAX:
15331 return RISCVISD::VECREDUCE_SMAX_VL;
15332 case ISD::UMIN:
15333 return RISCVISD::VECREDUCE_UMIN_VL;
15334 case ISD::SMIN:
15335 return RISCVISD::VECREDUCE_SMIN_VL;
15336 case ISD::AND:
15337 return RISCVISD::VECREDUCE_AND_VL;
15338 case ISD::OR:
15339 return RISCVISD::VECREDUCE_OR_VL;
15340 case ISD::XOR:
15341 return RISCVISD::VECREDUCE_XOR_VL;
15342 case ISD::FADD:
15343 return RISCVISD::VECREDUCE_FADD_VL;
15344 case ISD::FMAXNUM:
15345 return RISCVISD::VECREDUCE_FMAX_VL;
15346 case ISD::FMINNUM:
15347 return RISCVISD::VECREDUCE_FMIN_VL;
15348 }
15349 };
15350
15351 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
15352 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15353 isNullConstant(V.getOperand(1)) &&
15354 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
15355 };
15356
15357 unsigned Opc = N->getOpcode();
15358 unsigned ReduceIdx;
15359 if (IsReduction(N->getOperand(0), Opc))
15360 ReduceIdx = 0;
15361 else if (IsReduction(N->getOperand(1), Opc))
15362 ReduceIdx = 1;
15363 else
15364 return SDValue();
15365
15366 // Skip if FADD disallows reassociation but the combiner needs it.
15367 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
15368 return SDValue();
15369
15370 SDValue Extract = N->getOperand(ReduceIdx);
15371 SDValue Reduce = Extract.getOperand(0);
15372 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
15373 return SDValue();
15374
15375 SDValue ScalarV = Reduce.getOperand(2);
15376 EVT ScalarVT = ScalarV.getValueType();
15377 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
15378 ScalarV.getOperand(0)->isUndef() &&
15379 isNullConstant(ScalarV.getOperand(2)))
15380 ScalarV = ScalarV.getOperand(1);
15381
15382 // Make sure that ScalarV is a splat with VL=1.
15383 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
15384 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
15385 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
15386 return SDValue();
15387
15388 if (!isNonZeroAVL(ScalarV.getOperand(2)))
15389 return SDValue();
15390
15391 // Check that the scalar of ScalarV is the neutral element.
15392 // TODO: Deal with value other than neutral element.
15393 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
15394 0))
15395 return SDValue();
15396
15397 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
15398 // FIXME: We might be able to improve this if operand 0 is undef.
15399 if (!isNonZeroAVL(Reduce.getOperand(5)))
15400 return SDValue();
15401
15402 SDValue NewStart = N->getOperand(1 - ReduceIdx);
15403
15404 SDLoc DL(N);
15405 SDValue NewScalarV =
15406 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
15407 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
15408
15409 // If we looked through an INSERT_SUBVECTOR we need to restore it.
15410 if (ScalarVT != ScalarV.getValueType())
15411 NewScalarV =
15412 DAG.getInsertSubvector(DL, DAG.getUNDEF(ScalarVT), NewScalarV, 0);
15413
15414 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
15415 NewScalarV, Reduce.getOperand(3),
15416 Reduce.getOperand(4), Reduce.getOperand(5)};
15417 SDValue NewReduce =
15418 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
15419 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
15420 Extract.getOperand(1));
15421}
15422
15423// Optimize (add (shl x, c0), (shl y, c1)) ->
15424// (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
15425// or
15426// (SLLI (QC.SHLADD x, y, c1 - c0), c0), if 4 <= (c1-c0) <=31.
15427 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
15428 const RISCVSubtarget &Subtarget) {
15429 // Perform this optimization only in the zba/xandesperf/xqciac/xtheadba
15430 // extensions.
15431 if (!Subtarget.hasShlAdd(3))
15432 return SDValue();
15433
15434 // Skip for vector types and larger types.
15435 EVT VT = N->getValueType(0);
15436 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
15437 return SDValue();
15438
15439 // The two operand nodes must be SHL and have no other use.
15440 SDValue N0 = N->getOperand(0);
15441 SDValue N1 = N->getOperand(1);
15442 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
15443 !N0->hasOneUse() || !N1->hasOneUse())
15444 return SDValue();
15445
15446 // Check c0 and c1.
15447 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15448 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
15449 if (!N0C || !N1C)
15450 return SDValue();
15451 int64_t C0 = N0C->getSExtValue();
15452 int64_t C1 = N1C->getSExtValue();
15453 if (C0 <= 0 || C1 <= 0)
15454 return SDValue();
15455
15456 int64_t Diff = std::abs(C0 - C1);
15457 if (!Subtarget.hasShlAdd(Diff))
15458 return SDValue();
15459
15460 // Build nodes.
15461 SDLoc DL(N);
15462 int64_t Bits = std::min(C0, C1);
15463 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
15464 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
15465 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
15466 DAG.getTargetConstant(Diff, DL, VT), NS);
15467 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
15468}
15469
15470// Check if this SDValue is an add immediate that is fed by a shift of 1, 2,
15471// or 3.
15472 static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other,
15473 SelectionDAG &DAG) {
15474 using namespace llvm::SDPatternMatch;
15475
15476 // Looking for a reg-reg add and not an addi.
15477 if (isa<ConstantSDNode>(N->getOperand(1)))
15478 return SDValue();
15479
15480 // Based on testing it seems that performance degrades if the ADDI has
15481 // more than 2 uses.
15482 if (AddI->use_size() > 2)
15483 return SDValue();
15484
15485 APInt AddVal;
15486 SDValue SHLVal;
15487 if (!sd_match(AddI, m_Add(m_Value(SHLVal), m_ConstInt(AddVal))))
15488 return SDValue();
15489
15490 APInt VShift;
15491 if (!sd_match(SHLVal, m_OneUse(m_Shl(m_Value(), m_ConstInt(VShift)))))
15492 return SDValue();
15493
15494 if (VShift.slt(1) || VShift.sgt(3))
15495 return SDValue();
15496
15497 SDLoc DL(N);
15498 EVT VT = N->getValueType(0);
15499 // The shift must be positive but the add can be signed.
15500 uint64_t ShlConst = VShift.getZExtValue();
15501 int64_t AddConst = AddVal.getSExtValue();
15502
15503 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, SHLVal->getOperand(0),
15504 DAG.getTargetConstant(ShlConst, DL, VT), Other);
15505 return DAG.getNode(ISD::ADD, DL, VT, SHADD,
15506 DAG.getSignedConstant(AddConst, DL, VT));
15507}
15508
15509// Optimize (add (add (shl x, c0), c1), y) ->
15510// (ADDI (SH*ADD y, x), c1), if c0 equals to [1|2|3].
15511 static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG,
15512 const RISCVSubtarget &Subtarget) {
15513 // Perform this optimization only in the zba extension.
15514 if (!ReassocShlAddiAdd || !Subtarget.hasShlAdd(3))
15515 return SDValue();
15516
15517 // Skip for vector types and larger types.
15518 EVT VT = N->getValueType(0);
15519 if (VT != Subtarget.getXLenVT())
15520 return SDValue();
15521
15522 SDValue AddI = N->getOperand(0);
15523 SDValue Other = N->getOperand(1);
15524 if (SDValue V = combineShlAddIAddImpl(N, AddI, Other, DAG))
15525 return V;
15526 if (SDValue V = combineShlAddIAddImpl(N, Other, AddI, DAG))
15527 return V;
15528 return SDValue();
15529}
15530
15531// Combine a constant select operand into its use:
15532//
15533// (and (select cond, -1, c), x)
15534// -> (select cond, x, (and x, c)) [AllOnes=1]
15535// (or (select cond, 0, c), x)
15536// -> (select cond, x, (or x, c)) [AllOnes=0]
15537// (xor (select cond, 0, c), x)
15538// -> (select cond, x, (xor x, c)) [AllOnes=0]
15539// (add (select cond, 0, c), x)
15540// -> (select cond, x, (add x, c)) [AllOnes=0]
15541// (sub x, (select cond, 0, c))
15542// -> (select cond, x, (sub x, c)) [AllOnes=0]
15543 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
15544 SelectionDAG &DAG, bool AllOnes,
15545 const RISCVSubtarget &Subtarget) {
15546 EVT VT = N->getValueType(0);
15547
15548 // Skip vectors.
15549 if (VT.isVector())
15550 return SDValue();
15551
15552 if (!Subtarget.hasConditionalMoveFusion()) {
15553 // (select cond, x, (and x, c)) has custom lowering with Zicond.
15554 if (!Subtarget.hasCZEROLike() || N->getOpcode() != ISD::AND)
15555 return SDValue();
15556
15557 // Maybe harmful when condition code has multiple use.
15558 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
15559 return SDValue();
15560
15561 // Maybe harmful when VT is wider than XLen.
15562 if (VT.getSizeInBits() > Subtarget.getXLen())
15563 return SDValue();
15564 }
15565
15566 if ((Slct.getOpcode() != ISD::SELECT &&
15567 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
15568 !Slct.hasOneUse())
15569 return SDValue();
15570
15571 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
15572 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
15573 };
15574
15575 bool SwapSelectOps;
15576 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
15577 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
15578 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
15579 SDValue NonConstantVal;
15580 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
15581 SwapSelectOps = false;
15582 NonConstantVal = FalseVal;
15583 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
15584 SwapSelectOps = true;
15585 NonConstantVal = TrueVal;
15586 } else
15587 return SDValue();
15588
15589 // Slct is now known to be the desired identity constant when CC is true.
15590 TrueVal = OtherOp;
15591 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
15592 // Unless SwapSelectOps says the condition should be false.
15593 if (SwapSelectOps)
15594 std::swap(TrueVal, FalseVal);
15595
15596 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
15597 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
15598 {Slct.getOperand(0), Slct.getOperand(1),
15599 Slct.getOperand(2), TrueVal, FalseVal});
15600
15601 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
15602 {Slct.getOperand(0), TrueVal, FalseVal});
15603}
15604
15605// Attempt combineSelectAndUse on each operand of a commutative operator N.
15606 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
15607 bool AllOnes,
15608 const RISCVSubtarget &Subtarget) {
15609 SDValue N0 = N->getOperand(0);
15610 SDValue N1 = N->getOperand(1);
15611 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
15612 return Result;
15613 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
15614 return Result;
15615 return SDValue();
15616}
15617
15618// Transform (add (mul x, c0), c1) ->
15619// (add (mul (add x, c1/c0), c0), c1%c0).
15620// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
15621// that should be excluded is when c0*(c1/c0) is simm12, which will lead
15622// to an infinite loop in DAGCombine if transformed.
15623// Or transform (add (mul x, c0), c1) ->
15624// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
15625// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
15626// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
15627// lead to an infinite loop in DAGCombine if transformed.
15628// Or transform (add (mul x, c0), c1) ->
15629// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
15630// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
15631// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
15632// lead to an infinite loop in DAGCombine if transformed.
15633// Or transform (add (mul x, c0), c1) ->
15634// (mul (add x, c1/c0), c0).
15635// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
15636 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
15637 const RISCVSubtarget &Subtarget) {
15638 // Skip for vector types and larger types.
15639 EVT VT = N->getValueType(0);
15640 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
15641 return SDValue();
15642 // The first operand node must be a MUL and has no other use.
15643 SDValue N0 = N->getOperand(0);
15644 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
15645 return SDValue();
15646 // Check if c0 and c1 match above conditions.
15647 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15648 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
15649 if (!N0C || !N1C)
15650 return SDValue();
15651 // If N0C has multiple uses it's possible one of the cases in
15652 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
15653 // in an infinite loop.
15654 if (!N0C->hasOneUse())
15655 return SDValue();
15656 int64_t C0 = N0C->getSExtValue();
15657 int64_t C1 = N1C->getSExtValue();
15658 int64_t CA, CB;
15659 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
15660 return SDValue();
15661 // Search for proper CA (non-zero) and CB that both are simm12.
15662 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
15663 !isInt<12>(C0 * (C1 / C0))) {
15664 CA = C1 / C0;
15665 CB = C1 % C0;
15666 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
15667 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
15668 CA = C1 / C0 + 1;
15669 CB = C1 % C0 - C0;
15670 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
15671 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
15672 CA = C1 / C0 - 1;
15673 CB = C1 % C0 + C0;
15674 } else
15675 return SDValue();
15676 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
15677 SDLoc DL(N);
15678 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
15679 DAG.getSignedConstant(CA, DL, VT));
15680 SDValue New1 =
15681 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
15682 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
15683}
15684
15685// add (zext, zext) -> zext (add (zext, zext))
15686// sub (zext, zext) -> sext (sub (zext, zext))
15687// mul (zext, zext) -> zext (mul (zext, zext))
15688// sdiv (zext, zext) -> zext (sdiv (zext, zext))
15689// udiv (zext, zext) -> zext (udiv (zext, zext))
15690// srem (zext, zext) -> zext (srem (zext, zext))
15691// urem (zext, zext) -> zext (urem (zext, zext))
15692//
15693 // where the sum of the extend widths matches, and the range of the bin op
15694// fits inside the width of the narrower bin op. (For profitability on rvv, we
15695// use a power of two for both inner and outer extend.)
15696 static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
15697
15698 EVT VT = N->getValueType(0);
15699 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
15700 return SDValue();
15701
15702 SDValue N0 = N->getOperand(0);
15703 SDValue N1 = N->getOperand(1);
15704 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
15705 return SDValue();
15706 if (!N0.hasOneUse() || !N1.hasOneUse())
15707 return SDValue();
15708
15709 SDValue Src0 = N0.getOperand(0);
15710 SDValue Src1 = N1.getOperand(0);
15711 EVT SrcVT = Src0.getValueType();
15712 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
15713 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
15714 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
15715 return SDValue();
15716
15717 LLVMContext &C = *DAG.getContext();
15718 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
15719 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
15720
15721 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
15722 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
15723
15724 // Src0 and Src1 are zero extended, so they're always positive if signed.
15725 //
15726 // sub can produce a negative from two positive operands, so it needs sign
15727 // extended. Other nodes produce a positive from two positive operands, so
15728 // zero extend instead.
15729 unsigned OuterExtend =
15730 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
15731
15732 return DAG.getNode(
15733 OuterExtend, SDLoc(N), VT,
15734 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
15735}
15736
15737// Try to turn (add (xor bool, 1) -1) into (neg bool).
15738 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
15739 SDValue N0 = N->getOperand(0);
15740 SDValue N1 = N->getOperand(1);
15741 EVT VT = N->getValueType(0);
15742 SDLoc DL(N);
15743
15744 // RHS should be -1.
15745 if (!isAllOnesConstant(N1))
15746 return SDValue();
15747
15748 // Look for (xor X, 1).
15749 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
15750 return SDValue();
15751
15752 // First xor input should be 0 or 1.
15753 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15754 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
15755 return SDValue();
15756
15757 // Emit a negate of the setcc.
15758 return DAG.getNegative(N0.getOperand(0), DL, VT);
15759}
15760
15761 static SDValue performADDCombine(SDNode *N,
15762 TargetLowering::DAGCombinerInfo &DCI,
15763 const RISCVSubtarget &Subtarget) {
15764 SelectionDAG &DAG = DCI.DAG;
15765 if (SDValue V = combineAddOfBooleanXor(N, DAG))
15766 return V;
15767 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
15768 return V;
15769 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) {
15770 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
15771 return V;
15772 if (SDValue V = combineShlAddIAdd(N, DAG, Subtarget))
15773 return V;
15774 }
15775 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
15776 return V;
15777 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
15778 return V;
15779 if (SDValue V = combineBinOpOfZExt(N, DAG))
15780 return V;
15781
15782 // fold (add (select lhs, rhs, cc, 0, y), x) ->
15783 // (select lhs, rhs, cc, x, (add x, y))
15784 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
15785}
15786
15787// Try to turn a sub boolean RHS and constant LHS into an addi.
15788 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
15789 SDValue N0 = N->getOperand(0);
15790 SDValue N1 = N->getOperand(1);
15791 EVT VT = N->getValueType(0);
15792 SDLoc DL(N);
15793
15794 // Require a constant LHS.
15795 auto *N0C = dyn_cast<ConstantSDNode>(N0);
15796 if (!N0C)
15797 return SDValue();
15798
15799 // All our optimizations involve subtracting 1 from the immediate and forming
15800 // an ADDI. Make sure the new immediate is valid for an ADDI.
15801 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
15802 if (!ImmValMinus1.isSignedIntN(12))
15803 return SDValue();
15804
15805 SDValue NewLHS;
15806 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
15807 // (sub constant, (setcc x, y, eq/neq)) ->
15808 // (add (setcc x, y, neq/eq), constant - 1)
15809 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
15810 EVT SetCCOpVT = N1.getOperand(0).getValueType();
15811 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
15812 return SDValue();
15813 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
15814 NewLHS =
15815 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
15816 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
15817 N1.getOperand(0).getOpcode() == ISD::SETCC) {
15818 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
15819 // Since setcc returns a bool the xor is equivalent to 1-setcc.
15820 NewLHS = N1.getOperand(0);
15821 } else
15822 return SDValue();
15823
15824 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
15825 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
15826}
15827
15828// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
15829// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
15830// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
15831// valid with Y=3, while 0b0000_1000_0000_0100 is not.
15832 static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
15833 const RISCVSubtarget &Subtarget) {
15834 if (!Subtarget.hasStdExtZbb())
15835 return SDValue();
15836
15837 EVT VT = N->getValueType(0);
15838
15839 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
15840 return SDValue();
15841
15842 SDValue N0 = N->getOperand(0);
15843 SDValue N1 = N->getOperand(1);
15844
15845 if (N0->getOpcode() != ISD::SHL)
15846 return SDValue();
15847
15848 auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
15849 if (!ShAmtCLeft)
15850 return SDValue();
15851 unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
15852
15853 if (ShiftedAmount >= 8)
15854 return SDValue();
15855
15856 SDValue LeftShiftOperand = N0->getOperand(0);
15857 SDValue RightShiftOperand = N1;
15858
15859 if (ShiftedAmount != 0) { // Right operand must be a right shift.
15860 if (N1->getOpcode() != ISD::SRL)
15861 return SDValue();
15862 auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
15863 if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
15864 return SDValue();
15865 RightShiftOperand = N1.getOperand(0);
15866 }
15867
15868 // At least one shift should have a single use.
15869 if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
15870 return SDValue();
15871
15872 if (LeftShiftOperand != RightShiftOperand)
15873 return SDValue();
15874
15875 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0x1));
15876 Mask <<= ShiftedAmount;
15877 // Check that X has indeed the right shape (only the Y-th bit can be set in
15878 // every byte).
15879 if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
15880 return SDValue();
15881
15882 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
15883}
15884
15885 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
15886 const RISCVSubtarget &Subtarget) {
15887 if (SDValue V = combineSubOfBoolean(N, DAG))
15888 return V;
15889
15890 EVT VT = N->getValueType(0);
15891 SDValue N0 = N->getOperand(0);
15892 SDValue N1 = N->getOperand(1);
15893 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
15894 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
15895 isNullConstant(N1.getOperand(1)) &&
15896 N1.getValueType() == N1.getOperand(0).getValueType()) {
15897 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
15898 if (CCVal == ISD::SETLT) {
15899 SDLoc DL(N);
15900 unsigned ShAmt = N0.getValueSizeInBits() - 1;
15901 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
15902 DAG.getConstant(ShAmt, DL, VT));
15903 }
15904 }
15905
15906 if (SDValue V = combineBinOpOfZExt(N, DAG))
15907 return V;
15908 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
15909 return V;
15910
15911 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
15912 // (select lhs, rhs, cc, x, (sub x, y))
15913 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
15914}
15915
15916// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
15917// Legalizing setcc can introduce xors like this. Doing this transform reduces
15918// the number of xors and may allow the xor to fold into a branch condition.
15919 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
15920 SDValue N0 = N->getOperand(0);
15921 SDValue N1 = N->getOperand(1);
15922 bool IsAnd = N->getOpcode() == ISD::AND;
15923
15924 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
15925 return SDValue();
15926
15927 if (!N0.hasOneUse() || !N1.hasOneUse())
15928 return SDValue();
15929
15930 SDValue N01 = N0.getOperand(1);
15931 SDValue N11 = N1.getOperand(1);
15932
15933 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
15934 // (xor X, -1) based on the upper bits of the other operand being 0. If the
15935 // operation is And, allow one of the Xors to use -1.
15936 if (isOneConstant(N01)) {
15937 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
15938 return SDValue();
15939 } else if (isOneConstant(N11)) {
15940 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
15941 if (!(IsAnd && isAllOnesConstant(N01)))
15942 return SDValue();
15943 } else
15944 return SDValue();
15945
15946 EVT VT = N->getValueType(0);
15947
15948 SDValue N00 = N0.getOperand(0);
15949 SDValue N10 = N1.getOperand(0);
15950
15951 // The LHS of the xors needs to be 0/1.
15952 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15953 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
15954 return SDValue();
15955
15956 // Invert the opcode and insert a new xor.
15957 SDLoc DL(N);
15958 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
15959 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
15960 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
15961}
15962
15963// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
15964// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
15965 // value to an unsigned value. This will be lowered to vmax and a series of
15966 // vnclipu instructions later. This can be extended to truncated types other
15967 // than i8 by replacing 256 and 255 with the equivalent constants for the
15968// type.
15969 static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
15970 EVT VT = N->getValueType(0);
15971 SDValue N0 = N->getOperand(0);
15972 EVT SrcVT = N0.getValueType();
15973
15974 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15975 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
15976 return SDValue();
15977
15978 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
15979 return SDValue();
15980
15981 SDValue Cond = N0.getOperand(0);
15982 SDValue True = N0.getOperand(1);
15983 SDValue False = N0.getOperand(2);
15984
15985 if (Cond.getOpcode() != ISD::SETCC)
15986 return SDValue();
15987
15988 // FIXME: Support the version of this pattern with the select operands
15989 // swapped.
15990 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
15991 if (CCVal != ISD::SETULT)
15992 return SDValue();
15993
15994 SDValue CondLHS = Cond.getOperand(0);
15995 SDValue CondRHS = Cond.getOperand(1);
15996
15997 if (CondLHS != True)
15998 return SDValue();
15999
16000 unsigned ScalarBits = VT.getScalarSizeInBits();
16001
16002 // FIXME: Support other constants.
16003 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
16004 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
16005 return SDValue();
16006
16007 if (False.getOpcode() != ISD::SIGN_EXTEND)
16008 return SDValue();
16009
16010 False = False.getOperand(0);
16011
16012 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
16013 return SDValue();
16014
16015 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
16016 if (!FalseRHSC || !FalseRHSC->isZero())
16017 return SDValue();
16018
16019 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
16020 if (CCVal2 != ISD::SETGT)
16021 return SDValue();
16022
16023 // Emit the signed to unsigned saturation pattern.
16024 SDLoc DL(N);
16025 SDValue Max =
16026 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
16027 SDValue Min =
16028 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
16029 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
16030 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
16031}
16032
16033 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
16034 const RISCVSubtarget &Subtarget) {
16035 SDValue N0 = N->getOperand(0);
16036 EVT VT = N->getValueType(0);
16037
16038 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
16039 // extending X. This is safe since we only need the LSB after the shift and
16040 // shift amounts larger than 31 would produce poison. If we wait until
16041 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
16042 // to use a BEXT instruction.
16043 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
16044 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
16045 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
16046 SDLoc DL(N0);
16047 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16048 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16049 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
16050 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
16051 }
16052
16053 return combineTruncSelectToSMaxUSat(N, DAG);
16054}
16055
16056// InstCombinerImpl::transformZExtICmp will narrow a zext of an icmp with a
16057// truncation. But RVV doesn't have truncation instructions for more than twice
16058// the bitwidth.
16059//
16060// E.g. trunc <vscale x 1 x i64> %x to <vscale x 1 x i8> will generate:
16061//
16062// vsetvli a0, zero, e32, m2, ta, ma
16063// vnsrl.wi v12, v8, 0
16064// vsetvli zero, zero, e16, m1, ta, ma
16065// vnsrl.wi v8, v12, 0
16066// vsetvli zero, zero, e8, mf2, ta, ma
16067// vnsrl.wi v8, v8, 0
16068//
16069// So reverse the combine so we generate an vmseq/vmsne again:
16070//
16071// and (lshr (trunc X), ShAmt), 1
16072// -->
16073// zext (icmp ne (and X, (1 << ShAmt)), 0)
16074//
16075// and (lshr (not (trunc X)), ShAmt), 1
16076// -->
16077// zext (icmp eq (and X, (1 << ShAmt)), 0)
16078 static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG,
16079 const RISCVSubtarget &Subtarget) {
16080 using namespace SDPatternMatch;
16081 SDLoc DL(N);
16082
16083 if (!Subtarget.hasVInstructions())
16084 return SDValue();
16085
16086 EVT VT = N->getValueType(0);
16087 if (!VT.isVector())
16088 return SDValue();
16089
16090 APInt ShAmt;
16091 SDValue Inner;
16092 if (!sd_match(N, m_And(m_OneUse(m_Srl(m_Value(Inner), m_ConstInt(ShAmt))),
16093 m_One())))
16094 return SDValue();
16095
16096 SDValue X;
16097 bool IsNot;
16098 if (sd_match(Inner, m_Not(m_Trunc(m_Value(X)))))
16099 IsNot = true;
16100 else if (sd_match(Inner, m_Trunc(m_Value(X))))
16101 IsNot = false;
16102 else
16103 return SDValue();
16104
16105 EVT WideVT = X.getValueType();
16106 if (VT.getScalarSizeInBits() >= WideVT.getScalarSizeInBits() / 2)
16107 return SDValue();
16108
16109 SDValue Res =
16110 DAG.getNode(ISD::AND, DL, WideVT, X,
16111 DAG.getConstant(1ULL << ShAmt.getZExtValue(), DL, WideVT));
16112 Res = DAG.getSetCC(DL,
16113 EVT::getVectorVT(*DAG.getContext(), MVT::i1,
16114 WideVT.getVectorElementCount()),
16115 Res, DAG.getConstant(0, DL, WideVT),
16116 IsNot ? ISD::SETEQ : ISD::SETNE);
16117 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
16118}
16119
16120// (and (i1) f, (setcc c, 0, ne)) -> (czero.nez f, c)
16121// (and (i1) f, (setcc c, 0, eq)) -> (czero.eqz f, c)
16122// (and (setcc c, 0, ne), (i1) g) -> (czero.nez g, c)
16123// (and (setcc c, 0, eq), (i1) g) -> (czero.eqz g, c)
16124 static SDValue combineANDOfSETCCToCZERO(SDNode *N, SelectionDAG &DAG,
16125 const RISCVSubtarget &Subtarget) {
16126 if (!Subtarget.hasCZEROLike())
16127 return SDValue();
16128
16129 SDValue N0 = N->getOperand(0);
16130 SDValue N1 = N->getOperand(1);
16131
16132 auto IsEqualCompZero = [](SDValue &V) -> bool {
16133 if (V.getOpcode() == ISD::SETCC && isNullConstant(V.getOperand(1))) {
16134 ISD::CondCode CC = cast<CondCodeSDNode>(V.getOperand(2))->get();
16135 if (CC == ISD::SETNE || CC == ISD::SETEQ)
16136 return true;
16137 }
16138 return false;
16139 };
16140
16141 if (!IsEqualCompZero(N0) || !N0.hasOneUse())
16142 std::swap(N0, N1);
16143 if (!IsEqualCompZero(N0) || !N0.hasOneUse())
16144 return SDValue();
16145
16146 KnownBits Known = DAG.computeKnownBits(N1);
16147 if (Known.getMaxValue().ugt(1))
16148 return SDValue();
16149
16150 unsigned CzeroOpcode =
16151 (cast<CondCodeSDNode>(N0.getOperand(2))->get() == ISD::SETNE)
16152 ? RISCVISD::CZERO_EQZ
16153 : RISCVISD::CZERO_NEZ;
16154
16155 EVT VT = N->getValueType(0);
16156 SDLoc DL(N);
16157 return DAG.getNode(CzeroOpcode, DL, VT, N1, N0.getOperand(0));
16158}
16159
16160 static SDValue reduceANDOfAtomicLoad(SDNode *N,
16161 TargetLowering::DAGCombinerInfo &DCI) {
16162 SelectionDAG &DAG = DCI.DAG;
16163 if (N->getOpcode() != ISD::AND)
16164 return SDValue();
16165
16166 SDValue N0 = N->getOperand(0);
16167 if (N0.getOpcode() != ISD::ATOMIC_LOAD)
16168 return SDValue();
16169 if (!N0.hasOneUse())
16170 return SDValue();
16171
16172 AtomicSDNode *ALoad = cast<AtomicSDNode>(N0.getNode());
16173 if (isStrongerThanMonotonic(ALoad->getSuccessOrdering()))
16174 return SDValue();
16175
16176 EVT LoadedVT = ALoad->getMemoryVT();
16177 ConstantSDNode *MaskConst = dyn_cast<ConstantSDNode>(N->getOperand(1));
16178 if (!MaskConst)
16179 return SDValue();
16180 uint64_t Mask = MaskConst->getZExtValue();
16181 uint64_t ExpectedMask = maskTrailingOnes<uint64_t>(LoadedVT.getSizeInBits());
16182 if (Mask != ExpectedMask)
16183 return SDValue();
16184
16185 SDValue ZextLoad = DAG.getAtomicLoad(
16186 ISD::ZEXTLOAD, SDLoc(N), ALoad->getMemoryVT(), N->getValueType(0),
16187 ALoad->getChain(), ALoad->getBasePtr(), ALoad->getMemOperand());
16188 DCI.CombineTo(N, ZextLoad);
16189 DAG.ReplaceAllUsesOfValueWith(SDValue(N0.getNode(), 1), ZextLoad.getValue(1));
16191 return SDValue(N, 0);
16192}
16193
16194 // Combines two comparison operations and a logic operation into one selection
16195 // operation (min, max) and a logic operation. Returns the newly constructed node
16196 // if the conditions for the optimization are satisfied.
16197 static SDValue performANDCombine(SDNode *N,
16198 TargetLowering::DAGCombinerInfo &DCI,
16199 const RISCVSubtarget &Subtarget) {
16200 SelectionDAG &DAG = DCI.DAG;
16201
16202 SDValue N0 = N->getOperand(0);
16203 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
16204 // extending X. This is safe since we only need the LSB after the shift and
16205 // shift amounts larger than 31 would produce poison. If we wait until
16206 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
16207 // to use a BEXT instruction.
16208 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
16209 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
16210 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
16211 N0.hasOneUse()) {
16212 SDLoc DL(N);
16213 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16214 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16215 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
16216 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
16217 DAG.getConstant(1, DL, MVT::i64));
16218 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
16219 }
16220
16221 if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
16222 return V;
16223 if (DCI.isAfterLegalizeDAG())
16224 if (SDValue V = combineANDOfSETCCToCZERO(N, DAG, Subtarget))
16225 return V;
16226 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16227 return V;
16228 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16229 return V;
16230 if (SDValue V = reduceANDOfAtomicLoad(N, DCI))
16231 return V;
16232
16233 if (DCI.isAfterLegalizeDAG())
16234 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
16235 return V;
16236
16237 // fold (and (select lhs, rhs, cc, -1, y), x) ->
16238 // (select lhs, rhs, cc, x, (and x, y))
16239 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
16240}
16241
16242// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
16243// FIXME: Generalize to other binary operators with same operand.
16244 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
16245 SelectionDAG &DAG) {
16246 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
16247
16248 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
16249 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
16250 !N0.hasOneUse() || !N1.hasOneUse())
16251 return SDValue();
16252
16253 // Should have the same condition.
16254 SDValue Cond = N0.getOperand(1);
16255 if (Cond != N1.getOperand(1))
16256 return SDValue();
16257
16258 SDValue TrueV = N0.getOperand(0);
16259 SDValue FalseV = N1.getOperand(0);
16260
16261 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
16262 TrueV.getOperand(1) != FalseV.getOperand(1) ||
16263 !isOneConstant(TrueV.getOperand(1)) ||
16264 !TrueV.hasOneUse() || !FalseV.hasOneUse())
16265 return SDValue();
16266
16267 EVT VT = N->getValueType(0);
16268 SDLoc DL(N);
16269
16270 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
16271 Cond);
16272 SDValue NewN1 =
16273 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0), Cond);
16274 SDValue NewOr =
16275 DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1, SDNodeFlags::Disjoint);
16276 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
16277}
16278
16279// (xor X, (xor (and X, C2), Y))
16280// ->(qc_insb X, (sra Y, ShAmt), Width, ShAmt)
16281// where C2 is a shifted mask with width = Width and shift = ShAmt
16282// qc_insb might become qc.insb or qc.insbi depending on the operands.
16283 static SDValue combineXorToBitfieldInsert(SDNode *N, SelectionDAG &DAG,
16284 const RISCVSubtarget &Subtarget) {
16285 if (!Subtarget.hasVendorXqcibm())
16286 return SDValue();
16287
16288 using namespace SDPatternMatch;
16289 SDValue Base, Inserted;
16290 APInt CMask;
16291 if (!sd_match(N, m_Xor(m_Value(Base),
16292 m_OneUse(m_Xor(m_OneUse(m_And(m_Deferred(Base),
16293 m_ConstInt(CMask))),
16294 m_Value(Inserted))))))
16295 return SDValue();
16296
16297 if (N->getValueType(0) != MVT::i32)
16298 return SDValue();
16299 unsigned Width, ShAmt;
16300 if (!CMask.isShiftedMask(ShAmt, Width))
16301 return SDValue();
16302
16303 // Check if all zero bits in CMask are also zero in Inserted
16304 if (!DAG.MaskedValueIsZero(Inserted, ~CMask))
16305 return SDValue();
16306
16307 SDLoc DL(N);
16308
16309 // `Inserted` needs to be right shifted before it is put into the
16310 // instruction.
16311 Inserted = DAG.getNode(ISD::SRA, DL, MVT::i32, Inserted,
16312 DAG.getShiftAmountConstant(ShAmt, MVT::i32, DL));
16313
16314 SDValue Ops[] = {Base, Inserted, DAG.getConstant(Width, DL, MVT::i32),
16315 DAG.getConstant(ShAmt, DL, MVT::i32)};
16316 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
16317}
16318
16319 static SDValue combineOrToBitfieldInsert(SDNode *N, SelectionDAG &DAG,
16320 const RISCVSubtarget &Subtarget) {
16321 if (!Subtarget.hasVendorXqcibm())
16322 return SDValue();
16323
16324 using namespace SDPatternMatch;
16325
16326 SDValue X;
16327 APInt MaskImm;
16328 if (!sd_match(N, m_Or(m_OneUse(m_Value(X)), m_ConstInt(MaskImm))))
16329 return SDValue();
16330
16331 unsigned ShAmt, Width;
16332 if (!MaskImm.isShiftedMask(ShAmt, Width) || MaskImm.isSignedIntN(12))
16333 return SDValue();
16334
16335 if (N->getValueType(0) != MVT::i32)
16336 return SDValue();
16337
16338 // If Zbs is enabled and only a single bit is set, we can use BSETI, which
16339 // can be compressed to C_BSETI when Xqcibm is enabled.
16340 if (Width == 1 && Subtarget.hasStdExtZbs())
16341 return SDValue();
16342
16343 // If C1 is a shifted mask (but can't be formed as an ORI),
16344 // use a bitfield insert of -1.
16345 // Transform (or x, C1)
16346 // -> (qc.insbi x, -1, width, shift)
16347 SDLoc DL(N);
16348
16349 SDValue Ops[] = {X, DAG.getSignedConstant(-1, DL, MVT::i32),
16350 DAG.getConstant(Width, DL, MVT::i32),
16351 DAG.getConstant(ShAmt, DL, MVT::i32)};
16352 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
16353}
16354
16355// Generate a QC_INSB/QC_INSBI from 'or (and X, MaskImm), OrImm' iff the value
16356// being inserted only sets known zero bits.
16357 static SDValue combineOrAndToBitfieldInsert(SDNode *N, SelectionDAG &DAG,
16358 const RISCVSubtarget &Subtarget) {
16359 // Supported only in Xqcibm for now.
16360 if (!Subtarget.hasVendorXqcibm())
16361 return SDValue();
16362
16363 using namespace SDPatternMatch;
16364
16365 SDValue Inserted;
16366 APInt MaskImm, OrImm;
16367 if (!sd_match(
16368 N, m_SpecificVT(MVT::i32, m_Or(m_OneUse(m_And(m_Value(Inserted),
16369 m_ConstInt(MaskImm))),
16370 m_ConstInt(OrImm)))))
16371 return SDValue();
16372
16373 // Compute the Known Zero for the AND as this allows us to catch more general
16374 // cases than just looking for AND with imm.
16375 KnownBits Known = DAG.computeKnownBits(N->getOperand(0));
16376
16377 // The bits being inserted must only set those bits that are known to be
16378 // zero.
16379 if (!OrImm.isSubsetOf(Known.Zero)) {
16380 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
16381 // currently handle this case.
16382 return SDValue();
16383 }
16384
16385 unsigned ShAmt, Width;
16386 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
16387 if (!Known.Zero.isShiftedMask(ShAmt, Width))
16388 return SDValue();
16389
16390 // QC_INSB(I) dst, src, #width, #shamt.
16391 SDLoc DL(N);
16392
16393 SDValue ImmNode =
16394 DAG.getSignedConstant(OrImm.getSExtValue() >> ShAmt, DL, MVT::i32);
16395
16396 SDValue Ops[] = {Inserted, ImmNode, DAG.getConstant(Width, DL, MVT::i32),
16397 DAG.getConstant(ShAmt, DL, MVT::i32)};
16398 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
16399}
16400
16401 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
16402 const RISCVSubtarget &Subtarget) {
16403 SelectionDAG &DAG = DCI.DAG;
16404
16405 if (SDValue V = combineOrToBitfieldInsert(N, DAG, Subtarget))
16406 return V;
16407 if (SDValue V = combineOrAndToBitfieldInsert(N, DAG, Subtarget))
16408 return V;
16409 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16410 return V;
16411 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16412 return V;
16413
16414 if (DCI.isAfterLegalizeDAG())
16415 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
16416 return V;
16417
16418 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
16419 // We may be able to pull a common operation out of the true and false value.
16420 SDValue N0 = N->getOperand(0);
16421 SDValue N1 = N->getOperand(1);
16422 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
16423 return V;
16424 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
16425 return V;
16426
16427 // fold (or (select cond, 0, y), x) ->
16428 // (select cond, x, (or x, y))
16429 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
16430}
16431
16432 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
16433 const RISCVSubtarget &Subtarget) {
16434 SDValue N0 = N->getOperand(0);
16435 SDValue N1 = N->getOperand(1);
16436
16437 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
16438 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
16439 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
16440 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
16441 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
16442 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
16443 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
16444 SDLoc DL(N);
16445 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16446 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16447 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
16448 SDValue Not = DAG.getNOT(DL, Shl, MVT::i64);
16449 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Not);
16450 }
16451
16452 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
16453 // NOTE: Assumes ROL being legal means ROLW is legal.
16454 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16455 if (N0.getOpcode() == RISCVISD::SLLW &&
16456 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
16457 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
16458 SDLoc DL(N);
16459 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
16460 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
16461 }
16462
16463 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
16464 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
16465 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
16466 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
16467 if (ConstN00 && CC == ISD::SETLT) {
16468 EVT VT = N0.getValueType();
16469 SDLoc DL(N0);
16470 const APInt &Imm = ConstN00->getAPIntValue();
16471 if ((Imm + 1).isSignedIntN(12))
16472 return DAG.getSetCC(DL, VT, N0.getOperand(1),
16473 DAG.getConstant(Imm + 1, DL, VT), CC);
16474 }
16475 }
16476
16477 if (SDValue V = combineXorToBitfieldInsert(N, DAG, Subtarget))
16478 return V;
16479
16480 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16481 return V;
16482 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16483 return V;
16484
16485 // fold (xor (select cond, 0, y), x) ->
16486 // (select cond, x, (xor x, y))
16487 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
16488}
16489
16490// Try to expand a multiply to a sequence of shifts and add/subs,
16491 // for a machine without a native mul instruction.
16492 static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG,
16493 uint64_t MulAmt) {
16494 SDLoc DL(N);
16495 EVT VT = N->getValueType(0);
16496 const uint64_t BitWidth = VT.getFixedSizeInBits();
16497
16498 SDValue Result = DAG.getConstant(0, DL, N->getValueType(0));
16499 SDValue N0 = N->getOperand(0);
16500
16501 // Find the Non-adjacent form of the multiplier.
16502 for (uint64_t E = MulAmt, I = 0; E && I < BitWidth; ++I, E >>= 1) {
16503 if (E & 1) {
16504 bool IsAdd = (E & 3) == 1;
16505 E -= IsAdd ? 1 : -1;
16506 SDValue ShiftVal = DAG.getNode(ISD::SHL, DL, VT, N0,
16507 DAG.getShiftAmountConstant(I, VT, DL));
16508 ISD::NodeType AddSubOp = IsAdd ? ISD::ADD : ISD::SUB;
16509 Result = DAG.getNode(AddSubOp, DL, VT, Result, ShiftVal);
16510 }
16511 }
16512
16513 return Result;
16514}
16515
16516// X * (2^N +/- 2^M) -> (add/sub (shl X, C1), (shl X, C2))
16517 static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
16518 uint64_t MulAmt) {
16519 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
16520 ISD::NodeType Op;
16521 uint64_t ShiftAmt1;
16522 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
16523 Op = ISD::SUB;
16524 ShiftAmt1 = MulAmt + MulAmtLowBit;
16525 } else if (isPowerOf2_64(MulAmt - MulAmtLowBit)) {
16526 Op = ISD::ADD;
16527 ShiftAmt1 = MulAmt - MulAmtLowBit;
16528 } else {
16529 return SDValue();
16530 }
16531 EVT VT = N->getValueType(0);
16532 SDLoc DL(N);
16533 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16534 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
16535 SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16536 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
16537 return DAG.getNode(Op, DL, VT, Shift1, Shift2);
16538}
16539
16540static SDValue getShlAddShlAdd(SDNode *N, SelectionDAG &DAG, unsigned ShX,
16541 unsigned ShY, bool AddX, unsigned Shift) {
16542 SDLoc DL(N);
16543 EVT VT = N->getValueType(0);
16544 SDValue X = N->getOperand(0);
16545 // Put the shift first if we can fold a zext into the shift forming a slli.uw.
16546 using namespace SDPatternMatch;
16547 if (Shift != 0 &&
16548 sd_match(X, m_And(m_Value(), m_SpecificInt(UINT64_C(0xffffffff))))) {
16549 X = DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(Shift, DL, VT));
16550 Shift = 0;
16551 }
16552 SDValue ShlAdd = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16553 DAG.getTargetConstant(ShY, DL, VT), X);
16554 if (ShX != 0)
16555 ShlAdd = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, ShlAdd,
16556 DAG.getTargetConstant(ShX, DL, VT), AddX ? X : ShlAdd);
16557 if (Shift == 0)
16558 return ShlAdd;
16559 // Otherwise, put the shl last so that it can fold with following instructions
16560 // (e.g. sext or add).
16561 return DAG.getNode(ISD::SHL, DL, VT, ShlAdd, DAG.getConstant(Shift, DL, VT));
16562}
16563
16564 static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG,
16565 uint64_t MulAmt, unsigned Shift) {
16566 switch (MulAmt) {
16567 // 3/5/9 -> (shYadd X, X)
16568 case 3:
16569 return getShlAddShlAdd(N, DAG, 0, 1, /*AddX=*/false, Shift);
16570 case 5:
16571 return getShlAddShlAdd(N, DAG, 0, 2, /*AddX=*/false, Shift);
16572 case 9:
16573 return getShlAddShlAdd(N, DAG, 0, 3, /*AddX=*/false, Shift);
16574 // 3/5/9 * 3/5/9 -> (shXadd (shYadd X, X), (shYadd X, X))
16575 case 5 * 3:
16576 return getShlAddShlAdd(N, DAG, 2, 1, /*AddX=*/false, Shift);
16577 case 9 * 3:
16578 return getShlAddShlAdd(N, DAG, 3, 1, /*AddX=*/false, Shift);
16579 case 5 * 5:
16580 return getShlAddShlAdd(N, DAG, 2, 2, /*AddX=*/false, Shift);
16581 case 9 * 5:
16582 return getShlAddShlAdd(N, DAG, 3, 2, /*AddX=*/false, Shift);
16583 case 9 * 9:
16584 return getShlAddShlAdd(N, DAG, 3, 3, /*AddX=*/false, Shift);
16585 default:
16586 break;
16587 }
16588
16589 // 2/4/8 * 3/5/9 + 1 -> (shXadd (shYadd X, X), X)
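// e.g. MulAmt == 19 = 2 * 9 + 1 -> (sh1add (sh3add X, X), X).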
16590 int ShX;
16591 if (int ShY = isShifted359(MulAmt - 1, ShX)) {
16592 assert(ShX != 0 && "MulAmt=4,6,10 handled before");
16593 if (ShX <= 3)
16594 return getShlAddShlAdd(N, DAG, ShX, ShY, /*AddX=*/true, Shift);
16595 }
16596 return SDValue();
16597}
16598
16599// Try to expand a scalar multiply to a faster sequence.
16600 static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
16601 TargetLowering::DAGCombinerInfo &DCI,
16602 const RISCVSubtarget &Subtarget) {
16603
16604 EVT VT = N->getValueType(0);
16605
16606 // LI + MUL is usually smaller than the alternative sequence.
16607 if (DAG.getMachineFunction().getFunction().hasMinSize())
16608 return SDValue();
16609
16610 if (VT != Subtarget.getXLenVT())
16611 return SDValue();
16612
16613 bool ShouldExpandMul =
16614 (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) ||
16615 !Subtarget.hasStdExtZmmul();
16616 if (!ShouldExpandMul)
16617 return SDValue();
16618
16619 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
16620 if (!CNode)
16621 return SDValue();
16622 uint64_t MulAmt = CNode->getZExtValue();
16623
16624 // Don't do this if the Xqciac extension is enabled and MulAmt fits in a simm12.
16625 if (Subtarget.hasVendorXqciac() && isInt<12>(CNode->getSExtValue()))
16626 return SDValue();
16627
16628 // WARNING: The code below is knowingly incorrect with regards to undef
16629 // semantics. We're adding additional uses of X here, and in principle, we
16630 // should be freezing X before doing so. However, adding freeze here causes
16631 // real regressions, and no other target properly freezes X in these cases
16632 // either.
16633 if (Subtarget.hasShlAdd(3)) {
16634 // 3/5/9 * 2^N -> (shl (shXadd X, X), N)
16635 // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
16636 // of 25 which happen to be quite common.
16637 // (2/4/8 * 3/5/9 + 1) * 2^N
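// e.g. MulAmt == 200 = 5 * 5 * 8 -> (shl (sh2add (sh2add X, X), (sh2add X, X)), 3).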
16638 unsigned Shift = llvm::countr_zero(MulAmt);
16639 if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift, Shift))
16640 return V;
16641
16642 // If this is 2/4/8 plus a power of 2, we can use a shift followed by a
16643 // single shXadd. First check that this is a sum of two powers of 2 because
16644 // that's easy. Then count the trailing zeros of the larger power of 2.
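// e.g. MulAmt == 34 = 32 + 2 -> (sh1add X, (shl X, 5)).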
16645 SDValue X = N->getOperand(0);
16646 if (Shift >= 1 && Shift <= 3 && isPowerOf2_64(MulAmt & (MulAmt - 1))) {
16647 unsigned ShiftAmt = llvm::countr_zero((MulAmt & (MulAmt - 1)));
16648 SDLoc DL(N);
16649 SDValue Shift1 =
16650 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
16651 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16652 DAG.getTargetConstant(Shift, DL, VT), Shift1);
16653 }
16654
16655 // TODO: 2^(C1>3) * 3,5,9 +/- 1
16656
16657 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
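// e.g. MulAmt == 69 = 64 + 4 + 1 -> (add (shl X, 6), (sh2add X, X)).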
16658 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
16659 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
16660 if (ScaleShift >= 1 && ScaleShift < 4) {
16661 unsigned ShiftAmt = llvm::countr_zero((MulAmt - 1) & (MulAmt - 2));
16662 SDLoc DL(N);
16663 SDValue Shift1 =
16664 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
16665 return DAG.getNode(
16666 ISD::ADD, DL, VT, Shift1,
16667 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16668 DAG.getTargetConstant(ScaleShift, DL, VT), X));
16669 }
16670 }
16671
16672 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, X))
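// e.g. MulAmt == 61 = 64 - 3 -> (sub (shl X, 6), (sh1add X, X)).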
16673 for (uint64_t Offset : {3, 5, 9}) {
16674 if (isPowerOf2_64(MulAmt + Offset)) {
16675 unsigned ShAmt = llvm::countr_zero(MulAmt + Offset);
16676 if (ShAmt >= VT.getSizeInBits())
16677 continue;
16678 SDLoc DL(N);
16679 SDValue Shift1 =
16680 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShAmt, DL, VT));
16681 SDValue Mul359 =
16682 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16683 DAG.getTargetConstant(Log2_64(Offset - 1), DL, VT), X);
16684 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
16685 }
16686 }
16687 }
16688
16689 if (SDValue V = expandMulToAddOrSubOfShl(N, DAG, MulAmt))
16690 return V;
16691
16692 if (!Subtarget.hasStdExtZmmul())
16693 return expandMulToNAFSequence(N, DAG, MulAmt);
16694
16695 return SDValue();
16696}
16697
16698// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
16699// (bitcast (sra (v2Xi16 (bitcast X)), 15))
16700// Same for other equivalent types with other equivalent constants.
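// The AND/MUL pair replicates the sign bit of each half-width element across
// that half, which is exactly what an arithmetic shift right by HalfSize - 1
// on the half-width vector computes.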
16701 static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG) {
16702 EVT VT = N->getValueType(0);
16703 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16704
16705 // Do this for legal vectors unless they are i1 or i8 vectors.
16706 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
16707 return SDValue();
16708
16709 if (N->getOperand(0).getOpcode() != ISD::AND ||
16710 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
16711 return SDValue();
16712
16713 SDValue And = N->getOperand(0);
16714 SDValue Srl = And.getOperand(0);
16715
16716 APInt V1, V2, V3;
16717 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
16718 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
16719 !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3))
16720 return SDValue();
16721
16722 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
16723 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
16724 V3 != (HalfSize - 1))
16725 return SDValue();
16726
16727 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
16728 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
16729 VT.getVectorElementCount() * 2);
16730 SDLoc DL(N);
16731 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
16732 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
16733 DAG.getConstant(HalfSize - 1, DL, HalfVT));
16734 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
16735}
16736
16737 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
16738 TargetLowering::DAGCombinerInfo &DCI,
16739 const RISCVSubtarget &Subtarget) {
16740 EVT VT = N->getValueType(0);
16741 if (!VT.isVector())
16742 return expandMul(N, DAG, DCI, Subtarget);
16743
16744 SDLoc DL(N);
16745 SDValue N0 = N->getOperand(0);
16746 SDValue N1 = N->getOperand(1);
16747 SDValue MulOper;
16748 unsigned AddSubOpc;
16749
16750 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
16751 // (mul x, add (y, 1)) -> (add x, (mul x, y))
16752 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
16753 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
16754 auto IsAddSubWith1 = [&](SDValue V) -> bool {
16755 AddSubOpc = V->getOpcode();
16756 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
16757 SDValue Opnd = V->getOperand(1);
16758 MulOper = V->getOperand(0);
16759 if (AddSubOpc == ISD::SUB)
16760 std::swap(Opnd, MulOper);
16761 if (isOneOrOneSplat(Opnd))
16762 return true;
16763 }
16764 return false;
16765 };
16766
16767 if (IsAddSubWith1(N0)) {
16768 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
16769 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
16770 }
16771
16772 if (IsAddSubWith1(N1)) {
16773 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
16774 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
16775 }
16776
16777 if (SDValue V = combineBinOpOfZExt(N, DAG))
16778 return V;
16779
16780 if (SDValue V = combineVectorMulToSraBitcast(N, DAG))
16781 return V;
16782
16783 return SDValue();
16784}
16785
16786/// According to the property that indexed load/store instructions zero-extend
16787/// their indices, try to narrow the type of index operand.
16788static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
16789 if (isIndexTypeSigned(IndexType))
16790 return false;
16791
16792 if (!N->hasOneUse())
16793 return false;
16794
16795 EVT VT = N.getValueType();
16796 SDLoc DL(N);
16797
16798 // In general, what we're doing here is seeing if we can sink a truncate to
16799 // a smaller element type into the expression tree building our index.
16800 // TODO: We can generalize this and handle a bunch more cases if useful.
16801
16802 // Narrow a buildvector to the narrowest element type. This requires less
16803 // work and less register pressure at high LMUL, and creates smaller constants
16804 // which may be cheaper to materialize.
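// e.g. a build_vector of i64 constants that all fit in 16 bits is truncated
// to an i16 element vector here.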
16805 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
16806 KnownBits Known = DAG.computeKnownBits(N);
16807 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
16808 LLVMContext &C = *DAG.getContext();
16809 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
16810 if (ResultVT.bitsLT(VT.getVectorElementType())) {
16811 N = DAG.getNode(ISD::TRUNCATE, DL,
16812 VT.changeVectorElementType(ResultVT), N);
16813 return true;
16814 }
16815 }
16816
16817 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
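// e.g. (shl (zext nxv4i8 to nxv4i64), 2) needs at most 10 bits per element,
// so the index can instead be computed as (shl (zext nxv4i8 to nxv4i16), 2).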
16818 if (N.getOpcode() != ISD::SHL)
16819 return false;
16820
16821 SDValue N0 = N.getOperand(0);
16822 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
16823 N0.getOpcode() != RISCVISD::VZEXT_VL)
16824 return false;
16825 if (!N0->hasOneUse())
16826 return false;
16827
16828 APInt ShAmt;
16829 SDValue N1 = N.getOperand(1);
16830 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
16831 return false;
16832
16833 SDValue Src = N0.getOperand(0);
16834 EVT SrcVT = Src.getValueType();
16835 unsigned SrcElen = SrcVT.getScalarSizeInBits();
16836 unsigned ShAmtV = ShAmt.getZExtValue();
16837 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
16838 NewElen = std::max(NewElen, 8U);
16839
16840 // Skip if NewElen is not narrower than the original extended type.
16841 if (NewElen >= N0.getValueType().getScalarSizeInBits())
16842 return false;
16843
16844 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
16845 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
16846
16847 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
16848 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
16849 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
16850 return true;
16851}
16852
16853/// Try to map an integer comparison with size > XLEN to vector instructions
16854/// before type legalization splits it up into chunks.
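/// For example, an i256 equality compare becomes a v32i8 VP_SETCC (setne)
/// whose i1 result is OR-reduced; the original compare then reduces to testing
/// that reduction against zero.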
16855static SDValue
16856 combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
16857 const SDLoc &DL, SelectionDAG &DAG,
16858 const RISCVSubtarget &Subtarget) {
16859 assert(ISD::isIntEqualitySetCC(CC) && "Bad comparison predicate");
16860
16861 if (!Subtarget.hasVInstructions())
16862 return SDValue();
16863
16864 MVT XLenVT = Subtarget.getXLenVT();
16865 EVT OpVT = X.getValueType();
16866 // We're looking for an oversized integer equality comparison.
16867 if (!OpVT.isScalarInteger())
16868 return SDValue();
16869
16870 unsigned OpSize = OpVT.getSizeInBits();
16871 // The size should be larger than XLen and smaller than the maximum vector
16872 // size.
16873 if (OpSize <= Subtarget.getXLen() ||
16874 OpSize > Subtarget.getRealMinVLen() *
16875 Subtarget.getMaxLMULForFixedLengthVectors())
16876 return SDValue();
16877
16878 // Don't perform this combine if constructing the vector will be expensive.
16879 auto IsVectorBitCastCheap = [](SDValue X) {
16880 X = peekThroughBitcasts(X);
16881 return isa<ConstantSDNode>(X) || X.getValueType().isVector() ||
16882 X.getOpcode() == ISD::LOAD;
16883 };
16884 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
16885 return SDValue();
16886
16887 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
16888 Attribute::NoImplicitFloat))
16889 return SDValue();
16890
16891 // Bail out for non-byte-sized types.
16892 if (!OpVT.isByteSized())
16893 return SDValue();
16894
16895 unsigned VecSize = OpSize / 8;
16896 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, VecSize);
16897 EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VecSize);
16898
16899 SDValue VecX = DAG.getBitcast(VecVT, X);
16900 SDValue VecY = DAG.getBitcast(VecVT, Y);
16901 SDValue Mask = DAG.getAllOnesConstant(DL, CmpVT);
16902 SDValue VL = DAG.getConstant(VecSize, DL, XLenVT);
16903
16904 SDValue Cmp = DAG.getNode(ISD::VP_SETCC, DL, CmpVT, VecX, VecY,
16905 DAG.getCondCode(ISD::SETNE), Mask, VL);
16906 return DAG.getSetCC(DL, VT,
16907 DAG.getNode(ISD::VP_REDUCE_OR, DL, XLenVT,
16908 DAG.getConstant(0, DL, XLenVT), Cmp, Mask,
16909 VL),
16910 DAG.getConstant(0, DL, XLenVT), CC);
16911}
16912
16913 static SDValue performSETCCCombine(SDNode *N,
16914 TargetLowering::DAGCombinerInfo &DCI,
16915 const RISCVSubtarget &Subtarget) {
16916 SelectionDAG &DAG = DCI.DAG;
16917 SDLoc dl(N);
16918 SDValue N0 = N->getOperand(0);
16919 SDValue N1 = N->getOperand(1);
16920 EVT VT = N->getValueType(0);
16921 EVT OpVT = N0.getValueType();
16922
16923 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
16924 // Looking for an equality compare.
16925 if (!isIntEqualitySetCC(Cond))
16926 return SDValue();
16927
16928 if (SDValue V =
16929 combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG, Subtarget))
16930 return V;
16931
16932 if (DCI.isAfterLegalizeDAG() && isa<ConstantSDNode>(N1) &&
16933 N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
16934 isa<ConstantSDNode>(N0.getOperand(1))) {
16935 const APInt &AndRHSC = N0.getConstantOperandAPInt(1);
16936 // (X & -(1 << C)) == 0 -> (X >> C) == 0 if the AND constant can't use ANDI.
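// e.g. (X & -4096) == 0 -> (srl X, 12) == 0, which avoids materializing the
// large mask constant.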
16937 if (isNullConstant(N1) && !isInt<12>(AndRHSC.getSExtValue()) &&
16938 AndRHSC.isNegatedPowerOf2()) {
16939 unsigned ShiftBits = AndRHSC.countr_zero();
16940 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, N0.getOperand(0),
16941 DAG.getConstant(ShiftBits, dl, OpVT));
16942 return DAG.getSetCC(dl, VT, Shift, N1, Cond);
16943 }
16944
16945 // Similar to above but handling the lower 32 bits by using sraiw. Allow
16946 // comparing with constants other than 0 if the constant can be folded into
16947 // addi or xori after shifting.
16948 uint64_t N1Int = cast<ConstantSDNode>(N1)->getZExtValue();
16949 uint64_t AndRHSInt = AndRHSC.getZExtValue();
16950 if (OpVT == MVT::i64 && isUInt<32>(AndRHSInt) &&
16951 isPowerOf2_32(-uint32_t(AndRHSInt)) && (N1Int & AndRHSInt) == N1Int) {
16952 unsigned ShiftBits = llvm::countr_zero(AndRHSInt);
16953 int64_t NewC = SignExtend64<32>(N1Int) >> ShiftBits;
16954 if (NewC >= -2048 && NewC <= 2048) {
16955 SDValue SExt =
16956 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OpVT, N0.getOperand(0),
16957 DAG.getValueType(MVT::i32));
16958 SDValue Shift = DAG.getNode(ISD::SRA, dl, OpVT, SExt,
16959 DAG.getConstant(ShiftBits, dl, OpVT));
16960 return DAG.getSetCC(dl, VT, Shift,
16961 DAG.getSignedConstant(NewC, dl, OpVT), Cond);
16962 }
16963 }
16964 }
16965
16966 // Replace (seteq (i64 (and X, 0xffffffff)), C1) with
16967 // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
16968 // bit 31. Same for setne. C1' may be cheaper to materialize and the
16969 // sext_inreg can become a sext.w instead of a shift pair.
16970 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
16971 return SDValue();
16972
16973 // RHS needs to be a constant.
16974 auto *N1C = dyn_cast<ConstantSDNode>(N1);
16975 if (!N1C)
16976 return SDValue();
16977
16978 // LHS needs to be (and X, 0xffffffff).
16979 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
16981 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
16982 return SDValue();
16983
16984 // Don't do this if the sign bit is provably zero; it will be turned back into
16985 // an AND.
16986 APInt SignMask = APInt::getOneBitSet(64, 31);
16987 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
16988 return SDValue();
16989
16990 const APInt &C1 = N1C->getAPIntValue();
16991
16992 // If the constant is larger than 2^32 - 1 it is impossible for both sides
16993 // to be equal.
16994 if (C1.getActiveBits() > 32)
16995 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
16996
16997 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
16998 N0.getOperand(0), DAG.getValueType(MVT::i32));
16999 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
17000 dl, OpVT), Cond);
17001}
17002
17003static SDValue
17004 performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
17005 const RISCVSubtarget &Subtarget) {
17006 SelectionDAG &DAG = DCI.DAG;
17007 SDValue Src = N->getOperand(0);
17008 EVT VT = N->getValueType(0);
17009 EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
17010 unsigned Opc = Src.getOpcode();
17011 SDLoc DL(N);
17012
17013 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
17014 // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
17015 if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
17016 Subtarget.hasStdExtZfhmin())
17017 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, DL, VT, Src.getOperand(0));
17018
17019 // Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
17020 if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
17021 VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
17022 DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
17023 return DAG.getNode(RISCVISD::SLLW, DL, VT, Src.getOperand(0),
17024 Src.getOperand(1));
17025
17026 // Fold (sext_inreg (setcc), i1) -> (sub 0, (setcc))
17027 if (Opc == ISD::SETCC && SrcVT == MVT::i1 && DCI.isAfterLegalizeDAG())
17028 return DAG.getNegative(Src, DL, VT);
17029
17030 // Fold (sext_inreg (xor (setcc), -1), i1) -> (add (setcc), -1)
17031 if (Opc == ISD::XOR && SrcVT == MVT::i1 &&
17032 isAllOnesConstant(Src.getOperand(1)) &&
17033 Src.getOperand(0).getOpcode() == ISD::SETCC && DCI.isAfterLegalizeDAG())
17034 return DAG.getNode(ISD::ADD, DL, VT, Src.getOperand(0),
17035 DAG.getAllOnesConstant(DL, VT));
17036
17037 return SDValue();
17038}
17039
17040namespace {
17041// Forward declaration of the structure holding the necessary information to
17042// apply a combine.
17043struct CombineResult;
17044
17045enum ExtKind : uint8_t {
17046 ZExt = 1 << 0,
17047 SExt = 1 << 1,
17048 FPExt = 1 << 2,
17049 BF16Ext = 1 << 3
17050};
17051/// Helper class for folding sign/zero extensions.
17052/// In particular, this class is used for the following combines:
17053/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
17054/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
17055/// mul | mul_vl -> vwmul(u) | vwmul_su
17056/// shl | shl_vl -> vwsll
17057/// fadd -> vfwadd | vfwadd_w
17058/// fsub -> vfwsub | vfwsub_w
17059/// fmul -> vfwmul
17060/// An object of this class represents an operand of the operation we want to
17061/// combine.
17062/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
17063/// NodeExtensionHelper for `a` and one for `b`.
17064///
17065/// This class abstracts away how the extension is materialized and
17066/// how its number of users affect the combines.
17067///
17068/// In particular:
17069/// - VWADD_W is conceptually == add(op0, sext(op1))
17070/// - VWADDU_W == add(op0, zext(op1))
17071/// - VWSUB_W == sub(op0, sext(op1))
17072/// - VWSUBU_W == sub(op0, zext(op1))
17073/// - VFWADD_W == fadd(op0, fpext(op1))
17074/// - VFWSUB_W == fsub(op0, fpext(op1))
17075/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
17076/// zext|sext(smaller_value).
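/// For example, for (add_vl (vsext_vl a), (vsext_vl b)) both operands report
/// SupportsSExt, and the whole expression can be rewritten as (vwadd_vl a, b)
/// on the narrower element type.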
17077struct NodeExtensionHelper {
17078 /// Records if this operand is like being zero extended.
17079 bool SupportsZExt;
17080 /// Records if this operand is like being sign extended.
17081 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
17082 /// instance, a splat constant (e.g., 3), would support being both sign and
17083 /// zero extended.
17084 bool SupportsSExt;
17085 /// Records if this operand is like being floating point extended.
17086 bool SupportsFPExt;
17087 /// Records if this operand is extended from bf16.
17088 bool SupportsBF16Ext;
17089 /// This boolean captures whether we care if this operand would still be
17090 /// around after the folding happens.
17091 bool EnforceOneUse;
17092 /// Original value that this NodeExtensionHelper represents.
17093 SDValue OrigOperand;
17094
17095 /// Get the value feeding the extension or the value itself.
17096 /// E.g., for zext(a), this would return a.
17097 SDValue getSource() const {
17098 switch (OrigOperand.getOpcode()) {
17099 case ISD::ZERO_EXTEND:
17100 case ISD::SIGN_EXTEND:
17101 case RISCVISD::VSEXT_VL:
17102 case RISCVISD::VZEXT_VL:
17103 case RISCVISD::FP_EXTEND_VL:
17104 return OrigOperand.getOperand(0);
17105 default:
17106 return OrigOperand;
17107 }
17108 }
17109
17110 /// Check if this instance represents a splat.
17111 bool isSplat() const {
17112 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
17113 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
17114 }
17115
17116 /// Get the extended opcode.
17117 unsigned getExtOpc(ExtKind SupportsExt) const {
17118 switch (SupportsExt) {
17119 case ExtKind::SExt:
17120 return RISCVISD::VSEXT_VL;
17121 case ExtKind::ZExt:
17122 return RISCVISD::VZEXT_VL;
17123 case ExtKind::FPExt:
17124 case ExtKind::BF16Ext:
17125 return RISCVISD::FP_EXTEND_VL;
17126 }
17127 llvm_unreachable("Unknown ExtKind enum");
17128 }
17129
17130 /// Get or create a value that can feed \p Root with the given extension \p
17131 /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source of this
17132 /// operand. \see ::getSource().
17133 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
17134 const RISCVSubtarget &Subtarget,
17135 std::optional<ExtKind> SupportsExt) const {
17136 if (!SupportsExt.has_value())
17137 return OrigOperand;
17138
17139 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
17140
17141 SDValue Source = getSource();
17142 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
17143 if (Source.getValueType() == NarrowVT)
17144 return Source;
17145
17146 unsigned ExtOpc = getExtOpc(*SupportsExt);
17147
17148 // If we need an extension, we should be changing the type.
17149 SDLoc DL(OrigOperand);
17150 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
17151 switch (OrigOperand.getOpcode()) {
17152 case ISD::ZERO_EXTEND:
17153 case ISD::SIGN_EXTEND:
17154 case RISCVISD::VSEXT_VL:
17155 case RISCVISD::VZEXT_VL:
17156 case RISCVISD::FP_EXTEND_VL:
17157 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
17158 case ISD::SPLAT_VECTOR:
17159 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
17160 case RISCVISD::VMV_V_X_VL:
17161 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
17162 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
17163 case RISCVISD::VFMV_V_F_VL:
17164 Source = Source.getOperand(1);
17165 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
17166 Source = Source.getOperand(0);
17167 assert(Source.getValueType() == NarrowVT.getVectorElementType());
17168 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
17169 DAG.getUNDEF(NarrowVT), Source, VL);
17170 default:
17171 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
17172 // and that operand should already have the right NarrowVT so no
17173 // extension should be required at this point.
17174 llvm_unreachable("Unsupported opcode");
17175 }
17176 }
17177
17178 /// Helper function to get the narrow type for \p Root.
17179 /// The narrow type is the type of \p Root where we divided the size of each
17180 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
17181 /// \pre Both the narrow type and the original type should be legal.
17182 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
17183 MVT VT = Root->getSimpleValueType(0);
17184
17185 // Determine the narrow size.
17186 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
17187
17188 MVT EltVT = SupportsExt == ExtKind::BF16Ext ? MVT::bf16
17189 : SupportsExt == ExtKind::FPExt
17190 ? MVT::getFloatingPointVT(NarrowSize)
17191 : MVT::getIntegerVT(NarrowSize);
17192
17193 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
17194 "Trying to extend something we can't represent");
17195 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
17196 return NarrowVT;
17197 }
17198
17199 /// Get the opcode to materialize:
17200 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
17201 static unsigned getSExtOpcode(unsigned Opcode) {
17202 switch (Opcode) {
17203 case ISD::ADD:
17204 case RISCVISD::ADD_VL:
17205 case RISCVISD::VWADD_W_VL:
17206 case RISCVISD::VWADDU_W_VL:
17207 case ISD::OR:
17208 case RISCVISD::OR_VL:
17209 return RISCVISD::VWADD_VL;
17210 case ISD::SUB:
17211 case RISCVISD::SUB_VL:
17212 case RISCVISD::VWSUB_W_VL:
17213 case RISCVISD::VWSUBU_W_VL:
17214 return RISCVISD::VWSUB_VL;
17215 case ISD::MUL:
17216 case RISCVISD::MUL_VL:
17217 return RISCVISD::VWMUL_VL;
17218 default:
17219 llvm_unreachable("Unexpected opcode");
17220 }
17221 }
17222
17223 /// Get the opcode to materialize:
17224 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
17225 static unsigned getZExtOpcode(unsigned Opcode) {
17226 switch (Opcode) {
17227 case ISD::ADD:
17228 case RISCVISD::ADD_VL:
17229 case RISCVISD::VWADD_W_VL:
17230 case RISCVISD::VWADDU_W_VL:
17231 case ISD::OR:
17232 case RISCVISD::OR_VL:
17233 return RISCVISD::VWADDU_VL;
17234 case ISD::SUB:
17235 case RISCVISD::SUB_VL:
17236 case RISCVISD::VWSUB_W_VL:
17237 case RISCVISD::VWSUBU_W_VL:
17238 return RISCVISD::VWSUBU_VL;
17239 case ISD::MUL:
17240 case RISCVISD::MUL_VL:
17241 return RISCVISD::VWMULU_VL;
17242 case ISD::SHL:
17243 case RISCVISD::SHL_VL:
17244 return RISCVISD::VWSLL_VL;
17245 default:
17246 llvm_unreachable("Unexpected opcode");
17247 }
17248 }
17249
17250 /// Get the opcode to materialize:
17251 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
17252 static unsigned getFPExtOpcode(unsigned Opcode) {
17253 switch (Opcode) {
17254 case RISCVISD::FADD_VL:
17255 case RISCVISD::VFWADD_W_VL:
17256 return RISCVISD::VFWADD_VL;
17257 case RISCVISD::FSUB_VL:
17258 case RISCVISD::VFWSUB_W_VL:
17259 return RISCVISD::VFWSUB_VL;
17260 case RISCVISD::FMUL_VL:
17261 return RISCVISD::VFWMUL_VL;
17262 case RISCVISD::VFMADD_VL:
17263 return RISCVISD::VFWMADD_VL;
17264 case RISCVISD::VFMSUB_VL:
17265 return RISCVISD::VFWMSUB_VL;
17266 case RISCVISD::VFNMADD_VL:
17267 return RISCVISD::VFWNMADD_VL;
17268 case RISCVISD::VFNMSUB_VL:
17269 return RISCVISD::VFWNMSUB_VL;
17270 default:
17271 llvm_unreachable("Unexpected opcode");
17272 }
17273 }
17274
17275 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
17276 /// newOpcode(a, b).
17277 static unsigned getSUOpcode(unsigned Opcode) {
17278 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
17279 "SU is only supported for MUL");
17280 return RISCVISD::VWMULSU_VL;
17281 }
17282
17283 /// Get the opcode to materialize
17284 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
17285 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
17286 switch (Opcode) {
17287 case ISD::ADD:
17288 case RISCVISD::ADD_VL:
17289 case ISD::OR:
17290 case RISCVISD::OR_VL:
17291 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
17292 : RISCVISD::VWADDU_W_VL;
17293 case ISD::SUB:
17294 case RISCVISD::SUB_VL:
17295 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
17296 : RISCVISD::VWSUBU_W_VL;
17297 case RISCVISD::FADD_VL:
17298 return RISCVISD::VFWADD_W_VL;
17299 case RISCVISD::FSUB_VL:
17300 return RISCVISD::VFWSUB_W_VL;
17301 default:
17302 llvm_unreachable("Unexpected opcode");
17303 }
17304 }
17305
17306 using CombineToTry = std::function<std::optional<CombineResult>(
17307 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
17308 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
17309 const RISCVSubtarget &)>;
17310
17311 /// Check if this node needs to be fully folded or extended for all users.
17312 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
17313
17314 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
17315 const RISCVSubtarget &Subtarget) {
17316 unsigned Opc = OrigOperand.getOpcode();
17317 MVT VT = OrigOperand.getSimpleValueType();
17318
17319 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
17320 "Unexpected Opcode");
17321
17322 // The passthru must be undef for tail agnostic.
17323 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
17324 return;
17325
17326 // Get the scalar value.
17327 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
17328 : OrigOperand.getOperand(1);
17329
17330 // See if we have enough sign bits or zero bits in the scalar to use a
17331 // widening opcode by splatting to smaller element size.
17332 unsigned EltBits = VT.getScalarSizeInBits();
17333 unsigned ScalarBits = Op.getValueSizeInBits();
17334 // If we're not getting all bits from the element, we need special handling.
17335 if (ScalarBits < EltBits) {
17336 // This should only occur on RV32.
17337 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
17338 !Subtarget.is64Bit() && "Unexpected splat");
17339 // vmv.v.x sign extends narrow inputs.
17340 SupportsSExt = true;
17341
17342 // If the input is positive, then sign extend is also zero extend.
17343 if (DAG.SignBitIsZero(Op))
17344 SupportsZExt = true;
17345
17346 EnforceOneUse = false;
17347 return;
17348 }
17349
17350 unsigned NarrowSize = EltBits / 2;
17351 // If the narrow type cannot be expressed with a legal VMV,
17352 // this is not a valid candidate.
17353 if (NarrowSize < 8)
17354 return;
17355
17356 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
17357 SupportsSExt = true;
17358
17359 if (DAG.MaskedValueIsZero(Op,
17360 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
17361 SupportsZExt = true;
17362
17363 EnforceOneUse = false;
17364 }
17365
17366 bool isSupportedFPExtend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
17367 return (NarrowEltVT == MVT::f32 ||
17368 (NarrowEltVT == MVT::f16 && Subtarget.hasVInstructionsF16()));
17369 }
17370
17371 bool isSupportedBF16Extend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
17372 return NarrowEltVT == MVT::bf16 && Subtarget.hasStdExtZvfbfwma();
17373 }
17374
17375 /// Helper method to set the various fields of this struct based on the
17376 /// type of \p Root.
17377 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
17378 const RISCVSubtarget &Subtarget) {
17379 SupportsZExt = false;
17380 SupportsSExt = false;
17381 SupportsFPExt = false;
17382 SupportsBF16Ext = false;
17383 EnforceOneUse = true;
17384 unsigned Opc = OrigOperand.getOpcode();
17385 // For the nodes we handle below, we end up using their inputs directly: see
17386 // getSource(). However since they either don't have a passthru or we check
17387 // that their passthru is undef, we can safely ignore their mask and VL.
17388 switch (Opc) {
17389 case ISD::ZERO_EXTEND:
17390 case ISD::SIGN_EXTEND: {
17391 MVT VT = OrigOperand.getSimpleValueType();
17392 if (!VT.isVector())
17393 break;
17394
17395 SDValue NarrowElt = OrigOperand.getOperand(0);
17396 MVT NarrowVT = NarrowElt.getSimpleValueType();
17397 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
17398 if (NarrowVT.getVectorElementType() == MVT::i1)
17399 break;
17400
17401 SupportsZExt = Opc == ISD::ZERO_EXTEND;
17402 SupportsSExt = Opc == ISD::SIGN_EXTEND;
17403 break;
17404 }
17405 case RISCVISD::VZEXT_VL:
17406 SupportsZExt = true;
17407 break;
17408 case RISCVISD::VSEXT_VL:
17409 SupportsSExt = true;
17410 break;
17411 case RISCVISD::FP_EXTEND_VL: {
17412 MVT NarrowEltVT =
17413 OrigOperand.getOperand(0).getSimpleValueType().getVectorElementType();
17414 if (isSupportedFPExtend(NarrowEltVT, Subtarget))
17415 SupportsFPExt = true;
17416 if (isSupportedBF16Extend(NarrowEltVT, Subtarget))
17417 SupportsBF16Ext = true;
17418
17419 break;
17420 }
17421 case ISD::SPLAT_VECTOR:
17422 case RISCVISD::VMV_V_X_VL:
17423 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
17424 break;
17425 case RISCVISD::VFMV_V_F_VL: {
17426 MVT VT = OrigOperand.getSimpleValueType();
17427
17428 if (!OrigOperand.getOperand(0).isUndef())
17429 break;
17430
17431 SDValue Op = OrigOperand.getOperand(1);
17432 if (Op.getOpcode() != ISD::FP_EXTEND)
17433 break;
17434
17435 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
17436 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
17437 if (NarrowSize != ScalarBits)
17438 break;
17439
17440 if (isSupportedFPExtend(Op.getOperand(0).getSimpleValueType(), Subtarget))
17441 SupportsFPExt = true;
17442 if (isSupportedBF16Extend(Op.getOperand(0).getSimpleValueType(),
17443 Subtarget))
17444 SupportsBF16Ext = true;
17445 break;
17446 }
17447 default:
17448 break;
17449 }
17450 }
17451
17452 /// Check if \p Root supports any extension folding combines.
17453 static bool isSupportedRoot(const SDNode *Root,
17454 const RISCVSubtarget &Subtarget) {
17455 switch (Root->getOpcode()) {
17456 case ISD::ADD:
17457 case ISD::SUB:
17458 case ISD::MUL: {
17459 return Root->getValueType(0).isScalableVector();
17460 }
17461 case ISD::OR: {
17462 return Root->getValueType(0).isScalableVector() &&
17463 Root->getFlags().hasDisjoint();
17464 }
17465 // Vector Widening Integer Add/Sub/Mul Instructions
17466 case RISCVISD::ADD_VL:
17467 case RISCVISD::MUL_VL:
17468 case RISCVISD::VWADD_W_VL:
17469 case RISCVISD::VWADDU_W_VL:
17470 case RISCVISD::SUB_VL:
17471 case RISCVISD::VWSUB_W_VL:
17472 case RISCVISD::VWSUBU_W_VL:
17473 // Vector Widening Floating-Point Add/Sub/Mul Instructions
17474 case RISCVISD::FADD_VL:
17475 case RISCVISD::FSUB_VL:
17476 case RISCVISD::FMUL_VL:
17477 case RISCVISD::VFWADD_W_VL:
17478 case RISCVISD::VFWSUB_W_VL:
17479 return true;
17480 case RISCVISD::OR_VL:
17481 return Root->getFlags().hasDisjoint();
17482 case ISD::SHL:
17483 return Root->getValueType(0).isScalableVector() &&
17484 Subtarget.hasStdExtZvbb();
17485 case RISCVISD::SHL_VL:
17486 return Subtarget.hasStdExtZvbb();
17487 case RISCVISD::VFMADD_VL:
17488 case RISCVISD::VFNMSUB_VL:
17489 case RISCVISD::VFNMADD_VL:
17490 case RISCVISD::VFMSUB_VL:
17491 return true;
17492 default:
17493 return false;
17494 }
17495 }
17496
17497 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
17498 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
17499 const RISCVSubtarget &Subtarget) {
17500 assert(isSupportedRoot(Root, Subtarget) &&
17501 "Trying to build an helper with an "
17502 "unsupported root");
17503 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
17505 OrigOperand = Root->getOperand(OperandIdx);
17506
17507 unsigned Opc = Root->getOpcode();
17508 switch (Opc) {
17509 // We consider
17510 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
17511 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
17512 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
17513 case RISCVISD::VWADD_W_VL:
17514 case RISCVISD::VWADDU_W_VL:
17515 case RISCVISD::VWSUB_W_VL:
17516 case RISCVISD::VWSUBU_W_VL:
17517 case RISCVISD::VFWADD_W_VL:
17518 case RISCVISD::VFWSUB_W_VL:
17519 // Operand 1 can't be changed.
17520 if (OperandIdx == 1)
17521 break;
17522 [[fallthrough]];
17523 default:
17524 fillUpExtensionSupport(Root, DAG, Subtarget);
17525 break;
17526 }
17527 }
17528
17529 /// Helper function to get the Mask and VL from \p Root.
17530 static std::pair<SDValue, SDValue>
17531 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
17532 const RISCVSubtarget &Subtarget) {
17533 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
17534 switch (Root->getOpcode()) {
17535 case ISD::ADD:
17536 case ISD::SUB:
17537 case ISD::MUL:
17538 case ISD::OR:
17539 case ISD::SHL: {
17540 SDLoc DL(Root);
17541 MVT VT = Root->getSimpleValueType(0);
17542 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
17543 }
17544 default:
17545 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
17546 }
17547 }
17548
17549 /// Helper function to check if \p N is commutative with respect to the
17550 /// foldings that are supported by this class.
17551 static bool isCommutative(const SDNode *N) {
17552 switch (N->getOpcode()) {
17553 case ISD::ADD:
17554 case ISD::MUL:
17555 case ISD::OR:
17556 case RISCVISD::ADD_VL:
17557 case RISCVISD::MUL_VL:
17558 case RISCVISD::OR_VL:
17559 case RISCVISD::FADD_VL:
17560 case RISCVISD::FMUL_VL:
17561 case RISCVISD::VFMADD_VL:
17562 case RISCVISD::VFNMSUB_VL:
17563 case RISCVISD::VFNMADD_VL:
17564 case RISCVISD::VFMSUB_VL:
17565 return true;
17566 case RISCVISD::VWADD_W_VL:
17567 case RISCVISD::VWADDU_W_VL:
17568 case ISD::SUB:
17569 case RISCVISD::SUB_VL:
17570 case RISCVISD::VWSUB_W_VL:
17571 case RISCVISD::VWSUBU_W_VL:
17572 case RISCVISD::VFWADD_W_VL:
17573 case RISCVISD::FSUB_VL:
17574 case RISCVISD::VFWSUB_W_VL:
17575 case ISD::SHL:
17576 case RISCVISD::SHL_VL:
17577 return false;
17578 default:
17579 llvm_unreachable("Unexpected opcode");
17580 }
17581 }
17582
17583 /// Get a list of combine to try for folding extensions in \p Root.
17584 /// Note that each returned CombineToTry function doesn't actually modify
17585 /// anything. Instead each produces an optional CombineResult that, if not
17586 /// std::nullopt, needs to be materialized for the combine to be applied.
17587 /// \see CombineResult::materialize.
17588 /// If the related CombineToTry function returns std::nullopt, that means the
17589 /// combine didn't match.
17590 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
17591};
17592
17593/// Helper structure that holds all the necessary information to materialize a
17594/// combine that does some extension folding.
17595struct CombineResult {
17596 /// Opcode to be generated when materializing the combine.
17597 unsigned TargetOpcode;
17598 // No value means no extension is needed.
17599 std::optional<ExtKind> LHSExt;
17600 std::optional<ExtKind> RHSExt;
17601 /// Root of the combine.
17602 SDNode *Root;
17603 /// LHS of the TargetOpcode.
17604 NodeExtensionHelper LHS;
17605 /// RHS of the TargetOpcode.
17606 NodeExtensionHelper RHS;
17607
17608 CombineResult(unsigned TargetOpcode, SDNode *Root,
17609 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
17610 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
17611 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
17612 LHS(LHS), RHS(RHS) {}
17613
17614 /// Return a value that uses TargetOpcode and that can be used to replace
17615 /// Root.
17616 /// The actual replacement is *not* done in that method.
17617 SDValue materialize(SelectionDAG &DAG,
17618 const RISCVSubtarget &Subtarget) const {
17619 SDValue Mask, VL, Passthru;
17620 std::tie(Mask, VL) =
17621 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
17622 switch (Root->getOpcode()) {
17623 default:
17624 Passthru = Root->getOperand(2);
17625 break;
17626 case ISD::ADD:
17627 case ISD::SUB:
17628 case ISD::MUL:
17629 case ISD::OR:
17630 case ISD::SHL:
17631 Passthru = DAG.getUNDEF(Root->getValueType(0));
17632 break;
17633 }
17634 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
17635 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
17636 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
17637 Passthru, Mask, VL);
17638 }
17639};
17640
17641/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
17642/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
17643/// are zext) and LHS and RHS can be folded into Root.
17644/// AllowExtMask define which form `ext` can take in this pattern.
17645///
17646/// \note If the pattern can match with both zext and sext, the returned
17647/// CombineResult will feature the zext result.
17648///
17649/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17650/// can be used to apply the pattern.
17651static std::optional<CombineResult>
17652canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
17653 const NodeExtensionHelper &RHS,
17654 uint8_t AllowExtMask, SelectionDAG &DAG,
17655 const RISCVSubtarget &Subtarget) {
17656 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
17657 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
17658 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
17659 /*RHSExt=*/{ExtKind::ZExt});
17660 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
17661 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
17662 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
17663 /*RHSExt=*/{ExtKind::SExt});
17664 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
17665 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
17666 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
17667 /*RHSExt=*/{ExtKind::FPExt});
17668 if ((AllowExtMask & ExtKind::BF16Ext) && LHS.SupportsBF16Ext &&
17669 RHS.SupportsBF16Ext)
17670 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
17671 Root, LHS, /*LHSExt=*/{ExtKind::BF16Ext}, RHS,
17672 /*RHSExt=*/{ExtKind::BF16Ext});
17673 return std::nullopt;
17674}
17675
17676/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
17677/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
17678/// are zext) and LHS and RHS can be folded into Root.
17679///
17680/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17681/// can be used to apply the pattern.
17682static std::optional<CombineResult>
17683canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
17684 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17685 const RISCVSubtarget &Subtarget) {
17686 return canFoldToVWWithSameExtensionImpl(
17687 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
17688 Subtarget);
17689}
17690
17691/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
17692///
17693/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17694/// can be used to apply the pattern.
17695static std::optional<CombineResult>
17696canFoldToVWWithSameExtZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17697 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17698 const RISCVSubtarget &Subtarget) {
17699 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
17700 Subtarget);
17701}
17702
17703/// Check if \p Root follows a pattern Root(bf16ext(LHS), bf16ext(RHS))
17704///
17705/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17706/// can be used to apply the pattern.
17707static std::optional<CombineResult>
17708canFoldToVWWithSameExtBF16(SDNode *Root, const NodeExtensionHelper &LHS,
17709 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17710 const RISCVSubtarget &Subtarget) {
17711 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::BF16Ext, DAG,
17712 Subtarget);
17713}
17714
17715/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
17716///
17717/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17718/// can be used to apply the pattern.
17719static std::optional<CombineResult>
17720canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
17721 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17722 const RISCVSubtarget &Subtarget) {
17723 if (RHS.SupportsFPExt)
17724 return CombineResult(
17725 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
17726 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
17727
17728 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
17729 // sext/zext?
17730 // Control this behavior behind an option (AllowSplatInVW_W) for testing
17731 // purposes.
17732 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
17733 return CombineResult(
17734 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
17735 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
17736 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
17737 return CombineResult(
17738 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
17739 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
17740 return std::nullopt;
17741}
17742
17743/// Check if \p Root follows a pattern Root(sext(LHS), RHS)
17744///
17745/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17746/// can be used to apply the pattern.
17747static std::optional<CombineResult>
17748canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17749 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17750 const RISCVSubtarget &Subtarget) {
17751 if (LHS.SupportsSExt)
17752 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
17753 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
17754 /*RHSExt=*/std::nullopt);
17755 return std::nullopt;
17756}
17757
17758/// Check if \p Root follows a pattern Root(zext(LHS), RHS)
17759///
17760/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17761/// can be used to apply the pattern.
17762static std::optional<CombineResult>
17763canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17764 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17765 const RISCVSubtarget &Subtarget) {
17766 if (LHS.SupportsZExt)
17767 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
17768 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
17769 /*RHSExt=*/std::nullopt);
17770 return std::nullopt;
17771}
17772
17773/// Check if \p Root follows a pattern Root(fpext(LHS), RHS)
17774///
17775/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17776/// can be used to apply the pattern.
17777static std::optional<CombineResult>
17778canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17779 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17780 const RISCVSubtarget &Subtarget) {
17781 if (LHS.SupportsFPExt)
17782 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
17783 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
17784 /*RHSExt=*/std::nullopt);
17785 return std::nullopt;
17786}
17787
17788/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
17789///
17790/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17791/// can be used to apply the pattern.
17792static std::optional<CombineResult>
17793canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
17794 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17795 const RISCVSubtarget &Subtarget) {
17796
17797 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
17798 return std::nullopt;
17799 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
17800 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
17801 /*RHSExt=*/{ExtKind::ZExt});
17802}
17803
17805NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
17806 SmallVector<CombineToTry> Strategies;
17807 switch (Root->getOpcode()) {
17808 case ISD::ADD:
17809 case ISD::SUB:
17810 case ISD::OR:
17811 case RISCVISD::ADD_VL:
17812 case RISCVISD::SUB_VL:
17813 case RISCVISD::OR_VL:
17814 case RISCVISD::FADD_VL:
17815 case RISCVISD::FSUB_VL:
17816 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
17817 Strategies.push_back(canFoldToVWWithSameExtension);
17818 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w}|vfwadd_w|vfwsub_w
17819 Strategies.push_back(canFoldToVW_W);
17820 break;
17821 case RISCVISD::FMUL_VL:
17822 case RISCVISD::VFMADD_VL:
17823 case RISCVISD::VFMSUB_VL:
17824 case RISCVISD::VFNMADD_VL:
17825 case RISCVISD::VFNMSUB_VL:
17826 Strategies.push_back(canFoldToVWWithSameExtension);
17827 if (Root->getOpcode() == RISCVISD::VFMADD_VL)
17828 Strategies.push_back(canFoldToVWWithSameExtBF16);
17829 break;
17830 case ISD::MUL:
17831 case RISCVISD::MUL_VL:
17832 // mul -> vwmul(u)
17833 Strategies.push_back(canFoldToVWWithSameExtension);
17834 // mul -> vwmulsu
17835 Strategies.push_back(canFoldToVW_SU);
17836 break;
17837 case ISD::SHL:
17838 case RISCVISD::SHL_VL:
17839 // shl -> vwsll
17840 Strategies.push_back(canFoldToVWWithSameExtZEXT);
17841 break;
17842 case RISCVISD::VWADD_W_VL:
17843 case RISCVISD::VWSUB_W_VL:
17844 // vwadd_w|vwsub_w -> vwadd|vwsub
17845 Strategies.push_back(canFoldToVWWithSEXT);
17846 break;
17847 case RISCVISD::VWADDU_W_VL:
17848 case RISCVISD::VWSUBU_W_VL:
17849 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
17850 Strategies.push_back(canFoldToVWWithZEXT);
17851 break;
17852 case RISCVISD::VFWADD_W_VL:
17853 case RISCVISD::VFWSUB_W_VL:
17854 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
17855 Strategies.push_back(canFoldToVWWithFPEXT);
17856 break;
17857 default:
17858 llvm_unreachable("Unexpected opcode");
17859 }
17860 return Strategies;
17861}
17862} // End anonymous namespace.
17863
17865 // TODO: Extend this to other binops using generic identity logic
17866 assert(N->getOpcode() == RISCVISD::ADD_VL);
17867 SDValue A = N->getOperand(0);
17868 SDValue B = N->getOperand(1);
17869 SDValue Passthru = N->getOperand(2);
17870 if (!Passthru.isUndef())
17871 // TODO: This could be a vmerge instead
17872 return SDValue();
17873
17874 if (ISD::isConstantSplatVectorAllZeros(B.getNode()))
17875 return A;
17876 // Peek through fixed to scalable
17877 if (B.getOpcode() == ISD::INSERT_SUBVECTOR && B.getOperand(0).isUndef() &&
17878 ISD::isConstantSplatVectorAllZeros(B.getOperand(1).getNode()))
17879 return A;
17880 return SDValue();
17881}
17882
17883/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
17884/// The supported combines are:
17885/// add | add_vl | or disjoint | or_vl disjoint -> vwadd(u) | vwadd(u)_w
17886/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
17887/// mul | mul_vl -> vwmul(u) | vwmul_su
17888/// shl | shl_vl -> vwsll
17889/// fadd_vl -> vfwadd | vfwadd_w
17890/// fsub_vl -> vfwsub | vfwsub_w
17891/// fmul_vl -> vfwmul
17892/// vwadd_w(u) -> vwadd(u)
17893/// vwsub_w(u) -> vwsub(u)
17894/// vfwadd_w -> vfwadd
17895/// vfwsub_w -> vfwsub
17896 static SDValue combineOp_VLToVWOp_VL(SDNode *N,
17897 TargetLowering::DAGCombinerInfo &DCI,
17898 const RISCVSubtarget &Subtarget) {
17899 SelectionDAG &DAG = DCI.DAG;
17900 if (DCI.isBeforeLegalize())
17901 return SDValue();
17902
17903 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
17904 return SDValue();
17905
17906 SmallVector<SDNode *> Worklist;
17907 SmallPtrSet<SDNode *, 8> Inserted;
17908 SmallPtrSet<SDNode *, 8> ExtensionsToRemove;
17909 Worklist.push_back(N);
17910 Inserted.insert(N);
17911 SmallVector<CombineResult> CombinesToApply;
17912
17913 while (!Worklist.empty()) {
17914 SDNode *Root = Worklist.pop_back_val();
17915
17916 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
17917 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
17918 auto AppendUsersIfNeeded =
17919 [&Worklist, &Subtarget, &Inserted,
17920 &ExtensionsToRemove](const NodeExtensionHelper &Op) {
17921 if (Op.needToPromoteOtherUsers()) {
17922 // Remember that we're supposed to remove this extension.
17923 ExtensionsToRemove.insert(Op.OrigOperand.getNode());
17924 for (SDUse &Use : Op.OrigOperand->uses()) {
17925 SDNode *TheUser = Use.getUser();
17926 if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
17927 return false;
17928 // We only support the first 2 operands of FMA.
17929 if (Use.getOperandNo() >= 2)
17930 return false;
17931 if (Inserted.insert(TheUser).second)
17932 Worklist.push_back(TheUser);
17933 }
17934 }
17935 return true;
17936 };
17937
17938 // Control the compile time by limiting the number of nodes we look at in
17939 // total.
17940 if (Inserted.size() > ExtensionMaxWebSize)
17941 return SDValue();
17942
17943 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
17944 NodeExtensionHelper::getSupportedFoldings(Root);
17945
17946 assert(!FoldingStrategies.empty() && "Nothing to be folded");
17947 bool Matched = false;
17948 for (int Attempt = 0;
17949 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
17950 ++Attempt) {
17951
17952 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
17953 FoldingStrategies) {
17954 std::optional<CombineResult> Res =
17955 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
17956 if (Res) {
17957 // If this strategy wouldn't remove an extension we're supposed to
17958 // remove, reject it.
17959 if (!Res->LHSExt.has_value() &&
17960 ExtensionsToRemove.contains(LHS.OrigOperand.getNode()))
17961 continue;
17962 if (!Res->RHSExt.has_value() &&
17963 ExtensionsToRemove.contains(RHS.OrigOperand.getNode()))
17964 continue;
17965
17966 Matched = true;
17967 CombinesToApply.push_back(*Res);
17968 // All the inputs that are extended need to be folded, otherwise
17969 // we would be leaving the old input (since it may still be used),
17970 // and the new one.
17971 if (Res->LHSExt.has_value())
17972 if (!AppendUsersIfNeeded(LHS))
17973 return SDValue();
17974 if (Res->RHSExt.has_value())
17975 if (!AppendUsersIfNeeded(RHS))
17976 return SDValue();
17977 break;
17978 }
17979 }
17980 std::swap(LHS, RHS);
17981 }
17982 // Right now we do an all or nothing approach.
17983 if (!Matched)
17984 return SDValue();
17985 }
17986 // Store the value for the replacement of the input node separately.
17987 SDValue InputRootReplacement;
17988 // We do the RAUW after we materialize all the combines, because some replaced
17989 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
17990 // some of these nodes may appear in the NodeExtensionHelpers of some of the
17991 // yet-to-be-visited CombinesToApply roots.
17992 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
17993 ValuesToReplace.reserve(CombinesToApply.size());
17994 for (CombineResult Res : CombinesToApply) {
17995 SDValue NewValue = Res.materialize(DAG, Subtarget);
17996 if (!InputRootReplacement) {
17997 assert(Res.Root == N &&
17998 "First element is expected to be the current node");
17999 InputRootReplacement = NewValue;
18000 } else {
18001 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
18002 }
18003 }
18004 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
18005 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
18006 DCI.AddToWorklist(OldNewValues.second.getNode());
18007 }
18008 return InputRootReplacement;
18009}
18010
18011// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
18012// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
18013// y will be the Passthru and cond will be the Mask.
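// This is legal because with an all-zeros false operand the masked-off lanes
// compute y + 0 (or y - 0), i.e. y, which is exactly what the masked widening
// op produces when y is used as the passthru.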
18014 static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
18015 unsigned Opc = N->getOpcode();
18016 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
18017 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
18018
18019 SDValue Y = N->getOperand(0);
18020 SDValue MergeOp = N->getOperand(1);
18021 unsigned MergeOpc = MergeOp.getOpcode();
18022
18023 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
18024 return SDValue();
18025
18026 SDValue X = MergeOp->getOperand(1);
18027
18028 if (!MergeOp.hasOneUse())
18029 return SDValue();
18030
18031 // Passthru should be undef
18032 SDValue Passthru = N->getOperand(2);
18033 if (!Passthru.isUndef())
18034 return SDValue();
18035
18036 // Mask should be all ones
18037 SDValue Mask = N->getOperand(3);
18038 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
18039 return SDValue();
18040
18041 // False value of MergeOp should be all zeros
18042 SDValue Z = MergeOp->getOperand(2);
18043
18044 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
18045 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
18046 Z = Z.getOperand(1);
18047
18048 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
18049 return SDValue();
18050
18051 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
18052 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
18053 N->getFlags());
18054}
18055
18056 static SDValue performVWADDSUBW_VLCombine(SDNode *N,
18057 TargetLowering::DAGCombinerInfo &DCI,
18058 const RISCVSubtarget &Subtarget) {
18059 [[maybe_unused]] unsigned Opc = N->getOpcode();
18060 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
18061 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
18062
18063 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18064 return V;
18065
18066 return combineVWADDSUBWSelect(N, DCI.DAG);
18067}
18068
18069// Helper function for performMemPairCombine.
18070// Try to combine the memory loads/stores LSNode1 and LSNode2
18071// into a single memory pair operation.
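// e.g. two i64 loads from Base + 16 and Base + 24 become a single
// RISCVISD::TH_LDD node using the shared base pointer and offset 16.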
18072 static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
18073 LSBaseSDNode *LSNode2, SDValue BasePtr,
18074 uint64_t Imm) {
18075 SmallPtrSet<const SDNode *, 32> Visited;
18076 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
18077
18078 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
18079 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
18080 return SDValue();
18081
18082 MachineFunction &MF = DAG.getMachineFunction();
18083 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
18084
18085 // The new operation has twice the width.
18086 MVT XLenVT = Subtarget.getXLenVT();
18087 EVT MemVT = LSNode1->getMemoryVT();
18088 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
18089 MachineMemOperand *MMO = LSNode1->getMemOperand();
18090 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
18091 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
18092
18093 if (LSNode1->getOpcode() == ISD::LOAD) {
18094 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
18095 unsigned Opcode;
18096 if (MemVT == MVT::i32)
18097 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
18098 else
18099 Opcode = RISCVISD::TH_LDD;
18100
18101 SDValue Res = DAG.getMemIntrinsicNode(
18102 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
18103 {LSNode1->getChain(), BasePtr,
18104 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
18105 NewMemVT, NewMMO);
18106
18107 SDValue Node1 =
18108 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
18109 SDValue Node2 =
18110 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
18111
18112 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
18113 return Node1;
18114 } else {
18115 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
18116
18117 SDValue Res = DAG.getMemIntrinsicNode(
18118 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
18119 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
18120 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
18121 NewMemVT, NewMMO);
18122
18123 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
18124 return Res;
18125 }
18126}
18127
18128// Try to combine two adjacent loads/stores to a single pair instruction from
18129// the XTHeadMemPair vendor extension.
18130static SDValue performMemPairCombine(SDNode *N,
18131                                     TargetLowering::DAGCombinerInfo &DCI) {
18132  SelectionDAG &DAG = DCI.DAG;
18133  MachineFunction &MF = DAG.getMachineFunction();
18134  const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
18135
18136 // Target does not support load/store pair.
18137 if (!Subtarget.hasVendorXTHeadMemPair())
18138 return SDValue();
18139
18140 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
18141 EVT MemVT = LSNode1->getMemoryVT();
18142 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
18143
18144 // No volatile, indexed or atomic loads/stores.
18145 if (!LSNode1->isSimple() || LSNode1->isIndexed())
18146 return SDValue();
18147
18148 // Function to get a base + constant representation from a memory value.
18149 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
18150 if (Ptr->getOpcode() == ISD::ADD)
18151 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
18152 return {Ptr->getOperand(0), C1->getZExtValue()};
18153 return {Ptr, 0};
18154 };
18155
18156 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
18157
18158 SDValue Chain = N->getOperand(0);
18159 for (SDUse &Use : Chain->uses()) {
18160 if (Use.getUser() != N && Use.getResNo() == 0 &&
18161 Use.getUser()->getOpcode() == N->getOpcode()) {
18162      LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
18163
18164 // No volatile, indexed or atomic loads/stores.
18165 if (!LSNode2->isSimple() || LSNode2->isIndexed())
18166 continue;
18167
18168 // Check if LSNode1 and LSNode2 have the same type and extension.
18169 if (LSNode1->getOpcode() == ISD::LOAD)
18170 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
18171            cast<LoadSDNode>(LSNode1)->getExtensionType())
18172          continue;
18173
18174 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
18175 continue;
18176
18177 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
18178
18179      // Check if the base pointer is the same for both instructions.
18180 if (Base1 != Base2)
18181 continue;
18182
18183 // Check if the offsets match the XTHeadMemPair encoding constraints.
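      // For example, per the isShiftedUInt checks below, an i32 pair needs the
      // first offset to be 0, 8, 16 or 24 with the second exactly 4 bytes
      // higher, and an i64 pair needs the first offset to be 0, 16, 32 or 48
      // with the second 8 bytes higher.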
18184 bool Valid = false;
18185 if (MemVT == MVT::i32) {
18186 // Check for adjacent i32 values and a 2-bit index.
18187 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
18188 Valid = true;
18189 } else if (MemVT == MVT::i64) {
18190 // Check for adjacent i64 values and a 2-bit index.
18191 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
18192 Valid = true;
18193 }
18194
18195 if (!Valid)
18196 continue;
18197
18198 // Try to combine.
18199 if (SDValue Res =
18200 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
18201 return Res;
18202 }
18203 }
18204
18205 return SDValue();
18206}
18207
18208// Fold
18209// (fp_to_int (froundeven X)) -> fcvt X, rne
18210// (fp_to_int (ftrunc X)) -> fcvt X, rtz
18211// (fp_to_int (ffloor X)) -> fcvt X, rdn
18212// (fp_to_int (fceil X)) -> fcvt X, rup
18213// (fp_to_int (fround X)) -> fcvt X, rmm
18214// (fp_to_int (frint X)) -> fcvt X
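// For example, (i64 (fp_to_sint (ffloor f64:X))) can then be selected as a
// single fcvt.l.d with the rdn rounding mode instead of rounding to an
// integer-valued double first and converting separately.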
18215static SDValue performFP_TO_INTCombine(SDNode *N,
18216                                       TargetLowering::DAGCombinerInfo &DCI,
18217                                       const RISCVSubtarget &Subtarget) {
18218 SelectionDAG &DAG = DCI.DAG;
18219 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18220 MVT XLenVT = Subtarget.getXLenVT();
18221
18222 SDValue Src = N->getOperand(0);
18223
18224 // Don't do this for strict-fp Src.
18225 if (Src->isStrictFPOpcode())
18226 return SDValue();
18227
18228 // Ensure the FP type is legal.
18229 if (!TLI.isTypeLegal(Src.getValueType()))
18230 return SDValue();
18231
18232 // Don't do this for f16 with Zfhmin and not Zfh.
18233 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
18234 return SDValue();
18235
18236 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
18237 // If the result is invalid, we didn't find a foldable instruction.
18238 if (FRM == RISCVFPRndMode::Invalid)
18239 return SDValue();
18240
18241 SDLoc DL(N);
18242 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
18243 EVT VT = N->getValueType(0);
18244
18245 if (VT.isVector() && TLI.isTypeLegal(VT)) {
18246 MVT SrcVT = Src.getSimpleValueType();
18247 MVT SrcContainerVT = SrcVT;
18248 MVT ContainerVT = VT.getSimpleVT();
18249 SDValue XVal = Src.getOperand(0);
18250
18251 // For widening and narrowing conversions we just combine it into a
18252 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
18253 // end up getting lowered to their appropriate pseudo instructions based on
18254 // their operand types
18255 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
18256 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
18257 return SDValue();
18258
18259 // Make fixed-length vectors scalable first
18260 if (SrcVT.isFixedLengthVector()) {
18261 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
18262 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
18263 ContainerVT =
18264 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
18265 }
18266
18267 auto [Mask, VL] =
18268 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
18269
18270 SDValue FpToInt;
18271 if (FRM == RISCVFPRndMode::RTZ) {
18272 // Use the dedicated trunc static rounding mode if we're truncating so we
18273 // don't need to generate calls to fsrmi/fsrm
18274 unsigned Opc =
18275 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
18276 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
18277 } else {
18278 unsigned Opc =
18279 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
18280 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
18281 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
18282 }
18283
18284 // If converted from fixed-length to scalable, convert back
18285 if (VT.isFixedLengthVector())
18286 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
18287
18288 return FpToInt;
18289 }
18290
18291 // Only handle XLen or i32 types. Other types narrower than XLen will
18292 // eventually be legalized to XLenVT.
18293 if (VT != MVT::i32 && VT != XLenVT)
18294 return SDValue();
18295
18296 unsigned Opc;
18297 if (VT == XLenVT)
18298 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
18299 else
18300 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
18301
18302 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
18303 DAG.getTargetConstant(FRM, DL, XLenVT));
18304 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
18305}
18306
18307// Fold
18308// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
18309// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
18310// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
18311// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
18312// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
18313// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
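// The NaN select below is needed because the scalar fcvt instructions return
// the largest value of the destination type for NaN inputs, while the
// fp_to_*_sat nodes are defined to return 0 for NaN.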
18314static SDValue performFP_TO_INT_SATCombine(SDNode *N,
18315                                           TargetLowering::DAGCombinerInfo &DCI,
18316                                           const RISCVSubtarget &Subtarget) {
18317 SelectionDAG &DAG = DCI.DAG;
18318 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18319 MVT XLenVT = Subtarget.getXLenVT();
18320
18321 // Only handle XLen types. Other types narrower than XLen will eventually be
18322 // legalized to XLenVT.
18323 EVT DstVT = N->getValueType(0);
18324 if (DstVT != XLenVT)
18325 return SDValue();
18326
18327 SDValue Src = N->getOperand(0);
18328
18329 // Don't do this for strict-fp Src.
18330 if (Src->isStrictFPOpcode())
18331 return SDValue();
18332
18333 // Ensure the FP type is also legal.
18334 if (!TLI.isTypeLegal(Src.getValueType()))
18335 return SDValue();
18336
18337 // Don't do this for f16 with Zfhmin and not Zfh.
18338 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
18339 return SDValue();
18340
18341 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
18342
18343 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
18344 if (FRM == RISCVFPRndMode::Invalid)
18345 return SDValue();
18346
18347 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
18348
18349 unsigned Opc;
18350 if (SatVT == DstVT)
18351 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
18352 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
18353 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
18354 else
18355 return SDValue();
18356 // FIXME: Support other SatVTs by clamping before or after the conversion.
18357
18358 Src = Src.getOperand(0);
18359
18360 SDLoc DL(N);
18361 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
18362 DAG.getTargetConstant(FRM, DL, XLenVT));
18363
18364 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
18365 // extend.
18366 if (Opc == RISCVISD::FCVT_WU_RV64)
18367 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
18368
18369 // RISC-V FP-to-int conversions saturate to the destination register size, but
18370 // don't produce 0 for nan.
18371 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
18372 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
18373}
18374
18375// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
18376// smaller than XLenVT.
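// For example, for i16 a bswap swaps the two bytes and the outer bitreverse
// then reverses all 16 bits; the net effect is reversing the bits within each
// byte, which is exactly what brev8 computes.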
18377static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
18378                                        const RISCVSubtarget &Subtarget) {
18379 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
18380
18381 SDValue Src = N->getOperand(0);
18382 if (Src.getOpcode() != ISD::BSWAP)
18383 return SDValue();
18384
18385 EVT VT = N->getValueType(0);
18386 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
18387      !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
18388    return SDValue();
18389
18390 SDLoc DL(N);
18391 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
18392}
18393
18394static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG,
18395                                        const RISCVSubtarget &Subtarget) {
18396 // Fold:
18397 // vp.reverse(vp.load(ADDR, MASK)) -> vp.strided.load(ADDR, -1, MASK)
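  // The strided load below starts at the address of the last active element
  // and uses a negative stride, so lane i of the result holds element
  // (EVL-1-i) of the original load.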
18398
18399 // Check if its first operand is a vp.load.
18400 auto *VPLoad = dyn_cast<VPLoadSDNode>(N->getOperand(0));
18401 if (!VPLoad)
18402 return SDValue();
18403
18404 EVT LoadVT = VPLoad->getValueType(0);
18405 // We do not have a strided_load version for masks, and the evl of vp.reverse
18406 // and vp.load should always be the same.
18407 if (!LoadVT.getVectorElementType().isByteSized() ||
18408 N->getOperand(2) != VPLoad->getVectorLength() ||
18409 !N->getOperand(0).hasOneUse())
18410 return SDValue();
18411
18412  // Check if the mask of the outer vp.reverse is all 1's.
18413 if (!isOneOrOneSplat(N->getOperand(1)))
18414 return SDValue();
18415
18416 SDValue LoadMask = VPLoad->getMask();
18417 // If Mask is all ones, then load is unmasked and can be reversed.
18418 if (!isOneOrOneSplat(LoadMask)) {
18419 // If the mask is not all ones, we can reverse the load if the mask was also
18420 // reversed by an unmasked vp.reverse with the same EVL.
18421 if (LoadMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
18422 !isOneOrOneSplat(LoadMask.getOperand(1)) ||
18423 LoadMask.getOperand(2) != VPLoad->getVectorLength())
18424 return SDValue();
18425 LoadMask = LoadMask.getOperand(0);
18426 }
18427
18428 // Base = LoadAddr + (NumElem - 1) * ElemWidthByte
18429 SDLoc DL(N);
18430 MVT XLenVT = Subtarget.getXLenVT();
18431 SDValue NumElem = VPLoad->getVectorLength();
18432 uint64_t ElemWidthByte = VPLoad->getValueType(0).getScalarSizeInBits() / 8;
18433
18434 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
18435 DAG.getConstant(1, DL, XLenVT));
18436 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
18437 DAG.getConstant(ElemWidthByte, DL, XLenVT));
18438 SDValue Base = DAG.getNode(ISD::ADD, DL, XLenVT, VPLoad->getBasePtr(), Temp2);
18439 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
18440
18441  MachineFunction &MF = DAG.getMachineFunction();
18442  MachinePointerInfo PtrInfo(VPLoad->getAddressSpace());
18443  MachineMemOperand *MMO = MF.getMachineMemOperand(
18444 PtrInfo, VPLoad->getMemOperand()->getFlags(),
18445 LocationSize::beforeOrAfterPointer(), VPLoad->getAlign());
18446
18447 SDValue Ret = DAG.getStridedLoadVP(
18448 LoadVT, DL, VPLoad->getChain(), Base, Stride, LoadMask,
18449 VPLoad->getVectorLength(), MMO, VPLoad->isExpandingLoad());
18450
18451 DAG.ReplaceAllUsesOfValueWith(SDValue(VPLoad, 1), Ret.getValue(1));
18452
18453 return Ret;
18454}
18455
18456static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG,
18457                                      const RISCVSubtarget &Subtarget) {
18458 // Fold:
18459 // vp.store(vp.reverse(VAL), ADDR, MASK) -> vp.strided.store(VAL, NEW_ADDR,
18460 // -1, MASK)
18461 auto *VPStore = cast<VPStoreSDNode>(N);
18462
18463 if (VPStore->getValue().getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE)
18464 return SDValue();
18465
18466 SDValue VPReverse = VPStore->getValue();
18467 EVT ReverseVT = VPReverse->getValueType(0);
18468
18469 // We do not have a strided_store version for masks, and the evl of vp.reverse
18470 // and vp.store should always be the same.
18471 if (!ReverseVT.getVectorElementType().isByteSized() ||
18472 VPStore->getVectorLength() != VPReverse.getOperand(2) ||
18473 !VPReverse.hasOneUse())
18474 return SDValue();
18475
18476 SDValue StoreMask = VPStore->getMask();
18477  // If Mask is all ones, then the store is unmasked and can be reversed.
18478 if (!isOneOrOneSplat(StoreMask)) {
18479 // If the mask is not all ones, we can reverse the store if the mask was
18480 // also reversed by an unmasked vp.reverse with the same EVL.
18481 if (StoreMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
18482 !isOneOrOneSplat(StoreMask.getOperand(1)) ||
18483 StoreMask.getOperand(2) != VPStore->getVectorLength())
18484 return SDValue();
18485 StoreMask = StoreMask.getOperand(0);
18486 }
18487
18488 // Base = StoreAddr + (NumElem - 1) * ElemWidthByte
18489 SDLoc DL(N);
18490 MVT XLenVT = Subtarget.getXLenVT();
18491 SDValue NumElem = VPStore->getVectorLength();
18492 uint64_t ElemWidthByte = VPReverse.getValueType().getScalarSizeInBits() / 8;
18493
18494 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
18495 DAG.getConstant(1, DL, XLenVT));
18496 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
18497 DAG.getConstant(ElemWidthByte, DL, XLenVT));
18498 SDValue Base =
18499 DAG.getNode(ISD::ADD, DL, XLenVT, VPStore->getBasePtr(), Temp2);
18500 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
18501
18502  MachineFunction &MF = DAG.getMachineFunction();
18503  MachinePointerInfo PtrInfo(VPStore->getAddressSpace());
18504  MachineMemOperand *MMO = MF.getMachineMemOperand(
18505 PtrInfo, VPStore->getMemOperand()->getFlags(),
18506 LocationSize::beforeOrAfterPointer(), VPStore->getAlign());
18507
18508 return DAG.getStridedStoreVP(
18509 VPStore->getChain(), DL, VPReverse.getOperand(0), Base,
18510 VPStore->getOffset(), Stride, StoreMask, VPStore->getVectorLength(),
18511 VPStore->getMemoryVT(), MMO, VPStore->getAddressingMode(),
18512 VPStore->isTruncatingStore(), VPStore->isCompressingStore());
18513}
18514
18515// Peephole avgceil pattern.
18516// %1 = zext <N x i8> %a to <N x i32>
18517// %2 = zext <N x i8> %b to <N x i32>
18518// %3 = add nuw nsw <N x i32> %1, splat (i32 1)
18519// %4 = add nuw nsw <N x i32> %3, %2
18520// %5 = lshr <N x i32> %4, splat (i32 1)
18521// %6 = trunc <N x i32> %5 to <N x i8>
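// i.e. %6 = (zext(%a) + zext(%b) + 1) >> 1, the unsigned rounding-up average,
// which maps onto vaaddu with the rnu rounding mode.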
18522static SDValue performVP_TRUNCATECombine(SDNode *N, SelectionDAG &DAG,
18523                                         const RISCVSubtarget &Subtarget) {
18524 EVT VT = N->getValueType(0);
18525
18526 // Ignore fixed vectors.
18527 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18528 if (!VT.isScalableVector() || !TLI.isTypeLegal(VT))
18529 return SDValue();
18530
18531 SDValue In = N->getOperand(0);
18532 SDValue Mask = N->getOperand(1);
18533 SDValue VL = N->getOperand(2);
18534
18535 // Input should be a vp_srl with same mask and VL.
18536 if (In.getOpcode() != ISD::VP_SRL || In.getOperand(2) != Mask ||
18537 In.getOperand(3) != VL)
18538 return SDValue();
18539
18540 // Shift amount should be 1.
18541 if (!isOneOrOneSplat(In.getOperand(1)))
18542 return SDValue();
18543
18544 // Shifted value should be a vp_add with same mask and VL.
18545 SDValue LHS = In.getOperand(0);
18546 if (LHS.getOpcode() != ISD::VP_ADD || LHS.getOperand(2) != Mask ||
18547 LHS.getOperand(3) != VL)
18548 return SDValue();
18549
18550 SDValue Operands[3];
18551
18552 // Matches another VP_ADD with same VL and Mask.
18553 auto FindAdd = [&](SDValue V, SDValue Other) {
18554 if (V.getOpcode() != ISD::VP_ADD || V.getOperand(2) != Mask ||
18555 V.getOperand(3) != VL)
18556 return false;
18557
18558 Operands[0] = Other;
18559 Operands[1] = V.getOperand(1);
18560 Operands[2] = V.getOperand(0);
18561 return true;
18562 };
18563
18564 // We need to find another VP_ADD in one of the operands.
18565 SDValue LHS0 = LHS.getOperand(0);
18566 SDValue LHS1 = LHS.getOperand(1);
18567 if (!FindAdd(LHS0, LHS1) && !FindAdd(LHS1, LHS0))
18568 return SDValue();
18569
18570 // Now we have three operands of two additions. Check that one of them is a
18571 // constant vector with ones.
18572 auto I = llvm::find_if(Operands,
18573 [](const SDValue &Op) { return isOneOrOneSplat(Op); });
18574 if (I == std::end(Operands))
18575 return SDValue();
18576  // We found a vector with ones; move it to the end of the Operands array.
18577 std::swap(*I, Operands[2]);
18578
18579 // Make sure the other 2 operands can be promoted from the result type.
18580 for (SDValue Op : drop_end(Operands)) {
18581 if (Op.getOpcode() != ISD::VP_ZERO_EXTEND || Op.getOperand(1) != Mask ||
18582 Op.getOperand(2) != VL)
18583 return SDValue();
18584 // Input must be the same size or smaller than our result.
18585 if (Op.getOperand(0).getScalarValueSizeInBits() > VT.getScalarSizeInBits())
18586 return SDValue();
18587 }
18588
18589 // Pattern is detected.
18590 // Rebuild the zero extends in case the inputs are smaller than our result.
18591 SDValue NewOp0 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[0]), VT,
18592 Operands[0].getOperand(0), Mask, VL);
18593 SDValue NewOp1 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[1]), VT,
18594 Operands[1].getOperand(0), Mask, VL);
18595 // Build a AVGCEILU_VL which will be selected as a VAADDU with RNU rounding
18596 // mode.
18597 SDLoc DL(N);
18598 return DAG.getNode(RISCVISD::AVGCEILU_VL, DL, VT,
18599 {NewOp0, NewOp1, DAG.getUNDEF(VT), Mask, VL});
18600}
18601
18602// Convert from one FMA opcode to another based on whether we are negating the
18603// multiply result and/or the accumulator.
18604// NOTE: Only supports RVV operations with VL.
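// For example, negating just the multiply result turns vfmadd (a*b + c) into
// vfnmsub (-(a*b) + c), and negating both the product and the accumulator
// yields vfnmadd (-(a*b) - c).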
18605static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
18606 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
18607 if (NegMul) {
18608 // clang-format off
18609 switch (Opcode) {
18610 default: llvm_unreachable("Unexpected opcode");
18611 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
18612 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
18613 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
18614 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
18615 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
18616 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
18617 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
18618 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
18619 }
18620 // clang-format on
18621 }
18622
18623 // Negating the accumulator changes ADD<->SUB.
18624 if (NegAcc) {
18625 // clang-format off
18626 switch (Opcode) {
18627 default: llvm_unreachable("Unexpected opcode");
18628 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
18629 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
18630 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
18631 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
18632 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
18633 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
18634 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
18635 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
18636 }
18637 // clang-format on
18638 }
18639
18640 return Opcode;
18641}
18642
18643static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
18644  // Fold FNEG_VL into FMA opcodes.
18645 // The first operand of strict-fp is chain.
18646 bool IsStrict =
18647 DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode());
18648 unsigned Offset = IsStrict ? 1 : 0;
18649 SDValue A = N->getOperand(0 + Offset);
18650 SDValue B = N->getOperand(1 + Offset);
18651 SDValue C = N->getOperand(2 + Offset);
18652 SDValue Mask = N->getOperand(3 + Offset);
18653 SDValue VL = N->getOperand(4 + Offset);
18654
18655 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
18656 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
18657 V.getOperand(2) == VL) {
18658 // Return the negated input.
18659 V = V.getOperand(0);
18660 return true;
18661 }
18662
18663 return false;
18664 };
18665
18666 bool NegA = invertIfNegative(A);
18667 bool NegB = invertIfNegative(B);
18668 bool NegC = invertIfNegative(C);
18669
18670 // If no operands are negated, we're done.
18671 if (!NegA && !NegB && !NegC)
18672 return SDValue();
18673
18674 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
18675 if (IsStrict)
18676 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
18677 {N->getOperand(0), A, B, C, Mask, VL});
18678 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
18679 VL);
18680}
18681
18682static SDValue performVFMADD_VLCombine(SDNode *N,
18683                                       TargetLowering::DAGCombinerInfo &DCI,
18684                                       const RISCVSubtarget &Subtarget) {
18685 SelectionDAG &DAG = DCI.DAG;
18686
18687  if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
18688    return V;
18689
18690 // FIXME: Ignore strict opcodes for now.
18691 if (DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode()))
18692 return SDValue();
18693
18694 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18695}
18696
18697static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
18698                                 const RISCVSubtarget &Subtarget) {
18699 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
18700
18701 EVT VT = N->getValueType(0);
18702
18703 if (VT != Subtarget.getXLenVT())
18704 return SDValue();
18705
18706 if (!isa<ConstantSDNode>(N->getOperand(1)))
18707 return SDValue();
18708 uint64_t ShAmt = N->getConstantOperandVal(1);
18709
18710 SDValue N0 = N->getOperand(0);
18711
18712 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
18713 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
18714 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
18715 unsigned ExtSize =
18716 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
18717 if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
18718 N0.getOperand(0).hasOneUse() &&
18719        isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
18720      uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
18721 if (LShAmt < ExtSize) {
18722 unsigned Size = VT.getSizeInBits();
18723 SDLoc ShlDL(N0.getOperand(0));
18724 SDValue Shl =
18725 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
18726 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
18727 SDLoc DL(N);
18728 return DAG.getNode(ISD::SRA, DL, VT, Shl,
18729 DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
18730 }
18731 }
18732 }
18733
18734 if (ShAmt > 32 || VT != MVT::i64)
18735 return SDValue();
18736
18737 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
18738 // FIXME: Should this be a generic combine? There's a similar combine on X86.
18739 //
18740 // Also try these folds where an add or sub is in the middle.
18741  // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), i32), C)
18742  // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), i32), C)
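  // For example, (sra (shl X, 32), 29) becomes (shl (sext_inreg X, i32), 3),
  // i.e. a sext.w followed by an slli by 3.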
18743 SDValue Shl;
18744 ConstantSDNode *AddC = nullptr;
18745
18746 // We might have an ADD or SUB between the SRA and SHL.
18747 bool IsAdd = N0.getOpcode() == ISD::ADD;
18748 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
18749 // Other operand needs to be a constant we can modify.
18750 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
18751 if (!AddC)
18752 return SDValue();
18753
18754 // AddC needs to have at least 32 trailing zeros.
18755 if (llvm::countr_zero(AddC->getZExtValue()) < 32)
18756 return SDValue();
18757
18758 // All users should be a shift by constant less than or equal to 32. This
18759 // ensures we'll do this optimization for each of them to produce an
18760 // add/sub+sext_inreg they can all share.
18761 for (SDNode *U : N0->users()) {
18762 if (U->getOpcode() != ISD::SRA ||
18763 !isa<ConstantSDNode>(U->getOperand(1)) ||
18764 U->getConstantOperandVal(1) > 32)
18765 return SDValue();
18766 }
18767
18768 Shl = N0.getOperand(IsAdd ? 0 : 1);
18769 } else {
18770 // Not an ADD or SUB.
18771 Shl = N0;
18772 }
18773
18774 // Look for a shift left by 32.
18775 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
18776 Shl.getConstantOperandVal(1) != 32)
18777 return SDValue();
18778
18779  // If we didn't look through an add/sub, then the shl should have one use.
18780 // If we did look through an add/sub, the sext_inreg we create is free so
18781 // we're only creating 2 new instructions. It's enough to only remove the
18782 // original sra+add/sub.
18783 if (!AddC && !Shl.hasOneUse())
18784 return SDValue();
18785
18786 SDLoc DL(N);
18787 SDValue In = Shl.getOperand(0);
18788
18789 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
18790 // constant.
18791 if (AddC) {
18792 SDValue ShiftedAddC =
18793 DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
18794 if (IsAdd)
18795 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
18796 else
18797 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
18798 }
18799
18800 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
18801 DAG.getValueType(MVT::i32));
18802 if (ShAmt == 32)
18803 return SExt;
18804
18805 return DAG.getNode(
18806 ISD::SHL, DL, MVT::i64, SExt,
18807 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
18808}
18809
18810// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y)), Z) if
18811// the result is used as the condition of a br_cc or select_cc we can invert,
18812// inverting the setcc is free, and Z is 0/1. Caller will invert the
18813// br_cc/select_cc.
18814static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
18815  bool IsAnd = Cond.getOpcode() == ISD::AND;
18816 if (!IsAnd && Cond.getOpcode() != ISD::OR)
18817 return SDValue();
18818
18819 if (!Cond.hasOneUse())
18820 return SDValue();
18821
18822 SDValue Setcc = Cond.getOperand(0);
18823 SDValue Xor = Cond.getOperand(1);
18824 // Canonicalize setcc to LHS.
18825 if (Setcc.getOpcode() != ISD::SETCC)
18826 std::swap(Setcc, Xor);
18827 // LHS should be a setcc and RHS should be an xor.
18828 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
18829 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
18830 return SDValue();
18831
18832 // If the condition is an And, SimplifyDemandedBits may have changed
18833 // (xor Z, 1) to (not Z).
18834 SDValue Xor1 = Xor.getOperand(1);
18835 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
18836 return SDValue();
18837
18838 EVT VT = Cond.getValueType();
18839 SDValue Xor0 = Xor.getOperand(0);
18840
18841 // The LHS of the xor needs to be 0/1.
18842  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
18843  if (!DAG.MaskedValueIsZero(Xor0, Mask))
18844 return SDValue();
18845
18846 // We can only invert integer setccs.
18847 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
18848 if (!SetCCOpVT.isScalarInteger())
18849 return SDValue();
18850
18851 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
18852 if (ISD::isIntEqualitySetCC(CCVal)) {
18853 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
18854 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
18855 Setcc.getOperand(1), CCVal);
18856 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
18857 // Invert (setlt 0, X) by converting to (setlt X, 1).
18858 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
18859 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
18860 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
18861    // Invert (setlt X, 1) by converting to (setlt 0, X).
18862 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
18863 DAG.getConstant(0, SDLoc(Setcc), VT),
18864 Setcc.getOperand(0), CCVal);
18865 } else
18866 return SDValue();
18867
18868 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
18869 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
18870}
18871
18872// Perform common combines for BR_CC and SELECT_CC conditions.
18873static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
18874 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
18875 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
18876
18877  // An arithmetic right shift preserves the sign, so when comparing against
18878  // zero with setlt/setge the shift can be omitted.
18879 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
18880 // setge (sra X, N), 0 -> setge X, 0
18881 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
18882 LHS.getOpcode() == ISD::SRA) {
18883 LHS = LHS.getOperand(0);
18884 return true;
18885 }
18886
18887 if (!ISD::isIntEqualitySetCC(CCVal))
18888 return false;
18889
18890 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
18891 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
18892 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
18893 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
18894 // If we're looking for eq 0 instead of ne 0, we need to invert the
18895 // condition.
18896 bool Invert = CCVal == ISD::SETEQ;
18897 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
18898 if (Invert)
18899 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
18900
18901 RHS = LHS.getOperand(1);
18902 LHS = LHS.getOperand(0);
18903 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
18904
18905 CC = DAG.getCondCode(CCVal);
18906 return true;
18907 }
18908
18909 // If XOR is reused and has an immediate that will fit in XORI,
18910 // do not fold.
18911 auto isXorImmediate = [](const SDValue &Op) -> bool {
18912 if (const auto *XorCnst = dyn_cast<ConstantSDNode>(Op))
18913 return isInt<12>(XorCnst->getSExtValue());
18914 return false;
18915 };
18916 // Fold (X(i1) ^ 1) == 0 -> X != 0
18917 auto singleBitOp = [&DAG](const SDValue &VarOp,
18918 const SDValue &ConstOp) -> bool {
18919 if (const auto *XorCnst = dyn_cast<ConstantSDNode>(ConstOp)) {
18920 const APInt Mask = APInt::getBitsSetFrom(VarOp.getValueSizeInBits(), 1);
18921 return (XorCnst->getSExtValue() == 1) &&
18922 DAG.MaskedValueIsZero(VarOp, Mask);
18923 }
18924 return false;
18925 };
18926 auto onlyUsedBySelectOrBR = [](const SDValue &Op) -> bool {
18927 for (const SDNode *UserNode : Op->users()) {
18928 const unsigned Opcode = UserNode->getOpcode();
18929 if (Opcode != RISCVISD::SELECT_CC && Opcode != RISCVISD::BR_CC)
18930 return false;
18931 }
18932 return true;
18933 };
18934 auto isFoldableXorEq = [isXorImmediate, singleBitOp, onlyUsedBySelectOrBR](
18935 const SDValue &LHS, const SDValue &RHS) -> bool {
18936 return LHS.getOpcode() == ISD::XOR && isNullConstant(RHS) &&
18937 (!isXorImmediate(LHS.getOperand(1)) ||
18938 singleBitOp(LHS.getOperand(0), LHS.getOperand(1)) ||
18939 onlyUsedBySelectOrBR(LHS));
18940 };
18941 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
18942 if (isFoldableXorEq(LHS, RHS)) {
18943 RHS = LHS.getOperand(1);
18944 LHS = LHS.getOperand(0);
18945 return true;
18946 }
18947  // Fold ((sext (xor X, C)), 0, eq/ne) -> ((sext X), C, eq/ne)
18948 if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG) {
18949 const SDValue LHS0 = LHS.getOperand(0);
18950 if (isFoldableXorEq(LHS0, RHS) && isa<ConstantSDNode>(LHS0.getOperand(1))) {
18951 // SEXT(XOR(X, Y)) -> XOR(SEXT(X), SEXT(Y)))
18952 RHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
18953 LHS0.getOperand(1), LHS.getOperand(1));
18954 LHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
18955 LHS0.getOperand(0), LHS.getOperand(1));
18956 return true;
18957 }
18958 }
18959
18960 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
18961 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
18962 LHS.getOperand(1).getOpcode() == ISD::Constant) {
18963 SDValue LHS0 = LHS.getOperand(0);
18964 if (LHS0.getOpcode() == ISD::AND &&
18965 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
18966 uint64_t Mask = LHS0.getConstantOperandVal(1);
18967 uint64_t ShAmt = LHS.getConstantOperandVal(1);
18968 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
18969 // XAndesPerf supports branch on test bit.
18970 if (Subtarget.hasVendorXAndesPerf()) {
18971 LHS =
18972 DAG.getNode(ISD::AND, DL, LHS.getValueType(), LHS0.getOperand(0),
18973 DAG.getConstant(Mask, DL, LHS.getValueType()));
18974 return true;
18975 }
18976
18977 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
18978 CC = DAG.getCondCode(CCVal);
18979
18980 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
18981 LHS = LHS0.getOperand(0);
18982 if (ShAmt != 0)
18983 LHS =
18984 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
18985 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
18986 return true;
18987 }
18988 }
18989 }
18990
18991  // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
18992 // This can occur when legalizing some floating point comparisons.
18993 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
18994 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
18995 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
18996 CC = DAG.getCondCode(CCVal);
18997 RHS = DAG.getConstant(0, DL, LHS.getValueType());
18998 return true;
18999 }
19000
19001 if (isNullConstant(RHS)) {
19002 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
19003 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
19004 CC = DAG.getCondCode(CCVal);
19005 LHS = NewCond;
19006 return true;
19007 }
19008 }
19009
19010 return false;
19011}
19012
19013// Fold
19014// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
19015// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
19016// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
19017// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
19018// (select C, (rotl Y, X), Y) -> (rotl Y, (select C, X, 0)).
19019// (select C, (rotr Y, X), Y) -> (rotr Y, (select C, X, 0)).
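// In each case the select of two full results is replaced by a select between
// X and the operation's identity value (0 here), which is typically cheaper to
// materialize, e.g. with a conditional-zero or a masked vector op.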
19020static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
19021                                   SDValue TrueVal, SDValue FalseVal,
19022 bool Swapped) {
19023 bool Commutative = true;
19024 unsigned Opc = TrueVal.getOpcode();
19025 switch (Opc) {
19026 default:
19027 return SDValue();
19028 case ISD::SHL:
19029 case ISD::SRA:
19030 case ISD::SRL:
19031 case ISD::SUB:
19032 case ISD::ROTL:
19033 case ISD::ROTR:
19034 Commutative = false;
19035 break;
19036 case ISD::ADD:
19037 case ISD::OR:
19038 case ISD::XOR:
19039 case ISD::UMIN:
19040 case ISD::UMAX:
19041 break;
19042 }
19043
19044 if (!TrueVal.hasOneUse())
19045 return SDValue();
19046
19047 unsigned OpToFold;
19048 if (FalseVal == TrueVal.getOperand(0))
19049 OpToFold = 0;
19050 else if (Commutative && FalseVal == TrueVal.getOperand(1))
19051 OpToFold = 1;
19052 else
19053 return SDValue();
19054
19055 EVT VT = N->getValueType(0);
19056 SDLoc DL(N);
19057 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
19058 EVT OtherOpVT = OtherOp.getValueType();
19059 SDValue IdentityOperand =
19060 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
19061 if (!Commutative)
19062 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
19063 assert(IdentityOperand && "No identity operand!");
19064
19065 if (Swapped)
19066 std::swap(OtherOp, IdentityOperand);
19067 SDValue NewSel =
19068 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
19069 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
19070}
19071
19072// This tries to get rid of `select` and `icmp` that are being used to handle
19073// `Targets` that do not support `cttz(0)`/`ctlz(0)`.
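// For example, (select (X == 0), 0, (cttz X)) becomes
// (and (cttz X), BitWidth-1): cttz of zero yields BitWidth on this path, and
// the mask folds that case to 0 while leaving all other results (which are
// < BitWidth) unchanged.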
19074static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
19075  SDValue Cond = N->getOperand(0);
19076
19077 // This represents either CTTZ or CTLZ instruction.
19078 SDValue CountZeroes;
19079
19080 SDValue ValOnZero;
19081
19082 if (Cond.getOpcode() != ISD::SETCC)
19083 return SDValue();
19084
19085 if (!isNullConstant(Cond->getOperand(1)))
19086 return SDValue();
19087
19088 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
19089 if (CCVal == ISD::CondCode::SETEQ) {
19090 CountZeroes = N->getOperand(2);
19091 ValOnZero = N->getOperand(1);
19092 } else if (CCVal == ISD::CondCode::SETNE) {
19093 CountZeroes = N->getOperand(1);
19094 ValOnZero = N->getOperand(2);
19095 } else {
19096 return SDValue();
19097 }
19098
19099 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
19100 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
19101 CountZeroes = CountZeroes.getOperand(0);
19102
19103 if (CountZeroes.getOpcode() != ISD::CTTZ &&
19104 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
19105 CountZeroes.getOpcode() != ISD::CTLZ &&
19106 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
19107 return SDValue();
19108
19109 if (!isNullConstant(ValOnZero))
19110 return SDValue();
19111
19112 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
19113 if (Cond->getOperand(0) != CountZeroesArgument)
19114 return SDValue();
19115
19116 unsigned BitWidth = CountZeroes.getValueSizeInBits();
19117 if (!isPowerOf2_32(BitWidth))
19118 return SDValue();
19119
19120 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
19121 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
19122 CountZeroes.getValueType(), CountZeroesArgument);
19123 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
19124 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
19125 CountZeroes.getValueType(), CountZeroesArgument);
19126 }
19127
19128 SDValue BitWidthMinusOne =
19129 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
19130
19131 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
19132 CountZeroes, BitWidthMinusOne);
19133 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
19134}
19135
19136static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
19137                                const RISCVSubtarget &Subtarget) {
19138 SDValue Cond = N->getOperand(0);
19139 SDValue True = N->getOperand(1);
19140 SDValue False = N->getOperand(2);
19141 SDLoc DL(N);
19142 EVT VT = N->getValueType(0);
19143 EVT CondVT = Cond.getValueType();
19144
19145 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
19146 return SDValue();
19147
19148  // Replace (setcc eq (and x, C)) with (setcc ne (and x, C)) to generate
19149 // BEXTI, where C is power of 2.
19150 if (Subtarget.hasBEXTILike() && VT.isScalarInteger() &&
19151 (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())) {
19152 SDValue LHS = Cond.getOperand(0);
19153 SDValue RHS = Cond.getOperand(1);
19154 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
19155 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
19156 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
19157 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
19158 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
19159 return DAG.getSelect(DL, VT,
19160 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
19161 False, True);
19162 }
19163 }
19164 return SDValue();
19165}
19166
19167static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC) {
19168 if (!TrueVal.hasOneUse() || !FalseVal.hasOneUse())
19169 return false;
19170
19171 SwapCC = false;
19172 if (TrueVal.getOpcode() == ISD::SUB && FalseVal.getOpcode() == ISD::ADD) {
19173 std::swap(TrueVal, FalseVal);
19174 SwapCC = true;
19175 }
19176
19177 if (TrueVal.getOpcode() != ISD::ADD || FalseVal.getOpcode() != ISD::SUB)
19178 return false;
19179
19180 SDValue A = FalseVal.getOperand(0);
19181 SDValue B = FalseVal.getOperand(1);
19182 // Add is commutative, so check both orders
19183 return ((TrueVal.getOperand(0) == A && TrueVal.getOperand(1) == B) ||
19184 (TrueVal.getOperand(1) == A && TrueVal.getOperand(0) == B));
19185}
19186
19187/// Convert vselect CC, (add a, b), (sub a, b) to add a, (vselect CC, -b, b).
19188/// This allows us match a vadd.vv fed by a masked vrsub, which reduces
19189/// register pressure over the add followed by masked vsub sequence.
19190static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
19191  SDLoc DL(N);
19192 EVT VT = N->getValueType(0);
19193 SDValue CC = N->getOperand(0);
19194 SDValue TrueVal = N->getOperand(1);
19195 SDValue FalseVal = N->getOperand(2);
19196
19197 bool SwapCC;
19198 if (!matchSelectAddSub(TrueVal, FalseVal, SwapCC))
19199 return SDValue();
19200
19201 SDValue Sub = SwapCC ? TrueVal : FalseVal;
19202 SDValue A = Sub.getOperand(0);
19203 SDValue B = Sub.getOperand(1);
19204
19205 // Arrange the select such that we can match a masked
19206 // vrsub.vi to perform the conditional negate
19207 SDValue NegB = DAG.getNegative(B, DL, VT);
19208 if (!SwapCC)
19209 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
19210 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
19211 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
19212}
19213
19214static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
19215                                    const RISCVSubtarget &Subtarget) {
19216 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
19217 return Folded;
19218
19219 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
19220 return V;
19221
19222 if (Subtarget.hasConditionalMoveFusion())
19223 return SDValue();
19224
19225 SDValue TrueVal = N->getOperand(1);
19226 SDValue FalseVal = N->getOperand(2);
19227 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
19228 return V;
19229 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
19230}
19231
19232/// If we have a build_vector where each lane is binop X, C, where C
19233/// is a constant (but not necessarily the same constant on all lanes),
19234/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
19235/// We assume that materializing a constant build vector will be no more
19236/// expensive than performing O(n) binops.
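/// For example, (build_vector (add x1, 1), (add x2, 2)) becomes
/// (add (build_vector x1, x2), (build_vector 1, 2)), trading per-lane scalar
/// adds for a single vector add and a constant vector.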
19237static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
19238                                          const RISCVSubtarget &Subtarget,
19239 const RISCVTargetLowering &TLI) {
19240 SDLoc DL(N);
19241 EVT VT = N->getValueType(0);
19242
19243 assert(!VT.isScalableVector() && "unexpected build vector");
19244
19245 if (VT.getVectorNumElements() == 1)
19246 return SDValue();
19247
19248 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
19249 if (!TLI.isBinOp(Opcode))
19250 return SDValue();
19251
19252 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
19253 return SDValue();
19254
19255 // This BUILD_VECTOR involves an implicit truncation, and sinking
19256 // truncates through binops is non-trivial.
19257 if (N->op_begin()->getValueType() != VT.getVectorElementType())
19258 return SDValue();
19259
19260 SmallVector<SDValue> LHSOps;
19261 SmallVector<SDValue> RHSOps;
19262 for (SDValue Op : N->ops()) {
19263 if (Op.isUndef()) {
19264 // We can't form a divide or remainder from undef.
19265 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
19266 return SDValue();
19267
19268 LHSOps.push_back(Op);
19269 RHSOps.push_back(Op);
19270 continue;
19271 }
19272
19273    // TODO: We can handle operations which have a neutral rhs value
19274 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
19275 // of profit in a more explicit manner.
19276 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
19277 return SDValue();
19278
19279 LHSOps.push_back(Op.getOperand(0));
19280 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
19281 !isa<ConstantFPSDNode>(Op.getOperand(1)))
19282 return SDValue();
19283 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
19284 // have different LHS and RHS types.
19285 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
19286 return SDValue();
19287
19288 RHSOps.push_back(Op.getOperand(1));
19289 }
19290
19291 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
19292 DAG.getBuildVector(VT, DL, RHSOps));
19293}
19294
19295static MVT getQDOTXResultType(MVT OpVT) {
19296  ElementCount OpEC = OpVT.getVectorElementCount();
19297 assert(OpEC.isKnownMultipleOf(4) && OpVT.getVectorElementType() == MVT::i8);
19298 return MVT::getVectorVT(MVT::i32, OpEC.divideCoefficientBy(4));
19299}
19300
19301/// Given fixed length vectors A and B with equal element types, but possibly
19302/// different number of elements, return A + B where either A or B is zero
19303/// padded to the larger number of elements.
19304static SDValue getZeroPaddedAdd(const SDLoc &DL, SDValue A, SDValue B,
19305                                SelectionDAG &DAG) {
19306 // NOTE: Manually doing the extract/add/insert scheme produces
19307 // significantly better codegen than the naive pad with zeros
19308 // and add scheme.
19309 EVT AVT = A.getValueType();
19310 EVT BVT = B.getValueType();
19311  assert(AVT.getVectorElementType() == BVT.getVectorElementType());
19312  if (AVT.getVectorMinNumElements() > BVT.getVectorMinNumElements()) {
19313    std::swap(A, B);
19314 std::swap(AVT, BVT);
19315 }
19316
19317 SDValue BPart = DAG.getExtractSubvector(DL, AVT, B, 0);
19318 SDValue Res = DAG.getNode(ISD::ADD, DL, AVT, A, BPart);
19319 return DAG.getInsertSubvector(DL, B, Res, 0);
19320}
19321
19322static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL,
19323                                         SelectionDAG &DAG,
19324 const RISCVSubtarget &Subtarget,
19325 const RISCVTargetLowering &TLI) {
19326 using namespace SDPatternMatch;
19327 // Note: We intentionally do not check the legality of the reduction type.
19328 // We want to handle the m4/m8 *src* types, and thus need to let illegal
19329 // intermediate types flow through here.
19330 if (InVec.getValueType().getVectorElementType() != MVT::i32 ||
19331      !InVec.getValueType().isFixedLengthVector())
19332    return SDValue();
19333
19334 // Recurse through adds/disjoint ors (since generic dag canonicalizes to that
19335 // form).
19336 SDValue A, B;
19337 if (sd_match(InVec, m_AddLike(m_Value(A), m_Value(B)))) {
19338 SDValue AOpt = foldReduceOperandViaVQDOT(A, DL, DAG, Subtarget, TLI);
19339 SDValue BOpt = foldReduceOperandViaVQDOT(B, DL, DAG, Subtarget, TLI);
19340 if (AOpt || BOpt) {
19341 if (AOpt)
19342 A = AOpt;
19343 if (BOpt)
19344 B = BOpt;
19345 // From here, we're doing A + B with mixed types, implicitly zero
19346 // padded to the wider type. Note that we *don't* need the result
19347 // type to be the original VT, and in fact prefer narrower ones
19348 // if possible.
19349 return getZeroPaddedAdd(DL, A, B, DAG);
19350 }
19351 }
19352
19353 // zext a <--> partial_reduce_umla 0, a, 1
19354 // sext a <--> partial_reduce_smla 0, a, 1
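  // With Zvqdotq these partial_reduce_*mla nodes can be lowered to
  // vqdot/vqdotu/vqdotsu, which accumulate four i8 products into each i32
  // lane, so a plain i8->i32 extend feeding the reduce is expressed as a dot
  // product against splat(1).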
19355 if (InVec.getOpcode() == ISD::ZERO_EXTEND ||
19356 InVec.getOpcode() == ISD::SIGN_EXTEND) {
19357 SDValue A = InVec.getOperand(0);
19358 EVT OpVT = A.getValueType();
19359 if (OpVT.getVectorElementType() != MVT::i8 || !TLI.isTypeLegal(OpVT))
19360 return SDValue();
19361
19362 MVT ResVT = getQDOTXResultType(A.getSimpleValueType());
19363 SDValue B = DAG.getConstant(0x1, DL, OpVT);
19364 bool IsSigned = InVec.getOpcode() == ISD::SIGN_EXTEND;
19365 unsigned Opc =
19366 IsSigned ? ISD::PARTIAL_REDUCE_SMLA : ISD::PARTIAL_REDUCE_UMLA;
19367 return DAG.getNode(Opc, DL, ResVT, {DAG.getConstant(0, DL, ResVT), A, B});
19368 }
19369
19370 // mul (sext a, sext b) -> partial_reduce_smla 0, a, b
19371 // mul (zext a, zext b) -> partial_reduce_umla 0, a, b
19372  // mul (sext a, zext b) -> partial_reduce_sumla 0, a, b
19373  // mul (zext a, sext b) -> partial_reduce_sumla 0, b, a (swapped)
19374 if (!sd_match(InVec, m_Mul(m_Value(A), m_Value(B))))
19375 return SDValue();
19376
19377 if (!ISD::isExtOpcode(A.getOpcode()))
19378 return SDValue();
19379
19380 EVT OpVT = A.getOperand(0).getValueType();
19381 if (OpVT.getVectorElementType() != MVT::i8 ||
19382 OpVT != B.getOperand(0).getValueType() ||
19383 !TLI.isTypeLegal(A.getValueType()))
19384 return SDValue();
19385
19386 unsigned Opc;
19387 if (A.getOpcode() == ISD::SIGN_EXTEND && B.getOpcode() == ISD::SIGN_EXTEND)
19388 Opc = ISD::PARTIAL_REDUCE_SMLA;
19389 else if (A.getOpcode() == ISD::ZERO_EXTEND &&
19390 B.getOpcode() == ISD::ZERO_EXTEND)
19391 Opc = ISD::PARTIAL_REDUCE_UMLA;
19392 else if (A.getOpcode() == ISD::SIGN_EXTEND &&
19393 B.getOpcode() == ISD::ZERO_EXTEND)
19394 Opc = ISD::PARTIAL_REDUCE_SUMLA;
19395 else if (A.getOpcode() == ISD::ZERO_EXTEND &&
19396 B.getOpcode() == ISD::SIGN_EXTEND) {
19397 Opc = ISD::PARTIAL_REDUCE_SUMLA;
19398 std::swap(A, B);
19399 } else
19400 return SDValue();
19401
19402 MVT ResVT = getQDOTXResultType(OpVT.getSimpleVT());
19403 return DAG.getNode(
19404 Opc, DL, ResVT,
19405 {DAG.getConstant(0, DL, ResVT), A.getOperand(0), B.getOperand(0)});
19406}
19407
19408static SDValue performVECREDUCECombine(SDNode *N, SelectionDAG &DAG,
19409                                       const RISCVSubtarget &Subtarget,
19410 const RISCVTargetLowering &TLI) {
19411 if (!Subtarget.hasStdExtZvqdotq())
19412 return SDValue();
19413
19414 SDLoc DL(N);
19415 EVT VT = N->getValueType(0);
19416 SDValue InVec = N->getOperand(0);
19417 if (SDValue V = foldReduceOperandViaVQDOT(InVec, DL, DAG, Subtarget, TLI))
19418 return DAG.getNode(ISD::VECREDUCE_ADD, DL, VT, V);
19419 return SDValue();
19420}
19421
19422static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
19423                                               const RISCVSubtarget &Subtarget,
19424 const RISCVTargetLowering &TLI) {
19425 SDValue InVec = N->getOperand(0);
19426 SDValue InVal = N->getOperand(1);
19427 SDValue EltNo = N->getOperand(2);
19428 SDLoc DL(N);
19429
19430 EVT VT = InVec.getValueType();
19431 if (VT.isScalableVector())
19432 return SDValue();
19433
19434 if (!InVec.hasOneUse())
19435 return SDValue();
19436
19437 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
19438 // move the insert_vector_elts into the arms of the binop. Note that
19439 // the new RHS must be a constant.
19440 const unsigned InVecOpcode = InVec->getOpcode();
19441 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
19442 InVal.hasOneUse()) {
19443 SDValue InVecLHS = InVec->getOperand(0);
19444 SDValue InVecRHS = InVec->getOperand(1);
19445 SDValue InValLHS = InVal->getOperand(0);
19446 SDValue InValRHS = InVal->getOperand(1);
19447
19448    if (!isa<ConstantSDNode>(InVecRHS) && !isa<ConstantFPSDNode>(InVecRHS))
19449      return SDValue();
19450 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
19451 return SDValue();
19452 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
19453 // have different LHS and RHS types.
19454 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
19455 return SDValue();
19456    SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
19457                              InVecLHS, InValLHS, EltNo);
19458    SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
19459                              InVecRHS, InValRHS, EltNo);
19460 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
19461 }
19462
19463 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
19464 // move the insert_vector_elt to the source operand of the concat_vector.
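  // For example, inserting into element 5 of a concat of two v4i32 sources
  // only needs to update element 1 of the second source vector.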
19465 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
19466 return SDValue();
19467
19468 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
19469 if (!IndexC)
19470 return SDValue();
19471 unsigned Elt = IndexC->getZExtValue();
19472
19473 EVT ConcatVT = InVec.getOperand(0).getValueType();
19474 if (ConcatVT.getVectorElementType() != InVal.getValueType())
19475 return SDValue();
19476 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
19477 unsigned NewIdx = Elt % ConcatNumElts;
19478
19479 unsigned ConcatOpIdx = Elt / ConcatNumElts;
19480 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
19481 ConcatOp = DAG.getInsertVectorElt(DL, ConcatOp, InVal, NewIdx);
19482
19483 SmallVector<SDValue> ConcatOps(InVec->ops());
19484 ConcatOps[ConcatOpIdx] = ConcatOp;
19485 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19486}
19487
19488// If we're concatenating a series of vector loads like
19489// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
19490// Then we can turn this into a strided load by widening the vector elements
19491// vlse32 p, stride=n
19492static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
19493                                            const RISCVSubtarget &Subtarget,
19494 const RISCVTargetLowering &TLI) {
19495 SDLoc DL(N);
19496 EVT VT = N->getValueType(0);
19497
19498 // Only perform this combine on legal MVTs.
19499 if (!TLI.isTypeLegal(VT))
19500 return SDValue();
19501
19502 // TODO: Potentially extend this to scalable vectors
19503 if (VT.isScalableVector())
19504 return SDValue();
19505
19506 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
19507 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
19508 !SDValue(BaseLd, 0).hasOneUse())
19509 return SDValue();
19510
19511 EVT BaseLdVT = BaseLd->getValueType(0);
19512
19513 // Go through the loads and check that they're strided
19514  SmallVector<LoadSDNode *> Lds;
19515  Lds.push_back(BaseLd);
19516 Align Align = BaseLd->getAlign();
19517 for (SDValue Op : N->ops().drop_front()) {
19518 auto *Ld = dyn_cast<LoadSDNode>(Op);
19519 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
19520 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
19521 Ld->getValueType(0) != BaseLdVT)
19522 return SDValue();
19523
19524 Lds.push_back(Ld);
19525
19526 // The common alignment is the most restrictive (smallest) of all the loads
19527 Align = std::min(Align, Ld->getAlign());
19528 }
19529
19530 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
19531 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
19532 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
19533 // If the load ptrs can be decomposed into a common (Base + Index) with a
19534 // common constant stride, then return the constant stride.
19535 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
19536 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
19537 if (BIO1.equalBaseIndex(BIO2, DAG))
19538 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
19539
19540 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
19541 SDValue P1 = Ld1->getBasePtr();
19542 SDValue P2 = Ld2->getBasePtr();
19543 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
19544 return {{P2.getOperand(1), false}};
19545 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
19546 return {{P1.getOperand(1), true}};
19547
19548 return std::nullopt;
19549 };
19550
19551 // Get the distance between the first and second loads
19552 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
19553 if (!BaseDiff)
19554 return SDValue();
19555
19556 // Check all the loads are the same distance apart
19557 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
19558 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
19559 return SDValue();
19560
19561 // TODO: At this point, we've successfully matched a generalized gather
19562 // load. Maybe we should emit that, and then move the specialized
19563 // matchers above and below into a DAG combine?
19564
19565 // Get the widened scalar type, e.g. v4i8 -> i64
19566 unsigned WideScalarBitWidth =
19567 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
19568 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
19569
19570 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
19571 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
19572 if (!TLI.isTypeLegal(WideVecVT))
19573 return SDValue();
19574
19575 // Check that the operation is legal
19576 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
19577 return SDValue();
19578
19579 auto [StrideVariant, MustNegateStride] = *BaseDiff;
19580 SDValue Stride =
19581 std::holds_alternative<SDValue>(StrideVariant)
19582 ? std::get<SDValue>(StrideVariant)
19583 : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
19584 Lds[0]->getOffset().getValueType());
19585 if (MustNegateStride)
19586 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
19587
19588 SDValue AllOneMask =
19589 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
19590 DAG.getConstant(1, DL, MVT::i1));
19591
19592 uint64_t MemSize;
19593 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
19594 ConstStride && ConstStride->getSExtValue() >= 0)
19595 // total size = (elsize * n) + (stride - elsize) * (n-1)
19596 // = elsize + stride * (n-1)
19597 MemSize = WideScalarVT.getSizeInBits() +
19598 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
19599 else
19600 // If Stride isn't constant, then we can't know how much it will load
19601    MemSize = MemoryLocation::UnknownSize;
19602
19603  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
19604 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
19605 Align);
19606
19607 SDValue StridedLoad = DAG.getStridedLoadVP(
19608 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
19609 AllOneMask,
19610 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
19611
19612 for (SDValue Ld : N->ops())
19613 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
19614
19615 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
19616}
19617
19618static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG,
19619                                            const RISCVSubtarget &Subtarget,
19620 const RISCVTargetLowering &TLI) {
19621 SDLoc DL(N);
19622 EVT VT = N->getValueType(0);
19623 const unsigned ElementSize = VT.getScalarSizeInBits();
19624 const unsigned NumElts = VT.getVectorNumElements();
19625 SDValue V1 = N->getOperand(0);
19626 SDValue V2 = N->getOperand(1);
19627 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
19628 MVT XLenVT = Subtarget.getXLenVT();
19629
19630  // Recognize a disguised select of add/sub.
19631 bool SwapCC;
19632 if (ShuffleVectorInst::isSelectMask(Mask, NumElts) &&
19633 matchSelectAddSub(V1, V2, SwapCC)) {
19634 SDValue Sub = SwapCC ? V1 : V2;
19635 SDValue A = Sub.getOperand(0);
19636 SDValue B = Sub.getOperand(1);
19637
19638 SmallVector<SDValue> MaskVals;
19639 for (int MaskIndex : Mask) {
19640 bool SelectMaskVal = (MaskIndex < (int)NumElts);
19641 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
19642 }
19643 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
19644 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
19645 SDValue CC = DAG.getBuildVector(MaskVT, DL, MaskVals);
19646
19647 // Arrange the select such that we can match a masked
19648 // vrsub.vi to perform the conditional negate
19649 SDValue NegB = DAG.getNegative(B, DL, VT);
19650 if (!SwapCC)
19651 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
19652 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
19653 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
19654 }
19655
19656 // Custom legalize <N x i128> or <N x i256> to <M x ELEN>. This runs
19657 // during the combine phase before type legalization, and relies on
19658 // DAGCombine not undoing the transform if isShuffleMaskLegal returns false
19659 // for the source mask.
19660 if (TLI.isTypeLegal(VT) || ElementSize <= Subtarget.getELen() ||
19661 !isPowerOf2_64(ElementSize) || VT.getVectorNumElements() % 2 != 0 ||
19662 VT.isFloatingPoint() || TLI.isShuffleMaskLegal(Mask, VT))
19663 return SDValue();
19664
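 // E.g. a v4i128 shuffle with mask <1, 0, 3, 2> becomes a v8i64 shuffle with
 // mask <2, 3, 0, 1, 6, 7, 4, 5>: each wide element is split into a pair of
 // half-width elements that always move together.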
19665 SmallVector<int, 8> NewMask;
19666 narrowShuffleMaskElts(2, Mask, NewMask);
19667
19668 LLVMContext &C = *DAG.getContext();
19669 EVT NewEltVT = EVT::getIntegerVT(C, ElementSize / 2);
19670 EVT NewVT = EVT::getVectorVT(C, NewEltVT, VT.getVectorNumElements() * 2);
19671 SDValue Res = DAG.getVectorShuffle(NewVT, DL, DAG.getBitcast(NewVT, V1),
19672 DAG.getBitcast(NewVT, V2), NewMask);
19673 return DAG.getBitcast(VT, Res);
19674}
19675
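// Fold (add (vwmul x, y), z) or (add_vl (vwmul_vl x, y), z) into the
// corresponding widening multiply-accumulate (vwmacc/vwmaccu/vwmaccsu) when
// the multiply's mask and VL match those of the add and both passthru
// operands are undef.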
19676static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
19677 const RISCVSubtarget &Subtarget) {
19678 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
19679
19680 if (N->getValueType(0).isFixedLengthVector())
19681 return SDValue();
19682
19683 SDValue Addend = N->getOperand(0);
19684 SDValue MulOp = N->getOperand(1);
19685
19686 if (N->getOpcode() == RISCVISD::ADD_VL) {
19687 SDValue AddPassthruOp = N->getOperand(2);
19688 if (!AddPassthruOp.isUndef())
19689 return SDValue();
19690 }
19691
19692 auto IsVWMulOpc = [](unsigned Opc) {
19693 switch (Opc) {
19694 case RISCVISD::VWMUL_VL:
19695 case RISCVISD::VWMULU_VL:
19696 case RISCVISD::VWMULSU_VL:
19697 return true;
19698 default:
19699 return false;
19700 }
19701 };
19702
19703 if (!IsVWMulOpc(MulOp.getOpcode()))
19704 std::swap(Addend, MulOp);
19705
19706 if (!IsVWMulOpc(MulOp.getOpcode()))
19707 return SDValue();
19708
19709 SDValue MulPassthruOp = MulOp.getOperand(2);
19710
19711 if (!MulPassthruOp.isUndef())
19712 return SDValue();
19713
19714 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
19715 const RISCVSubtarget &Subtarget) {
19716 if (N->getOpcode() == ISD::ADD) {
19717 SDLoc DL(N);
19718 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
19719 Subtarget);
19720 }
19721 return std::make_pair(N->getOperand(3), N->getOperand(4));
19722 }(N, DAG, Subtarget);
19723
19724 SDValue MulMask = MulOp.getOperand(3);
19725 SDValue MulVL = MulOp.getOperand(4);
19726
19727 if (AddMask != MulMask || AddVL != MulVL)
19728 return SDValue();
19729
19730 const auto &TSInfo =
19731 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
19732 unsigned Opc = TSInfo.getMAccOpcode(MulOp.getOpcode());
19733
19734 SDLoc DL(N);
19735 EVT VT = N->getValueType(0);
19736 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
19737 AddVL};
19738 return DAG.getNode(Opc, DL, VT, Ops);
19739}
19740
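// Fold the addend of an add into the accumulator of a vqdot:
//   (add (vqdot[u,su] a, b, acc), z) -> (vqdot[u,su] a, b, (add_vl acc, z))
// provided the add uses an all-ones (vmset) mask with the same VL as the dot
// product.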
19741static SDValue combineVqdotAccum(SDNode *N, SelectionDAG &DAG,
19742 const RISCVSubtarget &Subtarget) {
19743
19744 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
19745
19746 if (!N->getValueType(0).isVector())
19747 return SDValue();
19748
19749 SDValue Addend = N->getOperand(0);
19750 SDValue DotOp = N->getOperand(1);
19751
19752 if (N->getOpcode() == RISCVISD::ADD_VL) {
19753 SDValue AddPassthruOp = N->getOperand(2);
19754 if (!AddPassthruOp.isUndef())
19755 return SDValue();
19756 }
19757
19758 auto IsVqdotqOpc = [](unsigned Opc) {
19759 switch (Opc) {
19760 case RISCVISD::VQDOT_VL:
19761 case RISCVISD::VQDOTU_VL:
19762 case RISCVISD::VQDOTSU_VL:
19763 return true;
19764 default:
19765 return false;
19766 }
19767 };
19768
19769 if (!IsVqdotqOpc(DotOp.getOpcode()))
19770 std::swap(Addend, DotOp);
19771
19772 if (!IsVqdotqOpc(DotOp.getOpcode()))
19773 return SDValue();
19774
19775 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
19776 const RISCVSubtarget &Subtarget) {
19777 if (N->getOpcode() == ISD::ADD) {
19778 SDLoc DL(N);
19779 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
19780 Subtarget);
19781 }
19782 return std::make_pair(N->getOperand(3), N->getOperand(4));
19783 }(N, DAG, Subtarget);
19784
19785 SDValue MulVL = DotOp.getOperand(4);
19786 if (AddVL != MulVL)
19787 return SDValue();
19788
19789 if (AddMask.getOpcode() != RISCVISD::VMSET_VL ||
19790 AddMask.getOperand(0) != MulVL)
19791 return SDValue();
19792
19793 SDValue AccumOp = DotOp.getOperand(2);
19794 SDLoc DL(N);
19795 EVT VT = N->getValueType(0);
19796 Addend = DAG.getNode(RISCVISD::ADD_VL, DL, VT, Addend, AccumOp,
19797 DAG.getUNDEF(VT), AddMask, AddVL);
19798
19799 SDValue Ops[] = {DotOp.getOperand(0), DotOp.getOperand(1), Addend,
19800 DotOp.getOperand(3), DotOp->getOperand(4)};
19801 return DAG.getNode(DotOp->getOpcode(), DL, VT, Ops);
19802}
19803
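// Convert a signed scatter/gather index into a form the "unsigned unscaled"
// RVV addressing mode can consume: sign-extend indices narrower than XLEN so
// no bits are lost, then re-tag the index type as unsigned. Returns true if
// anything was changed.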
19804static bool
19805legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
19806 ISD::MemIndexType &IndexType,
19807 RISCVTargetLowering::DAGCombinerInfo &DCI) {
19808 if (!DCI.isBeforeLegalize())
19809 return false;
19810
19811 SelectionDAG &DAG = DCI.DAG;
19812 const MVT XLenVT =
19813 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
19814
19815 const EVT IndexVT = Index.getValueType();
19816
19817 // RISC-V indexed loads only support the "unsigned unscaled" addressing
19818 // mode, so anything else must be manually legalized.
19819 if (!isIndexTypeSigned(IndexType))
19820 return false;
19821
19822 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
19823 // Any index legalization should first promote to XLenVT, so we don't lose
19824 // bits when scaling. This may create an illegal index type so we let
19825 // LLVM's legalization take care of the splitting.
19826 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
19827 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
19828 EVT::getVectorVT(*DAG.getContext(), XLenVT,
19829 IndexVT.getVectorElementCount()),
19830 Index);
19831 }
19832 IndexType = ISD::UNSIGNED_SCALED;
19833 return true;
19834}
19835
19836/// Match the index vector of a scatter or gather node as the shuffle mask
19837/// which performs the rearrangement if possible. Will only match if
19838/// all lanes are touched, and thus replacing the scatter or gather with
19839/// a unit strided access and shuffle is legal.
19840static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
19841 SmallVector<int> &ShuffleMask) {
19842 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
19843 return false;
19844 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
19845 return false;
19846
19847 const unsigned ElementSize = VT.getScalarStoreSize();
19848 const unsigned NumElems = VT.getVectorNumElements();
19849
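 // E.g. for i32 elements, the constant index vector <4, 0, 12, 8> touches
 // every lane exactly once and corresponds to the shuffle mask <1, 0, 3, 2>.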
19850 // Create the shuffle mask and check all bits active
19851 assert(ShuffleMask.empty());
19852 BitVector ActiveLanes(NumElems);
19853 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
19854 // TODO: We've found an active bit of UB, and could be
19855 // more aggressive here if desired.
19856 if (Index->getOperand(i)->isUndef())
19857 return false;
19858 uint64_t C = Index->getConstantOperandVal(i);
19859 if (C % ElementSize != 0)
19860 return false;
19861 C = C / ElementSize;
19862 if (C >= NumElems)
19863 return false;
19864 ShuffleMask.push_back(C);
19865 ActiveLanes.set(C);
19866 }
19867 return ActiveLanes.all();
19868}
19869
19870/// Match the index of a gather or scatter operation as an operation
19871/// with twice the element width and half the number of elements. This is
19872/// generally profitable (if legal) because these operations are linear
19873 /// in VL, so even if we cause some extra VTYPE/VL toggles, we still
19874/// come out ahead.
19875static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
19876 Align BaseAlign, const RISCVSubtarget &ST) {
19877 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
19878 return false;
19879 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
19880 return false;
19881
19882 // Attempt a doubling. If we can use an element type 4x or 8x in
19883 // size, this will happen via multiple iterations of the transform.
19884 const unsigned NumElems = VT.getVectorNumElements();
19885 if (NumElems % 2 != 0)
19886 return false;
19887
19888 const unsigned ElementSize = VT.getScalarStoreSize();
19889 const unsigned WiderElementSize = ElementSize * 2;
19890 if (WiderElementSize > ST.getELen()/8)
19891 return false;
19892
19893 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
19894 return false;
19895
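 // E.g. for i32 elements, the index vector <0, 4, 16, 20> can instead be
 // accessed as i64 elements at byte offsets <0, 16>: every even-position
 // index is aligned to the wider element and every odd-position index is
 // exactly one element past its predecessor.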
19896 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
19897 // TODO: We've found an active bit of UB, and could be
19898 // more aggressive here if desired.
19899 if (Index->getOperand(i)->isUndef())
19900 return false;
19901 // TODO: This offset check is too strict if we support fully
19902 // misaligned memory operations.
19903 uint64_t C = Index->getConstantOperandVal(i);
19904 if (i % 2 == 0) {
19905 if (C % WiderElementSize != 0)
19906 return false;
19907 continue;
19908 }
19909 uint64_t Last = Index->getConstantOperandVal(i-1);
19910 if (C != Last + ElementSize)
19911 return false;
19912 }
19913 return true;
19914}
19915
19916// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
19917 // This benefits the cases where X and Y are low-precision vectors of the
19918 // same value type. Since the truncate would be lowered into n levels of
19919 // TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate restriction,
19920 // such a pattern would otherwise be expanded into a series of "vsetvli"
19921 // and "vnsrl" instructions later to reach this point.
19922static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) {
19923 SDValue Mask = N->getOperand(1);
19924 SDValue VL = N->getOperand(2);
19925
19926 bool IsVLMAX = isAllOnesConstant(VL) ||
19927 (isa<RegisterSDNode>(VL) &&
19928 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
19929 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
19930 Mask.getOperand(0) != VL)
19931 return SDValue();
19932
19933 auto IsTruncNode = [&](SDValue V) {
19934 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
19935 V.getOperand(1) == Mask && V.getOperand(2) == VL;
19936 };
19937
19938 SDValue Op = N->getOperand(0);
19939
19940 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
19941 // to distinguish such pattern.
19942 while (IsTruncNode(Op)) {
19943 if (!Op.hasOneUse())
19944 return SDValue();
19945 Op = Op.getOperand(0);
19946 }
19947
19948 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
19949 return SDValue();
19950
19951 SDValue N0 = Op.getOperand(0);
19952 SDValue N1 = Op.getOperand(1);
19953 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
19954 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
19955 return SDValue();
19956
19957 SDValue N00 = N0.getOperand(0);
19958 SDValue N10 = N1.getOperand(0);
19959 if (!N00.getValueType().isVector() ||
19960 N00.getValueType() != N10.getValueType() ||
19961 N->getValueType(0) != N10.getValueType())
19962 return SDValue();
19963
19964 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
19965 SDValue SMin =
19966 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
19967 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
19968 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
19969}
19970
19971// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
19972// maximum value for the truncated type.
19973// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
19974// is the signed maximum value for the truncated type and C2 is the signed
19975// minimum value.
19976static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
19977 const RISCVSubtarget &Subtarget) {
19978 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
19979
19980 MVT VT = N->getSimpleValueType(0);
19981
19982 SDValue Mask = N->getOperand(1);
19983 SDValue VL = N->getOperand(2);
19984
19985 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
19986 APInt &SplatVal) {
19987 if (V.getOpcode() != Opc &&
19988 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
19989 V.getOperand(3) == Mask && V.getOperand(4) == VL))
19990 return SDValue();
19991
19992 SDValue Op = V.getOperand(1);
19993
19994 // Peek through conversion between fixed and scalable vectors.
19995 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
19996 isNullConstant(Op.getOperand(2)) &&
19997 Op.getOperand(1).getValueType().isFixedLengthVector() &&
19998 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
19999 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
20000 isNullConstant(Op.getOperand(1).getOperand(1)))
20001 Op = Op.getOperand(1).getOperand(0);
20002
20003 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
20004 return V.getOperand(0);
20005
20006 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
20007 Op.getOperand(2) == VL) {
20008 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
20009 SplatVal =
20010 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
20011 return V.getOperand(0);
20012 }
20013 }
20014
20015 return SDValue();
20016 };
20017
20018 SDLoc DL(N);
20019
20020 auto DetectUSatPattern = [&](SDValue V) {
20021 APInt LoC, HiC;
20022
20023 // Simple case, V is a UMIN.
20024 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
20025 if (HiC.isMask(VT.getScalarSizeInBits()))
20026 return UMinOp;
20027
20028 // If we have an SMAX that removes negative numbers first, then we can match
20029 // SMIN instead of UMIN.
20030 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
20031 if (SDValue SMaxOp =
20032 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
20033 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
20034 return SMinOp;
20035
20036 // If we have an SMIN before an SMAX and the SMAX constant is less than or
20037 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
20038 // first.
20039 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
20040 if (SDValue SMinOp =
20041 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
20042 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
20043 HiC.uge(LoC))
20044 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
20045 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
20046 Mask, VL);
20047
20048 return SDValue();
20049 };
20050
20051 auto DetectSSatPattern = [&](SDValue V) {
20052 unsigned NumDstBits = VT.getScalarSizeInBits();
20053 unsigned NumSrcBits = V.getScalarValueSizeInBits();
20054 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
20055 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
20056
20057 APInt HiC, LoC;
20058 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
20059 if (SDValue SMaxOp =
20060 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
20061 if (HiC == SignedMax && LoC == SignedMin)
20062 return SMaxOp;
20063
20064 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
20065 if (SDValue SMinOp =
20066 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
20067 if (HiC == SignedMax && LoC == SignedMin)
20068 return SMinOp;
20069
20070 return SDValue();
20071 };
20072
20073 SDValue Src = N->getOperand(0);
20074
20075 // Look through multiple layers of truncates.
20076 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
20077 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
20078 Src.hasOneUse())
20079 Src = Src.getOperand(0);
20080
20081 SDValue Val;
20082 unsigned ClipOpc;
20083 if ((Val = DetectUSatPattern(Src)))
20084 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
20085 else if ((Val = DetectSSatPattern(Src)))
20086 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
20087 else
20088 return SDValue();
20089
20090 MVT ValVT = Val.getSimpleValueType();
20091
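 // Emit one vnclip(u) per halving step so that a multi-level saturating
 // truncate (e.g. i32 -> i8) becomes a chain of SEW*2->SEW narrowing clips
 // (i32 -> i16 -> i8).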
20092 do {
20093 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
20094 ValVT = ValVT.changeVectorElementType(ValEltVT);
20095 Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
20096 } while (ValVT != VT);
20097
20098 return Val;
20099}
20100
20101// Convert
20102// (iX ctpop (bitcast (vXi1 A)))
20103// ->
20104// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
20105// and
20106// (iN reduce.add (zext (vXi1 A to vXiN))
20107// ->
20108// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
20109// FIXME: It's complicated to match all the variations of this after type
20110// legalization so we only handle the pre-type legalization pattern, but that
20111// requires the fixed vector type to be legal.
20112static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG,
20113 const RISCVSubtarget &Subtarget) {
20114 unsigned Opc = N->getOpcode();
20115 assert((Opc == ISD::CTPOP || Opc == ISD::VECREDUCE_ADD) &&
20116 "Unexpected opcode");
20117 EVT VT = N->getValueType(0);
20118 if (!VT.isScalarInteger())
20119 return SDValue();
20120
20121 SDValue Src = N->getOperand(0);
20122
20123 if (Opc == ISD::CTPOP) {
20124 // Peek through zero_extend. It doesn't change the count.
20125 if (Src.getOpcode() == ISD::ZERO_EXTEND)
20126 Src = Src.getOperand(0);
20127
20128 if (Src.getOpcode() != ISD::BITCAST)
20129 return SDValue();
20130 Src = Src.getOperand(0);
20131 } else if (Opc == ISD::VECREDUCE_ADD) {
20132 if (Src.getOpcode() != ISD::ZERO_EXTEND)
20133 return SDValue();
20134 Src = Src.getOperand(0);
20135 }
20136
20137 EVT SrcEVT = Src.getValueType();
20138 if (!SrcEVT.isSimple())
20139 return SDValue();
20140
20141 MVT SrcMVT = SrcEVT.getSimpleVT();
20142 // Make sure the input is an i1 vector.
20143 if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
20144 return SDValue();
20145
20146 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20147 if (!TLI.isTypeLegal(SrcMVT))
20148 return SDValue();
20149
20150 // Check that destination type is large enough to hold result without
20151 // overflow.
20152 if (Opc == ISD::VECREDUCE_ADD) {
20153 unsigned EltSize = SrcMVT.getScalarSizeInBits();
20154 unsigned MinSize = SrcMVT.getSizeInBits().getKnownMinValue();
20155 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
20156 unsigned MaxVLMAX = SrcMVT.isFixedLengthVector()
20157 ? SrcMVT.getVectorNumElements()
20158 : RISCVTargetLowering::computeVLMAX(
20159 VectorBitsMax, EltSize, MinSize);
20160 if (VT.getFixedSizeInBits() < Log2_32(MaxVLMAX) + 1)
20161 return SDValue();
20162 }
20163
20164 MVT ContainerVT = SrcMVT;
20165 if (SrcMVT.isFixedLengthVector()) {
20166 ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
20167 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
20168 }
20169
20170 SDLoc DL(N);
20171 auto [Mask, VL] = getDefaultVLOps(SrcMVT, ContainerVT, DL, DAG, Subtarget);
20172
20173 MVT XLenVT = Subtarget.getXLenVT();
20174 SDValue Pop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Src, Mask, VL);
20175 return DAG.getZExtOrTrunc(Pop, DL, VT);
20176}
20177
20178static SDValue performSHLCombine(SDNode *N,
20179 TargetLowering::DAGCombinerInfo &DCI,
20180 const RISCVSubtarget &Subtarget) {
20181 // (shl (zext x), y) -> (vwsll x, y)
20182 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20183 return V;
20184
20185 // (shl (sext x), C) -> (vwmulsu x, 1u << C)
20186 // (shl (zext x), C) -> (vwmulu x, 1u << C)
20187
20188 if (!DCI.isAfterLegalizeDAG())
20189 return SDValue();
20190
20191 SDValue LHS = N->getOperand(0);
20192 if (!LHS.hasOneUse())
20193 return SDValue();
20194 unsigned Opcode;
20195 switch (LHS.getOpcode()) {
20196 case ISD::SIGN_EXTEND:
20197 case RISCVISD::VSEXT_VL:
20198 Opcode = RISCVISD::VWMULSU_VL;
20199 break;
20200 case ISD::ZERO_EXTEND:
20201 case RISCVISD::VZEXT_VL:
20202 Opcode = RISCVISD::VWMULU_VL;
20203 break;
20204 default:
20205 return SDValue();
20206 }
20207
20208 SDValue RHS = N->getOperand(1);
20209 APInt ShAmt;
20210 uint64_t ShAmtInt;
20211 if (ISD::isConstantSplatVector(RHS.getNode(), ShAmt))
20212 ShAmtInt = ShAmt.getZExtValue();
20213 else if (RHS.getOpcode() == RISCVISD::VMV_V_X_VL &&
20214 RHS.getOperand(1).getOpcode() == ISD::Constant)
20215 ShAmtInt = RHS.getConstantOperandVal(1);
20216 else
20217 return SDValue();
20218
20219 // Better foldings:
20220 // (shl (sext x), 1) -> (vwadd x, x)
20221 // (shl (zext x), 1) -> (vwaddu x, x)
20222 if (ShAmtInt <= 1)
20223 return SDValue();
20224
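 // E.g. (shl (zext v8i8 x to v8i16), splat 3) becomes (vwmulu x, splat 8),
 // letting the widening multiply perform both the extend and the shift.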
20225 SDValue NarrowOp = LHS.getOperand(0);
20226 MVT NarrowVT = NarrowOp.getSimpleValueType();
20227 uint64_t NarrowBits = NarrowVT.getScalarSizeInBits();
20228 if (ShAmtInt >= NarrowBits)
20229 return SDValue();
20230 MVT VT = N->getSimpleValueType(0);
20231 if (NarrowBits * 2 != VT.getScalarSizeInBits())
20232 return SDValue();
20233
20234 SelectionDAG &DAG = DCI.DAG;
20235 SDLoc DL(N);
20236 SDValue Passthru, Mask, VL;
20237 switch (N->getOpcode()) {
20238 case ISD::SHL:
20239 Passthru = DAG.getUNDEF(VT);
20240 std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
20241 break;
20242 case RISCVISD::SHL_VL:
20243 Passthru = N->getOperand(2);
20244 Mask = N->getOperand(3);
20245 VL = N->getOperand(4);
20246 break;
20247 default:
20248 llvm_unreachable("Expected SHL");
20249 }
20250 return DAG.getNode(Opcode, DL, VT, NarrowOp,
20251 DAG.getConstant(1ULL << ShAmtInt, SDLoc(RHS), NarrowVT),
20252 Passthru, Mask, VL);
20253}
20254
20255SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
20256 DAGCombinerInfo &DCI) const {
20257 SelectionDAG &DAG = DCI.DAG;
20258 const MVT XLenVT = Subtarget.getXLenVT();
20259 SDLoc DL(N);
20260
20261 // Helper to call SimplifyDemandedBits on an operand of N where only some low
20262 // bits are demanded. N will be added to the Worklist if it was not deleted.
20263 // Caller should return SDValue(N, 0) if this returns true.
20264 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
20265 SDValue Op = N->getOperand(OpNo);
20266 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
20267 if (!SimplifyDemandedBits(Op, Mask, DCI))
20268 return false;
20269
20270 if (N->getOpcode() != ISD::DELETED_NODE)
20271 DCI.AddToWorklist(N);
20272 return true;
20273 };
20274
20275 switch (N->getOpcode()) {
20276 default:
20277 break;
20278 case RISCVISD::SplitF64: {
20279 SDValue Op0 = N->getOperand(0);
20280 // If the input to SplitF64 is just BuildPairF64 then the operation is
20281 // redundant. Instead, use BuildPairF64's operands directly.
20282 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
20283 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
20284
20285 if (Op0->isUndef()) {
20286 SDValue Lo = DAG.getUNDEF(MVT::i32);
20287 SDValue Hi = DAG.getUNDEF(MVT::i32);
20288 return DCI.CombineTo(N, Lo, Hi);
20289 }
20290
20291 // It's cheaper to materialise two 32-bit integers than to load a double
20292 // from the constant pool and transfer it to integer registers through the
20293 // stack.
20294 if (auto *C = dyn_cast<ConstantFPSDNode>(Op0)) {
20295 APInt V = C->getValueAPF().bitcastToAPInt();
20296 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
20297 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
20298 return DCI.CombineTo(N, Lo, Hi);
20299 }
20300
20301 // This is a target-specific version of a DAGCombine performed in
20302 // DAGCombiner::visitBITCAST. It performs the equivalent of:
20303 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
20304 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
20305 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
20306 !Op0.getNode()->hasOneUse() || Subtarget.hasStdExtZdinx())
20307 break;
20308 SDValue NewSplitF64 =
20309 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
20310 Op0.getOperand(0));
20311 SDValue Lo = NewSplitF64.getValue(0);
20312 SDValue Hi = NewSplitF64.getValue(1);
20313 APInt SignBit = APInt::getSignMask(32);
20314 if (Op0.getOpcode() == ISD::FNEG) {
20315 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
20316 DAG.getConstant(SignBit, DL, MVT::i32));
20317 return DCI.CombineTo(N, Lo, NewHi);
20318 }
20319 assert(Op0.getOpcode() == ISD::FABS);
20320 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
20321 DAG.getConstant(~SignBit, DL, MVT::i32));
20322 return DCI.CombineTo(N, Lo, NewHi);
20323 }
20324 case RISCVISD::SLLW:
20325 case RISCVISD::SRAW:
20326 case RISCVISD::SRLW:
20327 case RISCVISD::RORW:
20328 case RISCVISD::ROLW: {
20329 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
20330 if (SimplifyDemandedLowBitsHelper(0, 32) ||
20331 SimplifyDemandedLowBitsHelper(1, 5))
20332 return SDValue(N, 0);
20333
20334 break;
20335 }
20336 case RISCVISD::ABSW:
20337 case RISCVISD::CLZW:
20338 case RISCVISD::CTZW: {
20339 // Only the lower 32 bits of the first operand are read
20340 if (SimplifyDemandedLowBitsHelper(0, 32))
20341 return SDValue(N, 0);
20342 break;
20343 }
20344 case RISCVISD::FMV_W_X_RV64: {
20345 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
20346 // conversion is unnecessary and can be replaced with the
20347 // FMV_X_ANYEXTW_RV64 operand.
20348 SDValue Op0 = N->getOperand(0);
20349 if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
20350 return Op0.getOperand(0);
20351 break;
20352 }
20353 case RISCVISD::FMV_X_ANYEXTH:
20354 case RISCVISD::FMV_X_ANYEXTW_RV64: {
20355 SDLoc DL(N);
20356 SDValue Op0 = N->getOperand(0);
20357 MVT VT = N->getSimpleValueType(0);
20358
20359 // Constant fold.
20360 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
20361 APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits());
20362 return DAG.getConstant(Val, DL, VT);
20363 }
20364
20365 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
20366 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
20367 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
20368 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
20369 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
20370 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
20371 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
20372 assert(Op0.getOperand(0).getValueType() == VT &&
20373 "Unexpected value type!");
20374 return Op0.getOperand(0);
20375 }
20376
20377 if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
20378 cast<LoadSDNode>(Op0)->isSimple()) {
20379 MVT IVT = MVT::getIntegerVT(Op0.getValueSizeInBits());
20380 auto *LN0 = cast<LoadSDNode>(Op0);
20381 SDValue Load =
20382 DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(),
20383 LN0->getBasePtr(), IVT, LN0->getMemOperand());
20384 DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
20385 return Load;
20386 }
20387
20388 // This is a target-specific version of a DAGCombine performed in
20389 // DAGCombiner::visitBITCAST. It performs the equivalent of:
20390 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
20391 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
20392 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
20393 !Op0.getNode()->hasOneUse())
20394 break;
20395 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
20396 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
20397 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
20398 if (Op0.getOpcode() == ISD::FNEG)
20399 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
20400 DAG.getConstant(SignBit, DL, VT));
20401
20402 assert(Op0.getOpcode() == ISD::FABS);
20403 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
20404 DAG.getConstant(~SignBit, DL, VT));
20405 }
20406 case ISD::ABS: {
20407 EVT VT = N->getValueType(0);
20408 SDValue N0 = N->getOperand(0);
20409 // abs (sext) -> zext (abs)
20410 // abs (zext) -> zext (handled elsewhere)
20411 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
20412 SDValue Src = N0.getOperand(0);
20413 SDLoc DL(N);
20414 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
20415 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
20416 }
20417 break;
20418 }
20419 case ISD::ADD: {
20420 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20421 return V;
20422 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
20423 return V;
20424 if (SDValue V = combineVqdotAccum(N, DAG, Subtarget))
20425 return V;
20426 return performADDCombine(N, DCI, Subtarget);
20427 }
20428 case ISD::SUB: {
20429 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20430 return V;
20431 return performSUBCombine(N, DAG, Subtarget);
20432 }
20433 case ISD::AND:
20434 return performANDCombine(N, DCI, Subtarget);
20435 case ISD::OR: {
20436 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20437 return V;
20438 return performORCombine(N, DCI, Subtarget);
20439 }
20440 case ISD::XOR:
20441 return performXORCombine(N, DAG, Subtarget);
20442 case ISD::MUL:
20443 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20444 return V;
20445 return performMULCombine(N, DAG, DCI, Subtarget);
20446 case ISD::SDIV:
20447 case ISD::UDIV:
20448 case ISD::SREM:
20449 case ISD::UREM:
20450 if (SDValue V = combineBinOpOfZExt(N, DAG))
20451 return V;
20452 break;
20453 case ISD::FMUL: {
20454 using namespace SDPatternMatch;
20455 SDLoc DL(N);
20456 EVT VT = N->getValueType(0);
20457 SDValue X, Y;
20458 // InstCombine canonicalizes fneg (fmul x, y) -> fmul x, (fneg y), see
20459 // hoistFNegAboveFMulFDiv.
20460 // Undo this and sink the fneg so we match more fmsub/fnmadd patterns.
20461 if (sd_match(N, m_FMul(m_Value(X), m_FNeg(m_Value(Y)))))
20462 return DAG.getNode(ISD::FNEG, DL, VT,
20463 DAG.getNode(ISD::FMUL, DL, VT, X, Y));
20464
20465 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
20466 SDValue N0 = N->getOperand(0);
20467 SDValue N1 = N->getOperand(1);
20468 if (N0->getOpcode() != ISD::FCOPYSIGN)
20469 std::swap(N0, N1);
20470 if (N0->getOpcode() != ISD::FCOPYSIGN)
20471 return SDValue();
20472 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
20473 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
20474 return SDValue();
20475 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
20476 return SDValue();
20477 SDValue Sign = N0->getOperand(1);
20478 if (Sign.getValueType() != VT)
20479 return SDValue();
20480 return DAG.getNode(RISCVISD::FSGNJX, DL, VT, N1, N0->getOperand(1));
20481 }
20482 case ISD::FADD:
20483 case ISD::UMAX:
20484 case ISD::UMIN:
20485 case ISD::SMAX:
20486 case ISD::SMIN:
20487 case ISD::FMAXNUM:
20488 case ISD::FMINNUM: {
20489 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
20490 return V;
20491 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
20492 return V;
20493 return SDValue();
20494 }
20495 case ISD::SETCC:
20496 return performSETCCCombine(N, DCI, Subtarget);
20497 case ISD::SIGN_EXTEND_INREG:
20498 return performSIGN_EXTEND_INREGCombine(N, DCI, Subtarget);
20499 case ISD::ZERO_EXTEND:
20500 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
20501 // type legalization. This is safe because fp_to_uint produces poison if
20502 // it overflows.
20503 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
20504 SDValue Src = N->getOperand(0);
20505 if (Src.getOpcode() == ISD::FP_TO_UINT &&
20506 isTypeLegal(Src.getOperand(0).getValueType()))
20507 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
20508 Src.getOperand(0));
20509 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
20510 isTypeLegal(Src.getOperand(1).getValueType())) {
20511 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
20512 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
20513 Src.getOperand(0), Src.getOperand(1));
20514 DCI.CombineTo(N, Res);
20515 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
20516 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
20517 return SDValue(N, 0); // Return N so it doesn't get rechecked.
20518 }
20519 }
20520 return SDValue();
20521 case RISCVISD::TRUNCATE_VECTOR_VL:
20522 if (SDValue V = combineTruncOfSraSext(N, DAG))
20523 return V;
20524 return combineTruncToVnclip(N, DAG, Subtarget);
20525 case ISD::VP_TRUNCATE:
20526 return performVP_TRUNCATECombine(N, DAG, Subtarget);
20527 case ISD::TRUNCATE:
20528 return performTRUNCATECombine(N, DAG, Subtarget);
20529 case ISD::SELECT:
20530 return performSELECTCombine(N, DAG, Subtarget);
20531 case ISD::VSELECT:
20532 return performVSELECTCombine(N, DAG);
20533 case RISCVISD::CZERO_EQZ:
20534 case RISCVISD::CZERO_NEZ: {
20535 SDValue Val = N->getOperand(0);
20536 SDValue Cond = N->getOperand(1);
20537
20538 unsigned Opc = N->getOpcode();
20539
20540 // czero_eqz x, x -> x
20541 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
20542 return Val;
20543
20544 unsigned InvOpc =
20545 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
20546
20547 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
20548 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
20549 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
20550 SDValue NewCond = Cond.getOperand(0);
20551 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
20552 if (DAG.MaskedValueIsZero(NewCond, Mask))
20553 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
20554 }
20555 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
20556 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
20557 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
20558 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
20559 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
20560 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
20561 if (ISD::isIntEqualitySetCC(CCVal))
20562 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
20563 N->getValueType(0), Val, Cond.getOperand(0));
20564 }
20565 return SDValue();
20566 }
20567 case RISCVISD::SELECT_CC: {
20568 // Transform
20569 SDValue LHS = N->getOperand(0);
20570 SDValue RHS = N->getOperand(1);
20571 SDValue CC = N->getOperand(2);
20572 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
20573 SDValue TrueV = N->getOperand(3);
20574 SDValue FalseV = N->getOperand(4);
20575 SDLoc DL(N);
20576 EVT VT = N->getValueType(0);
20577
20578 // If the True and False values are the same, we don't need a select_cc.
20579 if (TrueV == FalseV)
20580 return TrueV;
20581
20582 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
20583 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
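 // E.g. (select (x < 0), 3, 8) becomes ((x >> (XLEN - 1)) & -5) + 8: the
 // arithmetic shift yields all-ones when x is negative and zero otherwise, so
 // the AND produces either (3 - 8) or 0 before the final add of 8.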
20584 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
20585 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
20586 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
20587 if (CCVal == ISD::CondCode::SETGE)
20588 std::swap(TrueV, FalseV);
20589
20590 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
20591 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
20592 // Only handle simm12; if a constant is outside this range, it can be
20593 // treated as a register operand instead.
20594 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
20595 isInt<12>(TrueSImm - FalseSImm)) {
20596 SDValue SRA =
20597 DAG.getNode(ISD::SRA, DL, VT, LHS,
20598 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
20599 SDValue AND =
20600 DAG.getNode(ISD::AND, DL, VT, SRA,
20601 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
20602 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
20603 }
20604
20605 if (CCVal == ISD::CondCode::SETGE)
20606 std::swap(TrueV, FalseV);
20607 }
20608
20609 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
20610 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
20611 {LHS, RHS, CC, TrueV, FalseV});
20612
20613 if (!Subtarget.hasConditionalMoveFusion()) {
20614 // (select c, -1, y) -> -c | y
20615 if (isAllOnesConstant(TrueV)) {
20616 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
20617 SDValue Neg = DAG.getNegative(C, DL, VT);
20618 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
20619 }
20620 // (select c, y, -1) -> -!c | y
20621 if (isAllOnesConstant(FalseV)) {
20622 SDValue C =
20623 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
20624 SDValue Neg = DAG.getNegative(C, DL, VT);
20625 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
20626 }
20627
20628 // (select c, 0, y) -> -!c & y
20629 if (isNullConstant(TrueV)) {
20630 SDValue C =
20631 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
20632 SDValue Neg = DAG.getNegative(C, DL, VT);
20633 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
20634 }
20635 // (select c, y, 0) -> -c & y
20636 if (isNullConstant(FalseV)) {
20637 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
20638 SDValue Neg = DAG.getNegative(C, DL, VT);
20639 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
20640 }
20641 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
20642 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
20643 if (((isOneConstant(FalseV) && LHS == TrueV &&
20644 CCVal == ISD::CondCode::SETNE) ||
20645 (isOneConstant(TrueV) && LHS == FalseV &&
20646 CCVal == ISD::CondCode::SETEQ)) &&
20647 isNullConstant(RHS)) {
20648 // LHS is used twice below; freeze it to be safe.
20649 LHS = DAG.getFreeze(LHS);
20650 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
20651 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
20652 }
20653 }
20654
20655 // If both true/false are an xor with 1, pull through the select.
20656 // This can occur after op legalization if both operands are setccs that
20657 // require an xor to invert.
20658 // FIXME: Generalize to other binary ops with identical operand?
20659 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
20660 TrueV.getOperand(1) == FalseV.getOperand(1) &&
20661 isOneConstant(TrueV.getOperand(1)) &&
20662 TrueV.hasOneUse() && FalseV.hasOneUse()) {
20663 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
20664 TrueV.getOperand(0), FalseV.getOperand(0));
20665 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
20666 }
20667
20668 return SDValue();
20669 }
20670 case RISCVISD::BR_CC: {
20671 SDValue LHS = N->getOperand(1);
20672 SDValue RHS = N->getOperand(2);
20673 SDValue CC = N->getOperand(3);
20674 SDLoc DL(N);
20675
20676 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
20677 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
20678 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
20679
20680 return SDValue();
20681 }
20682 case ISD::BITREVERSE:
20683 return performBITREVERSECombine(N, DAG, Subtarget);
20684 case ISD::FP_TO_SINT:
20685 case ISD::FP_TO_UINT:
20686 return performFP_TO_INTCombine(N, DCI, Subtarget);
20687 case ISD::FP_TO_SINT_SAT:
20688 case ISD::FP_TO_UINT_SAT:
20689 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
20690 case ISD::FCOPYSIGN: {
20691 EVT VT = N->getValueType(0);
20692 if (!VT.isVector())
20693 break;
20694 // There is a form of VFSGNJ which injects the negated sign of its second
20695 // operand. Try and bubble any FNEG up after the extend/round to produce
20696 // this optimized pattern. Avoid modifying cases where the FP_ROUND has
20697 // TRUNC=1.
20698 SDValue In2 = N->getOperand(1);
20699 // Avoid cases where the extend/round has multiple uses, as duplicating
20700 // those is typically more expensive than removing a fneg.
20701 if (!In2.hasOneUse())
20702 break;
20703 if (In2.getOpcode() != ISD::FP_EXTEND &&
20704 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
20705 break;
20706 In2 = In2.getOperand(0);
20707 if (In2.getOpcode() != ISD::FNEG)
20708 break;
20709 SDLoc DL(N);
20710 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
20711 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
20712 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
20713 }
20714 case ISD::MGATHER: {
20715 const auto *MGN = cast<MaskedGatherSDNode>(N);
20716 const EVT VT = N->getValueType(0);
20717 SDValue Index = MGN->getIndex();
20718 SDValue ScaleOp = MGN->getScale();
20719 ISD::MemIndexType IndexType = MGN->getIndexType();
20720 assert(!MGN->isIndexScaled() &&
20721 "Scaled gather/scatter should not be formed");
20722
20723 SDLoc DL(N);
20724 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20725 return DAG.getMaskedGather(
20726 N->getVTList(), MGN->getMemoryVT(), DL,
20727 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
20728 MGN->getBasePtr(), Index, ScaleOp},
20729 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
20730
20731 if (narrowIndex(Index, IndexType, DAG))
20732 return DAG.getMaskedGather(
20733 N->getVTList(), MGN->getMemoryVT(), DL,
20734 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
20735 MGN->getBasePtr(), Index, ScaleOp},
20736 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
20737
20738 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
20739 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
20740 // The sequence will be XLenVT, not the type of Index. Tell
20741 // isSimpleVIDSequence this so we avoid overflow.
20742 if (std::optional<VIDSequence> SimpleVID =
20743 isSimpleVIDSequence(Index, Subtarget.getXLen());
20744 SimpleVID && SimpleVID->StepDenominator == 1) {
20745 const int64_t StepNumerator = SimpleVID->StepNumerator;
20746 const int64_t Addend = SimpleVID->Addend;
20747
20748 // Note: We don't need to check alignment here since (by assumption
20749 // from the existence of the gather), our offsets must be sufficiently
20750 // aligned.
20751
20752 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
20753 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
20754 assert(IndexType == ISD::UNSIGNED_SCALED);
20755 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
20756 DAG.getSignedConstant(Addend, DL, PtrVT));
20757
20758 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
20759 VT.getVectorElementCount());
20760 SDValue StridedLoad = DAG.getStridedLoadVP(
20761 VT, DL, MGN->getChain(), BasePtr,
20762 DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
20763 EVL, MGN->getMemOperand());
20764 SDValue Select = DAG.getSelect(DL, VT, MGN->getMask(), StridedLoad,
20765 MGN->getPassThru());
20766 return DAG.getMergeValues({Select, SDValue(StridedLoad.getNode(), 1)},
20767 DL);
20768 }
20769 }
20770
20771 SmallVector<int> ShuffleMask;
20772 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
20773 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
20774 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
20775 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
20776 MGN->getMask(), DAG.getUNDEF(VT),
20777 MGN->getMemoryVT(), MGN->getMemOperand(),
20778 ISD::UNINDEXED, ISD::NON_EXTLOAD);
20779 SDValue Shuffle =
20780 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
20781 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
20782 }
20783
20784 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
20785 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
20786 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
20787 SmallVector<SDValue> NewIndices;
20788 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
20789 NewIndices.push_back(Index.getOperand(i));
20790 EVT IndexVT = Index.getValueType()
20791 .getHalfNumVectorElementsVT(*DAG.getContext());
20792 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
20793
20794 unsigned ElementSize = VT.getScalarStoreSize();
20795 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
20796 auto EltCnt = VT.getVectorElementCount();
20797 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
20798 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
20799 EltCnt.divideCoefficientBy(2));
20800 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
20801 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
20802 EltCnt.divideCoefficientBy(2));
20803 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
20804
20805 SDValue Gather =
20806 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
20807 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
20808 Index, ScaleOp},
20809 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
20810 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
20811 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
20812 }
20813 break;
20814 }
20815 case ISD::MSCATTER: {
20816 const auto *MSN = cast<MaskedScatterSDNode>(N);
20817 SDValue Index = MSN->getIndex();
20818 SDValue ScaleOp = MSN->getScale();
20819 ISD::MemIndexType IndexType = MSN->getIndexType();
20820 assert(!MSN->isIndexScaled() &&
20821 "Scaled gather/scatter should not be formed");
20822
20823 SDLoc DL(N);
20824 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20825 return DAG.getMaskedScatter(
20826 N->getVTList(), MSN->getMemoryVT(), DL,
20827 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
20828 Index, ScaleOp},
20829 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
20830
20831 if (narrowIndex(Index, IndexType, DAG))
20832 return DAG.getMaskedScatter(
20833 N->getVTList(), MSN->getMemoryVT(), DL,
20834 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
20835 Index, ScaleOp},
20836 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
20837
20838 EVT VT = MSN->getValue()->getValueType(0);
20839 SmallVector<int> ShuffleMask;
20840 if (!MSN->isTruncatingStore() &&
20841 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
20842 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
20843 DAG.getUNDEF(VT), ShuffleMask);
20844 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
20845 DAG.getUNDEF(XLenVT), MSN->getMask(),
20846 MSN->getMemoryVT(), MSN->getMemOperand(),
20847 ISD::UNINDEXED, false);
20848 }
20849 break;
20850 }
20851 case ISD::VP_GATHER: {
20852 const auto *VPGN = cast<VPGatherSDNode>(N);
20853 SDValue Index = VPGN->getIndex();
20854 SDValue ScaleOp = VPGN->getScale();
20855 ISD::MemIndexType IndexType = VPGN->getIndexType();
20856 assert(!VPGN->isIndexScaled() &&
20857 "Scaled gather/scatter should not be formed");
20858
20859 SDLoc DL(N);
20860 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20861 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
20862 {VPGN->getChain(), VPGN->getBasePtr(), Index,
20863 ScaleOp, VPGN->getMask(),
20864 VPGN->getVectorLength()},
20865 VPGN->getMemOperand(), IndexType);
20866
20867 if (narrowIndex(Index, IndexType, DAG))
20868 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
20869 {VPGN->getChain(), VPGN->getBasePtr(), Index,
20870 ScaleOp, VPGN->getMask(),
20871 VPGN->getVectorLength()},
20872 VPGN->getMemOperand(), IndexType);
20873
20874 break;
20875 }
20876 case ISD::VP_SCATTER: {
20877 const auto *VPSN = cast<VPScatterSDNode>(N);
20878 SDValue Index = VPSN->getIndex();
20879 SDValue ScaleOp = VPSN->getScale();
20880 ISD::MemIndexType IndexType = VPSN->getIndexType();
20881 assert(!VPSN->isIndexScaled() &&
20882 "Scaled gather/scatter should not be formed");
20883
20884 SDLoc DL(N);
20885 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20886 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
20887 {VPSN->getChain(), VPSN->getValue(),
20888 VPSN->getBasePtr(), Index, ScaleOp,
20889 VPSN->getMask(), VPSN->getVectorLength()},
20890 VPSN->getMemOperand(), IndexType);
20891
20892 if (narrowIndex(Index, IndexType, DAG))
20893 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
20894 {VPSN->getChain(), VPSN->getValue(),
20895 VPSN->getBasePtr(), Index, ScaleOp,
20896 VPSN->getMask(), VPSN->getVectorLength()},
20897 VPSN->getMemOperand(), IndexType);
20898 break;
20899 }
20900 case RISCVISD::SHL_VL:
20901 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
20902 return V;
20903 [[fallthrough]];
20904 case RISCVISD::SRA_VL:
20905 case RISCVISD::SRL_VL: {
20906 SDValue ShAmt = N->getOperand(1);
20907 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
20908 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
20909 SDLoc DL(N);
20910 SDValue VL = N->getOperand(4);
20911 EVT VT = N->getValueType(0);
20912 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
20913 ShAmt.getOperand(1), VL);
20914 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
20915 N->getOperand(2), N->getOperand(3), N->getOperand(4));
20916 }
20917 break;
20918 }
20919 case ISD::SRA:
20920 if (SDValue V = performSRACombine(N, DAG, Subtarget))
20921 return V;
20922 [[fallthrough]];
20923 case ISD::SRL:
20924 case ISD::SHL: {
20925 if (N->getOpcode() == ISD::SHL) {
20926 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
20927 return V;
20928 }
20929 SDValue ShAmt = N->getOperand(1);
20930 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
20931 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
20932 SDLoc DL(N);
20933 EVT VT = N->getValueType(0);
20934 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
20935 ShAmt.getOperand(1),
20936 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
20937 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
20938 }
20939 break;
20940 }
20941 case RISCVISD::ADD_VL:
20942 if (SDValue V = simplifyOp_VL(N))
20943 return V;
20944 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20945 return V;
20946 if (SDValue V = combineVqdotAccum(N, DAG, Subtarget))
20947 return V;
20948 return combineToVWMACC(N, DAG, Subtarget);
20949 case RISCVISD::VWADD_W_VL:
20950 case RISCVISD::VWADDU_W_VL:
20951 case RISCVISD::VWSUB_W_VL:
20952 case RISCVISD::VWSUBU_W_VL:
20953 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
20954 case RISCVISD::OR_VL:
20955 case RISCVISD::SUB_VL:
20956 case RISCVISD::MUL_VL:
20957 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
20958 case RISCVISD::VFMADD_VL:
20959 case RISCVISD::VFNMADD_VL:
20960 case RISCVISD::VFMSUB_VL:
20961 case RISCVISD::VFNMSUB_VL:
20962 case RISCVISD::STRICT_VFMADD_VL:
20963 case RISCVISD::STRICT_VFNMADD_VL:
20964 case RISCVISD::STRICT_VFMSUB_VL:
20965 case RISCVISD::STRICT_VFNMSUB_VL:
20966 return performVFMADD_VLCombine(N, DCI, Subtarget);
20967 case RISCVISD::FADD_VL:
20968 case RISCVISD::FSUB_VL:
20969 case RISCVISD::FMUL_VL:
20970 case RISCVISD::VFWADD_W_VL:
20971 case RISCVISD::VFWSUB_W_VL:
20972 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
20973 case ISD::LOAD:
20974 case ISD::STORE: {
20975 if (DCI.isAfterLegalizeDAG())
20976 if (SDValue V = performMemPairCombine(N, DCI))
20977 return V;
20978
20979 if (N->getOpcode() != ISD::STORE)
20980 break;
20981
20982 auto *Store = cast<StoreSDNode>(N);
20983 SDValue Chain = Store->getChain();
20984 EVT MemVT = Store->getMemoryVT();
20985 SDValue Val = Store->getValue();
20986 SDLoc DL(N);
20987
20988 bool IsScalarizable =
20989 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
20990 Store->isSimple() &&
20991 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
20992 isPowerOf2_64(MemVT.getSizeInBits()) &&
20993 MemVT.getSizeInBits() <= Subtarget.getXLen();
20994
20995 // If sufficiently aligned we can scalarize stores of constant vectors of
20996 // any power-of-two size up to XLen bits, provided that they aren't too
20997 // expensive to materialize.
20998 // vsetivli zero, 2, e8, m1, ta, ma
20999 // vmv.v.i v8, 4
21000 // vse64.v v8, (a0)
21001 // ->
21002 // li a1, 1028
21003 // sh a1, 0(a0)
21004 if (DCI.isBeforeLegalize() && IsScalarizable &&
21005 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
21006 // Get the constant vector bits
21007 APInt NewC(Val.getValueSizeInBits(), 0);
21008 uint64_t EltSize = Val.getScalarValueSizeInBits();
21009 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
21010 if (Val.getOperand(i).isUndef())
21011 continue;
21012 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
21013 i * EltSize);
21014 }
21015 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
21016
21017 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
21018 true) <= 2 &&
21019 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
21020 NewVT, *Store->getMemOperand())) {
21021 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
21022 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
21023 Store->getPointerInfo(), Store->getBaseAlign(),
21024 Store->getMemOperand()->getFlags());
21025 }
21026 }
21027
21028 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
21029 // vsetivli zero, 2, e16, m1, ta, ma
21030 // vle16.v v8, (a0)
21031 // vse16.v v8, (a1)
21032 if (auto *L = dyn_cast<LoadSDNode>(Val);
21033 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
21034 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
21035 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
21036 L->getMemoryVT() == MemVT) {
21037 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
21038 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
21039 NewVT, *Store->getMemOperand()) &&
21040 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
21041 NewVT, *L->getMemOperand())) {
21042 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
21043 L->getPointerInfo(), L->getBaseAlign(),
21044 L->getMemOperand()->getFlags());
21045 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
21046 Store->getPointerInfo(), Store->getBaseAlign(),
21047 Store->getMemOperand()->getFlags());
21048 }
21049 }
21050
21051 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
21052 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
21053 // any illegal types.
21054 if ((Val.getOpcode() == RISCVISD::VMV_X_S ||
21055 (DCI.isAfterLegalizeDAG() &&
21056 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21057 isNullConstant(Val.getOperand(1)))) &&
21058 Val.hasOneUse()) {
21059 SDValue Src = Val.getOperand(0);
21060 MVT VecVT = Src.getSimpleValueType();
21061 // VecVT should be scalable and memory VT should match the element type.
21062 if (!Store->isIndexed() && VecVT.isScalableVector() &&
21063 MemVT == VecVT.getVectorElementType()) {
21064 SDLoc DL(N);
21065 MVT MaskVT = getMaskTypeFor(VecVT);
21066 return DAG.getStoreVP(
21067 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
21068 DAG.getConstant(1, DL, MaskVT),
21069 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
21070 Store->getMemOperand(), Store->getAddressingMode(),
21071 Store->isTruncatingStore(), /*IsCompress*/ false);
21072 }
21073 }
21074
21075 break;
21076 }
21077 case ISD::SPLAT_VECTOR: {
21078 EVT VT = N->getValueType(0);
21079 // Only perform this combine on legal MVT types.
21080 if (!isTypeLegal(VT))
21081 break;
21082 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
21083 DAG, Subtarget))
21084 return Gather;
21085 break;
21086 }
21087 case ISD::BUILD_VECTOR:
21088 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
21089 return V;
21090 break;
21091 case ISD::CONCAT_VECTORS:
21092 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
21093 return V;
21094 break;
21095 case ISD::VECTOR_SHUFFLE:
21096 if (SDValue V = performVECTOR_SHUFFLECombine(N, DAG, Subtarget, *this))
21097 return V;
21098 break;
21099 case ISD::INSERT_VECTOR_ELT:
21100 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
21101 return V;
21102 break;
21103 case RISCVISD::VFMV_V_F_VL: {
21104 const MVT VT = N->getSimpleValueType(0);
21105 SDValue Passthru = N->getOperand(0);
21106 SDValue Scalar = N->getOperand(1);
21107 SDValue VL = N->getOperand(2);
21108
21109 // If VL is 1, we can use vfmv.s.f.
21110 if (isOneConstant(VL))
21111 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
21112 break;
21113 }
21114 case RISCVISD::VMV_V_X_VL: {
21115 const MVT VT = N->getSimpleValueType(0);
21116 SDValue Passthru = N->getOperand(0);
21117 SDValue Scalar = N->getOperand(1);
21118 SDValue VL = N->getOperand(2);
21119
21120 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
21121 // scalar input.
21122 unsigned ScalarSize = Scalar.getValueSizeInBits();
21123 unsigned EltWidth = VT.getScalarSizeInBits();
21124 if (ScalarSize > EltWidth && Passthru.isUndef())
21125 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
21126 return SDValue(N, 0);
21127
21128 // If VL is 1 and the scalar value won't benefit from immediate, we can
21129 // use vmv.s.x.
21130 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
21131 if (isOneConstant(VL) &&
21132 (!Const || Const->isZero() ||
21133 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
21134 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
21135
21136 break;
21137 }
21138 case RISCVISD::VFMV_S_F_VL: {
21139 SDValue Src = N->getOperand(1);
21140 // Try to remove vector->scalar->vector if the scalar->vector is inserting
21141 // into an undef vector.
21142 // TODO: Could use a vslide or vmv.v.v for non-undef.
21143 if (N->getOperand(0).isUndef() &&
21144 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21145 isNullConstant(Src.getOperand(1)) &&
21146 Src.getOperand(0).getValueType().isScalableVector()) {
21147 EVT VT = N->getValueType(0);
21148 SDValue EVSrc = Src.getOperand(0);
21149 EVT EVSrcVT = EVSrc.getValueType();
21151 // Widths match, just return the original vector.
21152 if (EVSrcVT == VT)
21153 return EVSrc;
21154 SDLoc DL(N);
21155 // Width is narrower, so use insert_subvector.
21156 if (EVSrcVT.getVectorMinNumElements() < VT.getVectorMinNumElements()) {
21157 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
21158 EVSrc,
21159 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
21160 }
21161 // Width is wider, using extract_subvector.
21162 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, EVSrc,
21163 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
21164 }
21165 [[fallthrough]];
21166 }
21167 case RISCVISD::VMV_S_X_VL: {
21168 const MVT VT = N->getSimpleValueType(0);
21169 SDValue Passthru = N->getOperand(0);
21170 SDValue Scalar = N->getOperand(1);
21171 SDValue VL = N->getOperand(2);
21172
21173 // The vmv.s.x instruction copies the scalar integer register to element 0
21174 // of the destination vector register. If SEW < XLEN, the least-significant
21175 // bits are copied and the upper XLEN-SEW bits are ignored.
21176 unsigned ScalarSize = Scalar.getValueSizeInBits();
21177 unsigned EltWidth = VT.getScalarSizeInBits();
21178 if (ScalarSize > EltWidth && SimplifyDemandedLowBitsHelper(1, EltWidth))
21179 return SDValue(N, 0);
21180
21181 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
21182 Scalar.getOperand(0).getValueType() == N->getValueType(0))
21183 return Scalar.getOperand(0);
21184
21185 // Use M1 or smaller to avoid over constraining register allocation
21186 const MVT M1VT = RISCVTargetLowering::getM1VT(VT);
21187 if (M1VT.bitsLT(VT)) {
21188 SDValue M1Passthru = DAG.getExtractSubvector(DL, M1VT, Passthru, 0);
21189 SDValue Result =
21190 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
21191 Result = DAG.getInsertSubvector(DL, Passthru, Result, 0);
21192 return Result;
21193 }
21194
21195 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
21196 // higher would involve overly constraining the register allocator for
21197 // no purpose.
21198 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
21199 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
21200 VT.bitsLE(RISCVTargetLowering::getM1VT(VT)) && Passthru.isUndef())
21201 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
21202
21203 break;
21204 }
21205 case RISCVISD::VMV_X_S: {
21206 SDValue Vec = N->getOperand(0);
21207 MVT VecVT = N->getOperand(0).getSimpleValueType();
21208 const MVT M1VT = RISCVTargetLowering::getM1VT(VecVT);
21209 if (M1VT.bitsLT(VecVT)) {
21210 Vec = DAG.getExtractSubvector(DL, M1VT, Vec, 0);
21211 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
21212 }
21213 break;
21214 }
21215 case ISD::INTRINSIC_VOID:
21216 case ISD::INTRINSIC_W_CHAIN:
21217 case ISD::INTRINSIC_WO_CHAIN: {
21218 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
21219 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
21220 switch (IntNo) {
21221 // By default we do not combine any intrinsic.
21222 default:
21223 return SDValue();
21224 case Intrinsic::riscv_vcpop:
21225 case Intrinsic::riscv_vcpop_mask:
21226 case Intrinsic::riscv_vfirst:
21227 case Intrinsic::riscv_vfirst_mask: {
21228 SDValue VL = N->getOperand(2);
21229 if (IntNo == Intrinsic::riscv_vcpop_mask ||
21230 IntNo == Intrinsic::riscv_vfirst_mask)
21231 VL = N->getOperand(3);
21232 if (!isNullConstant(VL))
21233 return SDValue();
21234 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
21235 SDLoc DL(N);
21236 EVT VT = N->getValueType(0);
21237 if (IntNo == Intrinsic::riscv_vfirst ||
21238 IntNo == Intrinsic::riscv_vfirst_mask)
21239 return DAG.getAllOnesConstant(DL, VT);
21240 return DAG.getConstant(0, DL, VT);
21241 }
21242 case Intrinsic::riscv_vsseg2_mask:
21243 case Intrinsic::riscv_vsseg3_mask:
21244 case Intrinsic::riscv_vsseg4_mask:
21245 case Intrinsic::riscv_vsseg5_mask:
21246 case Intrinsic::riscv_vsseg6_mask:
21247 case Intrinsic::riscv_vsseg7_mask:
21248 case Intrinsic::riscv_vsseg8_mask: {
21249 SDValue Tuple = N->getOperand(2);
21250 unsigned NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
21251
21252 if (Subtarget.hasOptimizedSegmentLoadStore(NF) || !Tuple.hasOneUse() ||
21253 Tuple.getOpcode() != RISCVISD::TUPLE_INSERT ||
21254 !Tuple.getOperand(0).isUndef())
21255 return SDValue();
21256
21257 SDValue Val = Tuple.getOperand(1);
21258 unsigned Idx = Tuple.getConstantOperandVal(2);
21259
21260 unsigned SEW = Val.getValueType().getScalarSizeInBits();
21261 assert(Log2_64(SEW) == N->getConstantOperandVal(6) &&
21262 "Type mismatch without bitcast?");
21263 unsigned Stride = SEW / 8 * NF;
21264 unsigned Offset = SEW / 8 * Idx;
21265
21266 SDValue Ops[] = {
21267 /*Chain=*/N->getOperand(0),
21268 /*IntID=*/
21269 DAG.getTargetConstant(Intrinsic::riscv_vsse_mask, DL, XLenVT),
21270 /*StoredVal=*/Val,
21271 /*Ptr=*/
21272 DAG.getNode(ISD::ADD, DL, XLenVT, N->getOperand(3),
21273 DAG.getConstant(Offset, DL, XLenVT)),
21274 /*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
21275 /*Mask=*/N->getOperand(4),
21276 /*VL=*/N->getOperand(5)};
21277
21278 auto *OldMemSD = cast<MemIntrinsicSDNode>(N);
21279 // Match getTgtMemIntrinsic for non-unit stride case
21280 EVT MemVT = OldMemSD->getMemoryVT().getScalarType();
21283 OldMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);
21284
21285 SDVTList VTs = DAG.getVTList(MVT::Other);
21286 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VTs, Ops, MemVT,
21287 MMO);
21288 }
21289 }
21290 }
21291 case ISD::EXPERIMENTAL_VP_REVERSE:
21292 return performVP_REVERSECombine(N, DAG, Subtarget);
21293 case ISD::VP_STORE:
21294 return performVP_STORECombine(N, DAG, Subtarget);
21295 case ISD::BITCAST: {
21296 assert(Subtarget.useRVVForFixedLengthVectors());
21297 SDValue N0 = N->getOperand(0);
21298 EVT VT = N->getValueType(0);
21299 EVT SrcVT = N0.getValueType();
21300 if (VT.isRISCVVectorTuple() && N0->getOpcode() == ISD::SPLAT_VECTOR) {
21301 unsigned NF = VT.getRISCVVectorTupleNumFields();
21302 unsigned NumScalElts = VT.getSizeInBits().getKnownMinValue() / (NF * 8);
21303 SDValue EltVal = DAG.getConstant(0, DL, Subtarget.getXLenVT());
21304 MVT ScalTy = MVT::getScalableVectorVT(MVT::getIntegerVT(8), NumScalElts);
21305
21306 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, ScalTy, EltVal);
21307
21308 SDValue Result = DAG.getUNDEF(VT);
21309 for (unsigned i = 0; i < NF; ++i)
21310 Result = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Result, Splat,
21311 DAG.getTargetConstant(i, DL, MVT::i32));
21312 return Result;
21313 }
21314 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
21315 // type, widen both sides to avoid a trip through memory.
21316 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
21317 VT.isScalarInteger()) {
21318 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
21319 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
21320 Ops[0] = N0;
21321 SDLoc DL(N);
21322 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
21323 N0 = DAG.getBitcast(MVT::i8, N0);
21324 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
21325 }
21326
21327 return SDValue();
21328 }
21329 case ISD::VECREDUCE_ADD:
21330 if (SDValue V = performVECREDUCECombine(N, DAG, Subtarget, *this))
21331 return V;
21332 [[fallthrough]];
21333 case ISD::CTPOP:
21334 if (SDValue V = combineToVCPOP(N, DAG, Subtarget))
21335 return V;
21336 break;
21337 case RISCVISD::VRGATHER_VX_VL: {
21338 // Note this assumes that out of bounds indices produce poison
21339 // and can thus be replaced without having to prove them in bounds.
21340 EVT VT = N->getValueType(0);
21341 SDValue Src = N->getOperand(0);
21342 SDValue Idx = N->getOperand(1);
21343 SDValue Passthru = N->getOperand(2);
21344 SDValue VL = N->getOperand(4);
21345
21346 // Warning: Unlike most cases where we strip an insert_subvector, this one
21347 // does not require the first operand to be undef.
21348 if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
21349 isNullConstant(Src.getOperand(2)))
21350 Src = Src.getOperand(1);
21351
21352 switch (Src.getOpcode()) {
21353 default:
21354 break;
21355 case RISCVISD::VMV_V_X_VL:
21356 case RISCVISD::VFMV_V_F_VL:
21357 // Drop a redundant vrgather_vx.
21358 // TODO: Remove the type restriction if we find a motivating
21359 // test case?
21360 if (Passthru.isUndef() && VL == Src.getOperand(2) &&
21361 Src.getValueType() == VT)
21362 return Src;
21363 break;
21364 case RISCVISD::VMV_S_X_VL:
21365 case RISCVISD::VFMV_S_F_VL:
21366 // If this use only demands lane zero from the source vmv.s.x, and
21367 // doesn't have a passthru, then this vrgather.vi/vx is equivalent to
21368 // a vmv.v.x. Note that there can be other uses of the original
21369 // vmv.s.x and thus we can't eliminate it. (vfmv.s.f is analogous)
21370 if (isNullConstant(Idx) && Passthru.isUndef() &&
21371 VL == Src.getOperand(2)) {
21372 unsigned Opc =
21373 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
21374 return DAG.getNode(Opc, DL, VT, DAG.getUNDEF(VT), Src.getOperand(1),
21375 VL);
21376 }
21377 break;
21378 }
21379 break;
21380 }
21381 case RISCVISD::TUPLE_EXTRACT: {
21382 EVT VT = N->getValueType(0);
21383 SDValue Tuple = N->getOperand(0);
21384 unsigned Idx = N->getConstantOperandVal(1);
21385 if (!Tuple.hasOneUse() || Tuple.getOpcode() != ISD::INTRINSIC_W_CHAIN)
21386 break;
21387
21388 unsigned NF = 0;
21389 switch (Tuple.getConstantOperandVal(1)) {
21390 default:
21391 break;
21392 case Intrinsic::riscv_vlseg2_mask:
21393 case Intrinsic::riscv_vlseg3_mask:
21394 case Intrinsic::riscv_vlseg4_mask:
21395 case Intrinsic::riscv_vlseg5_mask:
21396 case Intrinsic::riscv_vlseg6_mask:
21397 case Intrinsic::riscv_vlseg7_mask:
21398 case Intrinsic::riscv_vlseg8_mask:
21399 NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
21400 break;
21401 }
21402
21403 if (!NF || Subtarget.hasOptimizedSegmentLoadStore(NF))
21404 break;
21405
21406 unsigned SEW = VT.getScalarSizeInBits();
21407 assert(Log2_64(SEW) == Tuple.getConstantOperandVal(7) &&
21408 "Type mismatch without bitcast?");
21409 unsigned Stride = SEW / 8 * NF;
21410 unsigned Offset = SEW / 8 * Idx;
21411
21412 SDValue Ops[] = {
21413 /*Chain=*/Tuple.getOperand(0),
21414 /*IntID=*/DAG.getTargetConstant(Intrinsic::riscv_vlse_mask, DL, XLenVT),
21415 /*Passthru=*/Tuple.getOperand(2),
21416 /*Ptr=*/
21417 DAG.getNode(ISD::ADD, DL, XLenVT, Tuple.getOperand(3),
21418 DAG.getConstant(Offset, DL, XLenVT)),
21419 /*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
21420 /*Mask=*/Tuple.getOperand(4),
21421 /*VL=*/Tuple.getOperand(5),
21422 /*Policy=*/Tuple.getOperand(6)};
21423
21424 auto *TupleMemSD = cast<MemIntrinsicSDNode>(Tuple);
21425 // Match getTgtMemIntrinsic for non-unit stride case
21426 EVT MemVT = TupleMemSD->getMemoryVT().getScalarType();
21429 TupleMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);
21430
21431 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
21432 SDValue Result = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
21433 Ops, MemVT, MMO);
21434 DAG.ReplaceAllUsesOfValueWith(Tuple.getValue(1), Result.getValue(1));
21435 return Result.getValue(0);
21436 }
21437 case RISCVISD::TUPLE_INSERT: {
21438 // tuple_insert tuple, undef, idx -> tuple
21439 if (N->getOperand(1).isUndef())
21440 return N->getOperand(0);
21441 break;
21442 }
21443 case RISCVISD::VSLIDE1UP_VL:
21444 case RISCVISD::VFSLIDE1UP_VL: {
21445 using namespace SDPatternMatch;
21446 SDValue SrcVec;
21447 SDLoc DL(N);
21448 MVT VT = N->getSimpleValueType(0);
21449 // If the scalar we're sliding in was extracted from the first element of a
21450 // vector, we can use that vector as the passthru in a normal slideup of 1.
21451 // This saves us an extract_element instruction (i.e. vfmv.f.s, vmv.x.s).
21452 if (!N->getOperand(0).isUndef() ||
21453 !sd_match(N->getOperand(2),
21454 m_AnyOf(m_ExtractElt(m_Value(SrcVec), m_Zero()),
21455 m_Node(RISCVISD::VMV_X_S, m_Value(SrcVec)))))
21456 break;
21457
21458 MVT SrcVecVT = SrcVec.getSimpleValueType();
21459 if (SrcVecVT.getVectorElementType() != VT.getVectorElementType())
21460 break;
21461 // Adapt the value type of source vector.
21462 if (SrcVecVT.isFixedLengthVector()) {
21463 SrcVecVT = getContainerForFixedLengthVector(SrcVecVT);
21464 SrcVec = convertToScalableVector(SrcVecVT, SrcVec, DAG, Subtarget);
21465 }
21467 SrcVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), SrcVec, 0);
21468 else
21469 SrcVec = DAG.getExtractSubvector(DL, VT, SrcVec, 0);
21470
21471 return getVSlideup(DAG, Subtarget, DL, VT, SrcVec, N->getOperand(1),
21472 DAG.getConstant(1, DL, XLenVT), N->getOperand(3),
21473 N->getOperand(4));
21474 }
21475 }
21476
21477 return SDValue();
21478}
21479
21480bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
21481 EVT XVT, unsigned KeptBits) const {
21482 // For vectors, we don't have a preference.
21483 if (XVT.isVector())
21484 return false;
21485
21486 if (XVT != MVT::i32 && XVT != MVT::i64)
21487 return false;
21488
21489 // We can use sext.w for RV64 or an srai 31 on RV32.
21490 if (KeptBits == 32 || KeptBits == 64)
21491 return true;
21492
21493 // With Zbb we can use sext.h/sext.b.
21494 return Subtarget.hasStdExtZbb() &&
21495 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
21496 KeptBits == 16);
21497}
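// Illustrative note (not part of the upstream source): returning true here
// asks the generic combiner to rewrite a "does truncating to KeptBits lose
// information" check from its add/shift form into a compare against the
// sign-extended value, e.g. `x == (i64)(i8)x`, which with Zbb can lower to a
// single sext.b followed by one branch instead of materialising constants.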
21498
21499bool RISCVTargetLowering::isDesirableToCommuteWithShift(
21500 const SDNode *N, CombineLevel Level) const {
21501 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
21502 N->getOpcode() == ISD::SRL) &&
21503 "Expected shift op");
21504
21505 // The following folds are only desirable if `(OP _, c1 << c2)` can be
21506 // materialised in fewer instructions than `(OP _, c1)`:
21507 //
21508 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
21509 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
21510 SDValue N0 = N->getOperand(0);
21511 EVT Ty = N0.getValueType();
21512
21513 // LD/ST will optimize constant Offset extraction, so when AddNode is used by
21514 // LD/ST, it can still complete the folding optimization operation performed
21515 // above.
21516 auto isUsedByLdSt = [](const SDNode *X, const SDNode *User) {
21517 for (SDNode *Use : X->users()) {
21518 // This use is the one we're on right now. Skip it.
21519 if (Use == User || Use->getOpcode() == ISD::SELECT)
21520 continue;
21521 if (!isa<StoreSDNode>(Use) && !isa<LoadSDNode>(Use))
21522 return false;
21523 }
21524 return true;
21525 };
21526
21527 if (Ty.isScalarInteger() &&
21528 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
21529 if (N0.getOpcode() == ISD::ADD && !N0->hasOneUse())
21530 return isUsedByLdSt(N0.getNode(), N);
21531
21532 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
21533 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
21534
21535 // Bail if we might break a sh{1,2,3}add/qc.shladd pattern.
21536 if (C2 && Subtarget.hasShlAdd(C2->getZExtValue()) && N->hasOneUse() &&
21537 N->user_begin()->getOpcode() == ISD::ADD &&
21538 !isUsedByLdSt(*N->user_begin(), nullptr) &&
21539 !isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
21540 return false;
21541
21542 if (C1 && C2) {
21543 const APInt &C1Int = C1->getAPIntValue();
21544 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
21545
21546 // We can materialise `c1 << c2` into an add immediate, so it's "free",
21547 // and the combine should happen, to potentially allow further combines
21548 // later.
21549 if (ShiftedC1Int.getSignificantBits() <= 64 &&
21550 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
21551 return true;
21552
21553 // We can materialise `c1` in an add immediate, so it's "free", and the
21554 // combine should be prevented.
21555 if (C1Int.getSignificantBits() <= 64 &&
21556 isLegalAddImmediate(C1Int.getSExtValue()))
21557 return false;
21558
21559 // Neither constant will fit into an immediate, so find materialisation
21560 // costs.
21561 int C1Cost =
21562 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
21563 /*CompressionCost*/ true);
21564 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
21565 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
21566 /*CompressionCost*/ true);
21567
21568 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
21569 // combine should be prevented.
21570 if (C1Cost < ShiftedC1Cost)
21571 return false;
21572 }
21573 }
21574
21575 if (!N0->hasOneUse())
21576 return false;
21577
21578 if (N0->getOpcode() == ISD::SIGN_EXTEND &&
21579 N0->getOperand(0)->getOpcode() == ISD::ADD &&
21580 !N0->getOperand(0)->hasOneUse())
21581 return isUsedByLdSt(N0->getOperand(0).getNode(), N0.getNode());
21582
21583 return true;
21584}
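// Worked example (illustrative, not from the upstream source) for the fold
// (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2), assuming the shNadd
// special case above does not apply:
//   c1 = 3,    c2 = 4 -> c1 << c2 = 48 is still a valid ADDI immediate, so
//                        the commute is allowed (return true).
//   c1 = 2047, c2 = 4 -> c1 fits an ADDI but c1 << c2 = 32752 does not, so
//                        commuting would cost an extra constant
//                        materialisation and the hook returns false.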
21585
21586bool RISCVTargetLowering::targetShrinkDemandedConstant(
21587 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
21588 TargetLoweringOpt &TLO) const {
21589 // Delay this optimization as late as possible.
21590 if (!TLO.LegalOps)
21591 return false;
21592
21593 EVT VT = Op.getValueType();
21594 if (VT.isVector())
21595 return false;
21596
21597 unsigned Opcode = Op.getOpcode();
21598 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
21599 return false;
21600
21601 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
21602 if (!C)
21603 return false;
21604
21605 const APInt &Mask = C->getAPIntValue();
21606
21607 // Clear all non-demanded bits initially.
21608 APInt ShrunkMask = Mask & DemandedBits;
21609
21610 // Try to make a smaller immediate by setting undemanded bits.
21611
21612 APInt ExpandedMask = Mask | ~DemandedBits;
21613
21614 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
21615 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
21616 };
21617 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
21618 if (NewMask == Mask)
21619 return true;
21620 SDLoc DL(Op);
21621 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
21622 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
21623 Op.getOperand(0), NewC);
21624 return TLO.CombineTo(Op, NewOp);
21625 };
21626
21627 // If the shrunk mask fits in sign extended 12 bits, let the target
21628 // independent code apply it.
21629 if (ShrunkMask.isSignedIntN(12))
21630 return false;
21631
21632 // And has a few special cases for zext.
21633 if (Opcode == ISD::AND) {
21634 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
21635 // otherwise use SLLI + SRLI.
21636 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
21637 if (IsLegalMask(NewMask))
21638 return UseMask(NewMask);
21639
21640 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
21641 if (VT == MVT::i64) {
21642 APInt NewMask = APInt(64, 0xffffffff);
21643 if (IsLegalMask(NewMask))
21644 return UseMask(NewMask);
21645 }
21646 }
21647
21648 // For the remaining optimizations, we need to be able to make a negative
21649 // number through a combination of mask and undemanded bits.
21650 if (!ExpandedMask.isNegative())
21651 return false;
21652
21653 // What is the fewest number of bits we need to represent the negative number?
21654 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
21655
21656 // Try to make a 12 bit negative immediate. If that fails try to make a 32
21657 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
21658 // If we can't create a simm12, we shouldn't change opaque constants.
21659 APInt NewMask = ShrunkMask;
21660 if (MinSignedBits <= 12)
21661 NewMask.setBitsFrom(11);
21662 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
21663 NewMask.setBitsFrom(31);
21664 else
21665 return false;
21666
21667 // Check that our new mask is a subset of the demanded mask.
21668 assert(IsLegalMask(NewMask));
21669 return UseMask(NewMask);
21670}
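// Worked example (illustrative, not from the upstream source): for
// (and X, 0x1FFFE) where only the low 16 bits are demanded,
//   ShrunkMask   = 0x1FFFE & 0xFFFF  = 0xFFFE  (does not fit simm12)
//   ExpandedMask = 0x1FFFE | ~0xFFFF           (negative, 2 significant bits)
// so NewMask is ShrunkMask with bits [11,63] set, i.e. -2, and the node is
// rewritten to (and X, -2), a single ANDI.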
21671
21672static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
21673 static const uint64_t GREVMasks[] = {
21674 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
21675 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
21676
21677 for (unsigned Stage = 0; Stage != 6; ++Stage) {
21678 unsigned Shift = 1 << Stage;
21679 if (ShAmt & Shift) {
21680 uint64_t Mask = GREVMasks[Stage];
21681 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
21682 if (IsGORC)
21683 Res |= x;
21684 x = Res;
21685 }
21686 }
21687
21688 return x;
21689}
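// Illustrative values (not part of the upstream source) for the control value
// of 7 used by the BREV8/ORC_B handling below:
//   computeGREVOrGORC(0x0100000000000080, 7, /*IsGORC=*/true)
//       == 0xFF000000000000FF   // orc.b: every nonzero byte becomes 0xFF
//   computeGREVOrGORC(0x0000000000000001, 7, /*IsGORC=*/false)
//       == 0x0000000000000080   // brev8: bits are reversed within each byte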
21690
21691void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
21692 KnownBits &Known,
21693 const APInt &DemandedElts,
21694 const SelectionDAG &DAG,
21695 unsigned Depth) const {
21696 unsigned BitWidth = Known.getBitWidth();
21697 unsigned Opc = Op.getOpcode();
21702 "Should use MaskedValueIsZero if you don't know whether Op"
21703 " is a target node!");
21704
21705 Known.resetAll();
21706 switch (Opc) {
21707 default: break;
21708 case RISCVISD::SELECT_CC: {
21709 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
21710 // If we don't know any bits, early out.
21711 if (Known.isUnknown())
21712 break;
21713 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
21714
21715 // Only known if known in both the LHS and RHS.
21716 Known = Known.intersectWith(Known2);
21717 break;
21718 }
21719 case RISCVISD::VCPOP_VL: {
21720 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(2), Depth + 1);
21721 Known.Zero.setBitsFrom(Known2.countMaxActiveBits());
21722 break;
21723 }
21724 case RISCVISD::CZERO_EQZ:
21725 case RISCVISD::CZERO_NEZ:
21726 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21727 // Result is either all zero or operand 0. We can propagate zeros, but not
21728 // ones.
21729 Known.One.clearAllBits();
21730 break;
21731 case RISCVISD::REMUW: {
21732 KnownBits Known2;
21733 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21734 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21735 // We only care about the lower 32 bits.
21736 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
21737 // Restore the original width by sign extending.
21738 Known = Known.sext(BitWidth);
21739 break;
21740 }
21741 case RISCVISD::DIVUW: {
21742 KnownBits Known2;
21743 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21744 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21745 // We only care about the lower 32 bits.
21746 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
21747 // Restore the original width by sign extending.
21748 Known = Known.sext(BitWidth);
21749 break;
21750 }
21751 case RISCVISD::SLLW: {
21752 KnownBits Known2;
21753 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21754 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21755 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
21756 // Restore the original width by sign extending.
21757 Known = Known.sext(BitWidth);
21758 break;
21759 }
21760 case RISCVISD::SRLW: {
21761 KnownBits Known2;
21762 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21763 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21764 Known = KnownBits::lshr(Known.trunc(32), Known2.trunc(5).zext(32));
21765 // Restore the original width by sign extending.
21766 Known = Known.sext(BitWidth);
21767 break;
21768 }
21769 case RISCVISD::SRAW: {
21770 KnownBits Known2;
21771 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21772 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
21773 Known = KnownBits::ashr(Known.trunc(32), Known2.trunc(5).zext(32));
21774 // Restore the original width by sign extending.
21775 Known = Known.sext(BitWidth);
21776 break;
21777 }
21778 case RISCVISD::SHL_ADD: {
21779 KnownBits Known2;
21780 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
21781 unsigned ShAmt = Op.getConstantOperandVal(1);
21782 Known <<= ShAmt;
21783 Known.Zero.setLowBits(ShAmt); // the <<= operator left these bits unknown
21784 Known2 = DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
21785 Known = KnownBits::add(Known, Known2);
21786 break;
21787 }
21788 case RISCVISD::CTZW: {
21789 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21790 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
21791 unsigned LowBits = llvm::bit_width(PossibleTZ);
21792 Known.Zero.setBitsFrom(LowBits);
21793 break;
21794 }
21795 case RISCVISD::CLZW: {
21796 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21797 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
21798 unsigned LowBits = llvm::bit_width(PossibleLZ);
21799 Known.Zero.setBitsFrom(LowBits);
21800 break;
21801 }
21802 case RISCVISD::BREV8:
21803 case RISCVISD::ORC_B: {
21804 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
21805 // control value of 7 is equivalent to brev8 and orc.b.
21806 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
21807 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
21808 // To compute zeros for ORC_B, we need to invert the value and invert it
21809 // back after. This inverting is harmless for BREV8.
21810 Known.Zero =
21811 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
21812 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
21813 break;
21814 }
21815 case RISCVISD::READ_VLENB: {
21816 // We can use the minimum and maximum VLEN values to bound VLENB. We
21817 // know VLEN must be a power of two.
21818 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
21819 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
21820 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
21821 Known.Zero.setLowBits(Log2_32(MinVLenB));
21822 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
21823 if (MaxVLenB == MinVLenB)
21824 Known.One.setBit(Log2_32(MinVLenB));
21825 break;
21826 }
21827 case RISCVISD::FCLASS: {
21828 // fclass will only set one of the low 10 bits.
21829 Known.Zero.setBitsFrom(10);
21830 break;
21831 }
21832 case ISD::INTRINSIC_WO_CHAIN:
21833 case ISD::INTRINSIC_W_CHAIN: {
21834 unsigned IntNo =
21835 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
21836 switch (IntNo) {
21837 default:
21838 // We can't do anything for most intrinsics.
21839 break;
21840 case Intrinsic::riscv_vsetvli:
21841 case Intrinsic::riscv_vsetvlimax: {
21842 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
21843 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
21844 RISCVVType::VLMUL VLMUL =
21845 static_cast<RISCVVType::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
21846 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
21847 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
21848 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
21849 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
21850
21851 // The result of vsetvli must not be larger than AVL.
21852 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
21853 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
21854
21855 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
21856 if (BitWidth > KnownZeroFirstBit)
21857 Known.Zero.setBitsFrom(KnownZeroFirstBit);
21858 break;
21859 }
21860 }
21861 break;
21862 }
21863 }
21864}
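// Illustrative example (not from the upstream source) of the vsetvli handling
// above: with a maximum VLEN of 512, SEW = 8 and LMUL = 1, MaxVL is
// 512 / 8 = 64, so KnownZeroFirstBit is Log2(64) + 1 = 7 and all result bits
// from bit 7 upward are reported as known zero.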
21865
21866unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
21867 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
21868 unsigned Depth) const {
21869 switch (Op.getOpcode()) {
21870 default:
21871 break;
21872 case RISCVISD::SELECT_CC: {
21873 unsigned Tmp =
21874 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
21875 if (Tmp == 1) return 1; // Early out.
21876 unsigned Tmp2 =
21877 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
21878 return std::min(Tmp, Tmp2);
21879 }
21880 case RISCVISD::CZERO_EQZ:
21881 case RISCVISD::CZERO_NEZ:
21882 // Output is either all zero or operand 0. We can propagate sign bit count
21883 // from operand 0.
21884 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
21885 case RISCVISD::NEGW_MAX: {
21886 // We expand this at isel to negw+max. The result will have 33 sign bits
21887 // if the input has at least 33 sign bits.
21888 unsigned Tmp =
21889 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
21890 if (Tmp < 33) return 1;
21891 return 33;
21892 }
21893 case RISCVISD::SRAW: {
21894 unsigned Tmp =
21895 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
21896 // sraw produces at least 33 sign bits. If the input already has more than
21897 // 33 sign bits, sraw will preserve them.
21898 // TODO: A more precise answer could be calculated depending on known bits
21899 // in the shift amount.
21900 return std::max(Tmp, 33U);
21901 }
21902 case RISCVISD::SLLW:
21903 case RISCVISD::SRLW:
21904 case RISCVISD::DIVW:
21905 case RISCVISD::DIVUW:
21906 case RISCVISD::REMUW:
21907 case RISCVISD::ROLW:
21908 case RISCVISD::RORW:
21909 case RISCVISD::ABSW:
21910 case RISCVISD::FCVT_W_RV64:
21911 case RISCVISD::FCVT_WU_RV64:
21912 case RISCVISD::STRICT_FCVT_W_RV64:
21913 case RISCVISD::STRICT_FCVT_WU_RV64:
21914 // TODO: As the result is sign-extended, this is conservatively correct.
21915 return 33;
21916 case RISCVISD::VMV_X_S: {
21917 // The number of sign bits of the scalar result is computed by obtaining the
21918 // element type of the input vector operand, subtracting its width from the
21919 // XLEN, and then adding one (sign bit within the element type). If the
21920 // element type is wider than XLen, the least-significant XLEN bits are
21921 // taken.
21922 unsigned XLen = Subtarget.getXLen();
21923 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
21924 if (EltBits <= XLen)
21925 return XLen - EltBits + 1;
21926 break;
21927 }
21928 case ISD::INTRINSIC_W_CHAIN: {
21929 unsigned IntNo = Op.getConstantOperandVal(1);
21930 switch (IntNo) {
21931 default:
21932 break;
21933 case Intrinsic::riscv_masked_atomicrmw_xchg:
21934 case Intrinsic::riscv_masked_atomicrmw_add:
21935 case Intrinsic::riscv_masked_atomicrmw_sub:
21936 case Intrinsic::riscv_masked_atomicrmw_nand:
21937 case Intrinsic::riscv_masked_atomicrmw_max:
21938 case Intrinsic::riscv_masked_atomicrmw_min:
21939 case Intrinsic::riscv_masked_atomicrmw_umax:
21940 case Intrinsic::riscv_masked_atomicrmw_umin:
21941 case Intrinsic::riscv_masked_cmpxchg:
21942 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
21943 // narrow atomic operation. These are implemented using atomic
21944 // operations at the minimum supported atomicrmw/cmpxchg width whose
21945 // result is then sign extended to XLEN. With +A, the minimum width is
21946 // 32 for both RV64 and RV32.
21948 assert(Subtarget.hasStdExtZalrsc());
21949 return Op.getValueSizeInBits() - 31;
21950 }
21951 break;
21952 }
21953 }
21954
21955 return 1;
21956}
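// Illustrative example (not from the upstream source) of the VMV_X_S case
// above: extracting element 0 of an i8-element vector on RV64 yields
// XLen - EltBits + 1 = 64 - 8 + 1 = 57 sign bits, because vmv.x.s
// sign-extends the element to XLEN.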
21957
21958bool RISCVTargetLowering::SimplifyDemandedBitsForTargetNode(
21959 SDValue Op, const APInt &OriginalDemandedBits,
21960 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
21961 unsigned Depth) const {
21962 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
21963
21964 switch (Op.getOpcode()) {
21965 case RISCVISD::BREV8:
21966 case RISCVISD::ORC_B: {
21967 KnownBits Known2;
21968 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
21969 // For BREV8, we need to do BREV8 on the demanded bits.
21970 // For ORC_B, any bit in the output demands all bits from the same byte.
21971 // So we need to do ORC_B on the demanded bits.
21972 APInt DemandedBits =
21973 APInt(BitWidth, computeGREVOrGORC(OriginalDemandedBits.getZExtValue(),
21974 7, IsGORC));
21975 if (SimplifyDemandedBits(Op.getOperand(0), DemandedBits,
21976 OriginalDemandedElts, Known2, TLO, Depth + 1))
21977 return true;
21978
21979 // To compute zeros for ORC_B, we need to invert the value and invert it
21980 // back after. This inverting is harmless for BREV8.
21981 Known.Zero = ~computeGREVOrGORC(~Known2.Zero.getZExtValue(), 7, IsGORC);
21982 Known.One = computeGREVOrGORC(Known2.One.getZExtValue(), 7, IsGORC);
21983 return false;
21984 }
21985 }
21986
21987 return TargetLowering::SimplifyDemandedBitsForTargetNode(
21988 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
21989}
21990
21991bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
21992 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
21993 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
21994
21995 // TODO: Add more target nodes.
21996 switch (Op.getOpcode()) {
21997 case RISCVISD::SLLW:
21998 case RISCVISD::SRAW:
21999 case RISCVISD::SRLW:
22000 case RISCVISD::RORW:
22001 case RISCVISD::ROLW:
22002 // Only the lower 5 bits of RHS are read, guaranteeing the rotate/shift
22003 // amount is in bounds.
22004 return false;
22005 case RISCVISD::SELECT_CC:
22006 // Integer comparisons cannot create poison.
22007 assert(Op.getOperand(0).getValueType().isInteger() &&
22008 "RISCVISD::SELECT_CC only compares integers");
22009 return false;
22010 }
22011 return TargetLowering::canCreateUndefOrPoisonForTargetNode(
22012 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
22013}
22014
22015const Constant *
22016RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
22017 assert(Ld && "Unexpected null LoadSDNode");
22018 if (!ISD::isNormalLoad(Ld))
22019 return nullptr;
22020
22021 SDValue Ptr = Ld->getBasePtr();
22022
22023 // Only constant pools with no offset are supported.
22024 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
22025 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
22026 if (!CNode || CNode->isMachineConstantPoolEntry() ||
22027 CNode->getOffset() != 0)
22028 return nullptr;
22029
22030 return CNode;
22031 };
22032
22033 // Simple case, LLA.
22034 if (Ptr.getOpcode() == RISCVISD::LLA) {
22035 auto *CNode = GetSupportedConstantPool(Ptr.getOperand(0));
22036 if (!CNode || CNode->getTargetFlags() != 0)
22037 return nullptr;
22038
22039 return CNode->getConstVal();
22040 }
22041
22042 // Look for a HI and ADD_LO pair.
22043 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
22044 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
22045 return nullptr;
22046
22047 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
22048 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
22049
22050 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
22051 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
22052 return nullptr;
22053
22054 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
22055 return nullptr;
22056
22057 return CNodeLo->getConstVal();
22058}
22059
22060static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
22061 MachineBasicBlock *BB) {
22062 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
22063
22064 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
22065 // Should the count have wrapped while it was being read, we need to try
22066 // again.
22067 // For example:
22068 // ```
22069 // read:
22070 // csrrs x3, counterh # load high word of counter
22071 // csrrs x2, counter # load low word of counter
22072 // csrrs x4, counterh # load high word of counter
22073 // bne x3, x4, read # check if high word reads match, otherwise try again
22074 // ```
22075
22076 MachineFunction &MF = *BB->getParent();
22077 const BasicBlock *LLVMBB = BB->getBasicBlock();
22078 MachineFunction::iterator It = ++BB->getIterator();
22079
22080 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
22081 MF.insert(It, LoopMBB);
22082
22083 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
22084 MF.insert(It, DoneMBB);
22085
22086 // Transfer the remainder of BB and its successor edges to DoneMBB.
22087 DoneMBB->splice(DoneMBB->begin(), BB,
22088 std::next(MachineBasicBlock::iterator(MI)), BB->end());
22089 DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
22090
22091 BB->addSuccessor(LoopMBB);
22092
22093 MachineRegisterInfo &RegInfo = MF.getRegInfo();
22094 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
22095 Register LoReg = MI.getOperand(0).getReg();
22096 Register HiReg = MI.getOperand(1).getReg();
22097 int64_t LoCounter = MI.getOperand(2).getImm();
22098 int64_t HiCounter = MI.getOperand(3).getImm();
22099 DebugLoc DL = MI.getDebugLoc();
22100
22102 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
22103 .addImm(HiCounter)
22104 .addReg(RISCV::X0);
22105 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
22106 .addImm(LoCounter)
22107 .addReg(RISCV::X0);
22108 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
22109 .addImm(HiCounter)
22110 .addReg(RISCV::X0);
22111
22112 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
22113 .addReg(HiReg)
22114 .addReg(ReadAgainReg)
22115 .addMBB(LoopMBB);
22116
22117 LoopMBB->addSuccessor(LoopMBB);
22118 LoopMBB->addSuccessor(DoneMBB);
22119
22120 MI.eraseFromParent();
22121
22122 return DoneMBB;
22123}
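// Usage note (assumption, not stated in this file): on riscv32 this expansion
// is what counter intrinsics such as llvm.readcyclecounter ultimately become,
// since the 64-bit count has to be assembled from a CSR pair (e.g.
// cycle/cycleh) using the wrap-around retry loop shown above.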
22124
22125static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
22126 MachineBasicBlock *BB,
22127 const RISCVSubtarget &Subtarget) {
22128 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
22129
22130 MachineFunction &MF = *BB->getParent();
22131 DebugLoc DL = MI.getDebugLoc();
22134 Register LoReg = MI.getOperand(0).getReg();
22135 Register HiReg = MI.getOperand(1).getReg();
22136 Register SrcReg = MI.getOperand(2).getReg();
22137
22138 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
22139 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
22140
22141 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
22142 RI, Register());
22144 MachineMemOperand *MMOLo =
22148 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
22149 .addFrameIndex(FI)
22150 .addImm(0)
22151 .addMemOperand(MMOLo);
22152 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
22153 .addFrameIndex(FI)
22154 .addImm(4)
22155 .addMemOperand(MMOHi);
22156 MI.eraseFromParent(); // The pseudo instruction is gone now.
22157 return BB;
22158}
22159
22160static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
22161 MachineBasicBlock *BB,
22162 const RISCVSubtarget &Subtarget) {
22163 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
22164 "Unexpected instruction");
22165
22166 MachineFunction &MF = *BB->getParent();
22167 DebugLoc DL = MI.getDebugLoc();
22170 Register DstReg = MI.getOperand(0).getReg();
22171 Register LoReg = MI.getOperand(1).getReg();
22172 Register HiReg = MI.getOperand(2).getReg();
22173
22174 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
22175 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
22176
22178 MachineMemOperand *MMOLo =
22182 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
22183 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
22184 .addFrameIndex(FI)
22185 .addImm(0)
22186 .addMemOperand(MMOLo);
22187 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
22188 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
22189 .addFrameIndex(FI)
22190 .addImm(4)
22191 .addMemOperand(MMOHi);
22192 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
22193 MI.eraseFromParent(); // The pseudo instruction is gone now.
22194 return BB;
22195}
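// Illustrative expansion (assumed, not normative) of the two pseudos above on
// an RV32 target with D where the GPR<->FPR64 exchange goes through a stack
// slot:
//   SplitF64Pseudo:     fsd fa0, off(sp); lw a0, off(sp); lw a1, off+4(sp)
//   BuildPairF64Pseudo: sw a0, off(sp); sw a1, off+4(sp); fld fa0, off(sp)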
22196
22197static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
22198 unsigned RelOpcode, unsigned EqOpcode,
22199 const RISCVSubtarget &Subtarget) {
22200 DebugLoc DL = MI.getDebugLoc();
22201 Register DstReg = MI.getOperand(0).getReg();
22202 Register Src1Reg = MI.getOperand(1).getReg();
22203 Register Src2Reg = MI.getOperand(2).getReg();
22205 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22207
22208 // Save the current FFLAGS.
22209 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
22210
22211 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
22212 .addReg(Src1Reg)
22213 .addReg(Src2Reg);
22216
22217 // Restore the FFLAGS.
22218 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
22219 .addReg(SavedFFlags, RegState::Kill);
22220
22221 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
22222 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
22223 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
22224 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
22227
22228 // Erase the pseudoinstruction.
22229 MI.eraseFromParent();
22230 return BB;
22231}
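// Illustrative expansion (assumed) for PseudoQuietFLT_S:
//   frflags t0              ; save FFLAGS
//   flt.s   a0, fa0, fa1    ; signaling compare, may spuriously raise NV
//   fsflags t0              ; restore FFLAGS, discarding that NV
//   feq.s   x0, fa0, fa1    ; quiet compare, raises NV only for signaling NaNs
// which yields the relational result with quiet-NaN exception semantics.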
22232
22233static MachineBasicBlock *
22234EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
22235 MachineBasicBlock *ThisMBB,
22236 const RISCVSubtarget &Subtarget) {
22237 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)
22238 // Without this, custom-inserter would have generated:
22239 //
22240 // A
22241 // | \
22242 // | B
22243 // | /
22244 // C
22245 // | \
22246 // | D
22247 // | /
22248 // E
22249 //
22250 // A: X = ...; Y = ...
22251 // B: empty
22252 // C: Z = PHI [X, A], [Y, B]
22253 // D: empty
22254 // E: PHI [X, C], [Z, D]
22255 //
22256 // If we lower both Select_FPRX_ in a single step, we can instead generate:
22257 //
22258 // A
22259 // | \
22260 // | C
22261 // | /|
22262 // |/ |
22263 // | |
22264 // | D
22265 // | /
22266 // E
22267 //
22268 // A: X = ...; Y = ...
22269 // D: empty
22270 // E: PHI [X, A], [X, C], [Y, D]
22271
22272 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
22273 const DebugLoc &DL = First.getDebugLoc();
22274 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
22275 MachineFunction *F = ThisMBB->getParent();
22276 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
22277 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
22278 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
22279 MachineFunction::iterator It = ++ThisMBB->getIterator();
22280 F->insert(It, FirstMBB);
22281 F->insert(It, SecondMBB);
22282 F->insert(It, SinkMBB);
22283
22284 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
22285 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
22287 ThisMBB->end());
22288 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
22289
22290 // Fallthrough block for ThisMBB.
22291 ThisMBB->addSuccessor(FirstMBB);
22292 // Fallthrough block for FirstMBB.
22293 FirstMBB->addSuccessor(SecondMBB);
22294 ThisMBB->addSuccessor(SinkMBB);
22295 FirstMBB->addSuccessor(SinkMBB);
22296 // This is fallthrough.
22297 SecondMBB->addSuccessor(SinkMBB);
22298
22299 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
22300 Register FLHS = First.getOperand(1).getReg();
22301 Register FRHS = First.getOperand(2).getReg();
22302 // Insert appropriate branch.
22303 BuildMI(FirstMBB, DL, TII.get(RISCVCC::getBrCond(FirstCC, First.getOpcode())))
22304 .addReg(FLHS)
22305 .addReg(FRHS)
22306 .addMBB(SinkMBB);
22307
22308 Register SLHS = Second.getOperand(1).getReg();
22309 Register SRHS = Second.getOperand(2).getReg();
22310 Register Op1Reg4 = First.getOperand(4).getReg();
22311 Register Op1Reg5 = First.getOperand(5).getReg();
22312
22313 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
22314 // Insert appropriate branch.
22315 BuildMI(ThisMBB, DL,
22316 TII.get(RISCVCC::getBrCond(SecondCC, Second.getOpcode())))
22317 .addReg(SLHS)
22318 .addReg(SRHS)
22319 .addMBB(SinkMBB);
22320
22321 Register DestReg = Second.getOperand(0).getReg();
22322 Register Op2Reg4 = Second.getOperand(4).getReg();
22323 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
22324 .addReg(Op2Reg4)
22325 .addMBB(ThisMBB)
22326 .addReg(Op1Reg4)
22327 .addMBB(FirstMBB)
22328 .addReg(Op1Reg5)
22329 .addMBB(SecondMBB);
22330
22331 // Now remove the Select_FPRX_s.
22332 First.eraseFromParent();
22333 Second.eraseFromParent();
22334 return SinkMBB;
22335}
22336
22337static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
22338 MachineBasicBlock *BB,
22339 const RISCVSubtarget &Subtarget) {
22340 // To "insert" Select_* instructions, we actually have to insert the triangle
22341 // control-flow pattern. The incoming instructions know the destination vreg
22342 // to set, the condition code register to branch on, the true/false values to
22343 // select between, and the condcode to use to select the appropriate branch.
22344 //
22345 // We produce the following control flow:
22346 // HeadMBB
22347 // | \
22348 // | IfFalseMBB
22349 // | /
22350 // TailMBB
22351 //
22352 // When we find a sequence of selects we attempt to optimize their emission
22353 // by sharing the control flow. Currently we only handle cases where we have
22354 // multiple selects with the exact same condition (same LHS, RHS and CC).
22355 // The selects may be interleaved with other instructions if the other
22356 // instructions meet some requirements we deem safe:
22357 // - They are not pseudo instructions.
22358 // - They are debug instructions. Otherwise,
22359 // - They do not have side-effects, do not access memory and their inputs do
22360 // not depend on the results of the select pseudo-instructions.
22361 // - They don't adjust stack.
22362 // The TrueV/FalseV operands of the selects cannot depend on the result of
22363 // previous selects in the sequence.
22364 // These conditions could be further relaxed. See the X86 target for a
22365 // related approach and more information.
22366 //
22367 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
22368 // is checked here and handled by a separate function -
22369 // EmitLoweredCascadedSelect.
22370
22371 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
22372 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
22373 MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
22374 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
22375 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
22376 Next->getOperand(5).isKill())
22377 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
22378
22379 Register LHS = MI.getOperand(1).getReg();
22380 Register RHS;
22381 if (MI.getOperand(2).isReg())
22382 RHS = MI.getOperand(2).getReg();
22383 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
22384
22385 SmallVector<MachineInstr *, 4> SelectDebugValues;
22386 SmallSet<Register, 4> SelectDests;
22387 SelectDests.insert(MI.getOperand(0).getReg());
22388
22389 MachineInstr *LastSelectPseudo = &MI;
22390 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
22391
22392 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
22393 SequenceMBBI != E; ++SequenceMBBI) {
22394 if (SequenceMBBI->isDebugInstr())
22395 continue;
22396 if (RISCVInstrInfo::isSelectPseudo(*SequenceMBBI)) {
22397 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
22398 !SequenceMBBI->getOperand(2).isReg() ||
22399 SequenceMBBI->getOperand(2).getReg() != RHS ||
22400 SequenceMBBI->getOperand(3).getImm() != CC ||
22401 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
22402 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
22403 break;
22404 LastSelectPseudo = &*SequenceMBBI;
22405 SequenceMBBI->collectDebugValues(SelectDebugValues);
22406 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
22407 continue;
22408 }
22409 if (SequenceMBBI->hasUnmodeledSideEffects() ||
22410 SequenceMBBI->mayLoadOrStore() ||
22411 SequenceMBBI->usesCustomInsertionHook() ||
22412 TII.isFrameInstr(*SequenceMBBI) ||
22413 SequenceMBBI->isStackAligningInlineAsm())
22414 break;
22415 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
22416 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
22417 }))
22418 break;
22419 }
22420
22421 const BasicBlock *LLVM_BB = BB->getBasicBlock();
22422 DebugLoc DL = MI.getDebugLoc();
22423 MachineFunction::iterator I = ++BB->getIterator();
22424
22425 MachineBasicBlock *HeadMBB = BB;
22426 MachineFunction *F = BB->getParent();
22427 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
22428 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
22429
22430 F->insert(I, IfFalseMBB);
22431 F->insert(I, TailMBB);
22432
22433 // Set the call frame size on entry to the new basic blocks.
22434 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
22435 IfFalseMBB->setCallFrameSize(CallFrameSize);
22436 TailMBB->setCallFrameSize(CallFrameSize);
22437
22438 // Transfer debug instructions associated with the selects to TailMBB.
22439 for (MachineInstr *DebugInstr : SelectDebugValues) {
22440 TailMBB->push_back(DebugInstr->removeFromParent());
22441 }
22442
22443 // Move all instructions after the sequence to TailMBB.
22444 TailMBB->splice(TailMBB->end(), HeadMBB,
22445 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
22446 // Update machine-CFG edges by transferring all successors of the current
22447 // block to the new block which will contain the Phi nodes for the selects.
22448 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
22449 // Set the successors for HeadMBB.
22450 HeadMBB->addSuccessor(IfFalseMBB);
22451 HeadMBB->addSuccessor(TailMBB);
22452
22453 // Insert appropriate branch.
22454 if (MI.getOperand(2).isImm())
22455 BuildMI(HeadMBB, DL, TII.get(RISCVCC::getBrCond(CC, MI.getOpcode())))
22456 .addReg(LHS)
22457 .addImm(MI.getOperand(2).getImm())
22458 .addMBB(TailMBB);
22459 else
22460 BuildMI(HeadMBB, DL, TII.get(RISCVCC::getBrCond(CC, MI.getOpcode())))
22461 .addReg(LHS)
22462 .addReg(RHS)
22463 .addMBB(TailMBB);
22464
22465 // IfFalseMBB just falls through to TailMBB.
22466 IfFalseMBB->addSuccessor(TailMBB);
22467
22468 // Create PHIs for all of the select pseudo-instructions.
22469 auto SelectMBBI = MI.getIterator();
22470 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
22471 auto InsertionPoint = TailMBB->begin();
22472 while (SelectMBBI != SelectEnd) {
22473 auto Next = std::next(SelectMBBI);
22474 if (RISCVInstrInfo::isSelectPseudo(*SelectMBBI)) {
22475 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
22476 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
22477 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
22478 .addReg(SelectMBBI->getOperand(4).getReg())
22479 .addMBB(HeadMBB)
22480 .addReg(SelectMBBI->getOperand(5).getReg())
22481 .addMBB(IfFalseMBB);
22482 SelectMBBI->eraseFromParent();
22483 }
22484 SelectMBBI = Next;
22485 }
22486
22487 F->getProperties().resetNoPHIs();
22488 return TailMBB;
22489}
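// Illustrative outcome (not from the upstream source): two back-to-back
// selects on the same condition, e.g.
//   %a = select (x < y), %t0, %f0
//   %b = select (x < y), %t1, %f1
// share a single conditional branch and one IfFalseMBB, with one PHI per
// select emitted at the start of TailMBB.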
22490
22491// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
22492static const RISCV::RISCVMaskedPseudoInfo *
22493lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVVType::VLMUL LMul, unsigned SEW) {
22495 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
22496 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
22498 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
22499 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
22500 return Masked;
22501}
22502
22503static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
22504 MachineBasicBlock *BB,
22505 unsigned CVTXOpc) {
22506 DebugLoc DL = MI.getDebugLoc();
22507
22509
22511 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22512
22513 // Save the old value of FFLAGS.
22514 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
22515
22516 assert(MI.getNumOperands() == 7);
22517
22518 // Emit a VFCVT_X_F
22519 const TargetRegisterInfo *TRI =
22521 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
22522 Register Tmp = MRI.createVirtualRegister(RC);
22523 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
22524 .add(MI.getOperand(1))
22525 .add(MI.getOperand(2))
22526 .add(MI.getOperand(3))
22527 .add(MachineOperand::CreateImm(7)) // frm = DYN
22528 .add(MI.getOperand(4))
22529 .add(MI.getOperand(5))
22530 .add(MI.getOperand(6))
22531 .add(MachineOperand::CreateReg(RISCV::FRM,
22532 /*IsDef*/ false,
22533 /*IsImp*/ true));
22534
22535 // Emit a VFCVT_F_X
22536 RISCVVType::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
22537 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
22538 // There is no E8 variant for VFCVT_F_X.
22539 assert(Log2SEW >= 4);
22540 unsigned CVTFOpc =
22541 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
22542 ->MaskedPseudo;
22543
22544 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
22545 .add(MI.getOperand(0))
22546 .add(MI.getOperand(1))
22547 .addReg(Tmp)
22548 .add(MI.getOperand(3))
22549 .add(MachineOperand::CreateImm(7)) // frm = DYN
22550 .add(MI.getOperand(4))
22551 .add(MI.getOperand(5))
22552 .add(MI.getOperand(6))
22553 .add(MachineOperand::CreateReg(RISCV::FRM,
22554 /*IsDef*/ false,
22555 /*IsImp*/ true));
22556
22557 // Restore FFLAGS.
22558 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
22559 .addReg(SavedFFLAGS, RegState::Kill);
22560
22561 // Erase the pseudoinstruction.
22562 MI.eraseFromParent();
22563 return BB;
22564}
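// Summary sketch (assumed) of the expansion above for a masked LMUL-1 input:
//   frflags t0                      ; save FFLAGS
//   vfcvt.x.f.v vtmp, vsrc, v0.t    ; to integer, dynamic rounding mode
//   vfcvt.f.x.v vdst, vtmp, v0.t    ; back to FP
//   fsflags t0                      ; restore FFLAGS so no exception escapes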
22565
22566static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
22567 const RISCVSubtarget &Subtarget) {
22568 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
22569 const TargetRegisterClass *RC;
22570 switch (MI.getOpcode()) {
22571 default:
22572 llvm_unreachable("Unexpected opcode");
22573 case RISCV::PseudoFROUND_H:
22574 CmpOpc = RISCV::FLT_H;
22575 F2IOpc = RISCV::FCVT_W_H;
22576 I2FOpc = RISCV::FCVT_H_W;
22577 FSGNJOpc = RISCV::FSGNJ_H;
22578 FSGNJXOpc = RISCV::FSGNJX_H;
22579 RC = &RISCV::FPR16RegClass;
22580 break;
22581 case RISCV::PseudoFROUND_H_INX:
22582 CmpOpc = RISCV::FLT_H_INX;
22583 F2IOpc = RISCV::FCVT_W_H_INX;
22584 I2FOpc = RISCV::FCVT_H_W_INX;
22585 FSGNJOpc = RISCV::FSGNJ_H_INX;
22586 FSGNJXOpc = RISCV::FSGNJX_H_INX;
22587 RC = &RISCV::GPRF16RegClass;
22588 break;
22589 case RISCV::PseudoFROUND_S:
22590 CmpOpc = RISCV::FLT_S;
22591 F2IOpc = RISCV::FCVT_W_S;
22592 I2FOpc = RISCV::FCVT_S_W;
22593 FSGNJOpc = RISCV::FSGNJ_S;
22594 FSGNJXOpc = RISCV::FSGNJX_S;
22595 RC = &RISCV::FPR32RegClass;
22596 break;
22597 case RISCV::PseudoFROUND_S_INX:
22598 CmpOpc = RISCV::FLT_S_INX;
22599 F2IOpc = RISCV::FCVT_W_S_INX;
22600 I2FOpc = RISCV::FCVT_S_W_INX;
22601 FSGNJOpc = RISCV::FSGNJ_S_INX;
22602 FSGNJXOpc = RISCV::FSGNJX_S_INX;
22603 RC = &RISCV::GPRF32RegClass;
22604 break;
22605 case RISCV::PseudoFROUND_D:
22606 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
22607 CmpOpc = RISCV::FLT_D;
22608 F2IOpc = RISCV::FCVT_L_D;
22609 I2FOpc = RISCV::FCVT_D_L;
22610 FSGNJOpc = RISCV::FSGNJ_D;
22611 FSGNJXOpc = RISCV::FSGNJX_D;
22612 RC = &RISCV::FPR64RegClass;
22613 break;
22614 case RISCV::PseudoFROUND_D_INX:
22615 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
22616 CmpOpc = RISCV::FLT_D_INX;
22617 F2IOpc = RISCV::FCVT_L_D_INX;
22618 I2FOpc = RISCV::FCVT_D_L_INX;
22619 FSGNJOpc = RISCV::FSGNJ_D_INX;
22620 FSGNJXOpc = RISCV::FSGNJX_D_INX;
22621 RC = &RISCV::GPRRegClass;
22622 break;
22623 }
22624
22625 const BasicBlock *BB = MBB->getBasicBlock();
22626 DebugLoc DL = MI.getDebugLoc();
22627 MachineFunction::iterator I = ++MBB->getIterator();
22628
22629 MachineFunction *F = MBB->getParent();
22630 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
22631 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
22632
22633 F->insert(I, CvtMBB);
22634 F->insert(I, DoneMBB);
22635 // Move all instructions after the sequence to DoneMBB.
22636 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
22637 MBB->end());
22638 // Update machine-CFG edges by transferring all successors of the current
22639 // block to the new block which will contain the Phi nodes for the selects.
22640 DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
22641 // Set the successors for MBB.
22642 MBB->addSuccessor(CvtMBB);
22643 MBB->addSuccessor(DoneMBB);
22644
22645 Register DstReg = MI.getOperand(0).getReg();
22646 Register SrcReg = MI.getOperand(1).getReg();
22647 Register MaxReg = MI.getOperand(2).getReg();
22648 int64_t FRM = MI.getOperand(3).getImm();
22649
22650 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
22651 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
22652
22653 Register FabsReg = MRI.createVirtualRegister(RC);
22654 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
22655
22656 // Compare the FP value to the max value.
22657 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22658 auto MIB =
22659 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
22662
22663 // Insert branch.
22664 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
22665 .addReg(CmpReg)
22666 .addReg(RISCV::X0)
22667 .addMBB(DoneMBB);
22668
22669 CvtMBB->addSuccessor(DoneMBB);
22670
22671 // Convert to integer.
22672 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22673 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
22676
22677 // Convert back to FP.
22678 Register I2FReg = MRI.createVirtualRegister(RC);
22679 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
22682
22683 // Restore the sign bit.
22684 Register CvtReg = MRI.createVirtualRegister(RC);
22685 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
22686
22687 // Merge the results.
22688 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
22689 .addReg(SrcReg)
22690 .addMBB(MBB)
22691 .addReg(CvtReg)
22692 .addMBB(CvtMBB);
22693
22694 MI.eraseFromParent();
22695 return DoneMBB;
22696}
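// Illustrative expansion (assumed) for PseudoFROUND_S, where MaxReg holds the
// magnitude (2^23 for f32) above which every value is already integral:
//   fsgnjx.s ft0, fa0, fa0      ; |x|
//   flt.s    t0, ft0, fmax      ; |x| < max ?
//   beq      t0, x0, done       ; no: keep x (also covers NaN)
//   fcvt.w.s t1, fa0, <frm>     ; to integer with the requested rounding mode
//   fcvt.s.w ft1, t1, <frm>     ; back to FP
//   fsgnj.s  fa0, ft1, fa0      ; reattach the original sign (handles -0.0)
// done: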
22697
22698MachineBasicBlock *
22699RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
22700 MachineBasicBlock *BB) const {
22701 switch (MI.getOpcode()) {
22702 default:
22703 llvm_unreachable("Unexpected instr type to insert");
22704 case RISCV::ReadCounterWide:
22705 assert(!Subtarget.is64Bit() &&
22706 "ReadCounterWide is only to be used on riscv32");
22707 return emitReadCounterWidePseudo(MI, BB);
22708 case RISCV::Select_GPR_Using_CC_GPR:
22709 case RISCV::Select_GPR_Using_CC_Imm5_Zibi:
22710 case RISCV::Select_GPR_Using_CC_SImm5_CV:
22711 case RISCV::Select_GPRNoX0_Using_CC_SImm5NonZero_QC:
22712 case RISCV::Select_GPRNoX0_Using_CC_UImm5NonZero_QC:
22713 case RISCV::Select_GPRNoX0_Using_CC_SImm16NonZero_QC:
22714 case RISCV::Select_GPRNoX0_Using_CC_UImm16NonZero_QC:
22715 case RISCV::Select_GPR_Using_CC_UImmLog2XLen_NDS:
22716 case RISCV::Select_GPR_Using_CC_UImm7_NDS:
22717 case RISCV::Select_FPR16_Using_CC_GPR:
22718 case RISCV::Select_FPR16INX_Using_CC_GPR:
22719 case RISCV::Select_FPR32_Using_CC_GPR:
22720 case RISCV::Select_FPR32INX_Using_CC_GPR:
22721 case RISCV::Select_FPR64_Using_CC_GPR:
22722 case RISCV::Select_FPR64INX_Using_CC_GPR:
22723 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
22724 return emitSelectPseudo(MI, BB, Subtarget);
22725 case RISCV::BuildPairF64Pseudo:
22726 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
22727 case RISCV::SplitF64Pseudo:
22728 return emitSplitF64Pseudo(MI, BB, Subtarget);
22729 case RISCV::PseudoQuietFLE_H:
22730 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
22731 case RISCV::PseudoQuietFLE_H_INX:
22732 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
22733 case RISCV::PseudoQuietFLT_H:
22734 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
22735 case RISCV::PseudoQuietFLT_H_INX:
22736 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
22737 case RISCV::PseudoQuietFLE_S:
22738 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
22739 case RISCV::PseudoQuietFLE_S_INX:
22740 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
22741 case RISCV::PseudoQuietFLT_S:
22742 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
22743 case RISCV::PseudoQuietFLT_S_INX:
22744 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
22745 case RISCV::PseudoQuietFLE_D:
22746 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
22747 case RISCV::PseudoQuietFLE_D_INX:
22748 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
22749 case RISCV::PseudoQuietFLE_D_IN32X:
22750 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
22751 Subtarget);
22752 case RISCV::PseudoQuietFLT_D:
22753 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
22754 case RISCV::PseudoQuietFLT_D_INX:
22755 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
22756 case RISCV::PseudoQuietFLT_D_IN32X:
22757 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
22758 Subtarget);
22759
22760 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
22761 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
22762 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
22763 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
22764 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
22765 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
22766 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
22767 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
22768 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
22769 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
22770 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
22771 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
22772 case RISCV::PseudoFROUND_H:
22773 case RISCV::PseudoFROUND_H_INX:
22774 case RISCV::PseudoFROUND_S:
22775 case RISCV::PseudoFROUND_S_INX:
22776 case RISCV::PseudoFROUND_D:
22777 case RISCV::PseudoFROUND_D_INX:
22778 case RISCV::PseudoFROUND_D_IN32X:
22779 return emitFROUND(MI, BB, Subtarget);
22780 case RISCV::PROBED_STACKALLOC_DYN:
22781 return emitDynamicProbedAlloc(MI, BB);
22782 case TargetOpcode::STATEPOINT:
22783 // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
22784 // while the JAL call instruction (to which the statepoint is lowered at the
22785 // end) has an implicit def. This def is early-clobber as it is set at the
22786 // moment of the call, before any use is read.
22787 // Add this implicit dead def here as a workaround.
22788 MI.addOperand(*MI.getMF(),
22790 RISCV::X1, /*isDef*/ true,
22791 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
22792 /*isUndef*/ false, /*isEarlyClobber*/ true));
22793 [[fallthrough]];
22794 case TargetOpcode::STACKMAP:
22795 case TargetOpcode::PATCHPOINT:
22796 if (!Subtarget.is64Bit())
22797 reportFatalUsageError("STACKMAP, PATCHPOINT and STATEPOINT are only "
22798 "supported on 64-bit targets");
22799 return emitPatchPoint(MI, BB);
22800 }
22801}
22802
22804 SDNode *Node) const {
22805 // If instruction defines FRM operand, conservatively set it as non-dead to
22806 // express data dependency with FRM users and prevent incorrect instruction
22807 // reordering.
22808 if (auto *FRMDef = MI.findRegisterDefOperand(RISCV::FRM, /*TRI=*/nullptr)) {
22809 FRMDef->setIsDead(false);
22810 return;
22811 }
22812 // Add FRM dependency to any instructions with dynamic rounding mode.
22813 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
22814 if (Idx < 0) {
22815 // Vector pseudos have FRM index indicated by TSFlags.
22816 Idx = RISCVII::getFRMOpNum(MI.getDesc());
22817 if (Idx < 0)
22818 return;
22819 }
22820 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
22821 return;
22822 // If the instruction already reads FRM, don't add another read.
22823 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
22824 return;
22825 MI.addOperand(
22826 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
22827}
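// Note on the hook above: for example, a scalar FP instruction whose frm
// operand is DYN (dynamic rounding) gets an implicit FRM read appended,
// unless it already reads FRM, so scheduling cannot move it across writes to
// the FRM control register.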
22828
22829void RISCVTargetLowering::analyzeInputArgs(
22830 MachineFunction &MF, CCState &CCInfo,
22831 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
22832 RISCVCCAssignFn Fn) const {
22833 for (const auto &[Idx, In] : enumerate(Ins)) {
22834 MVT ArgVT = In.VT;
22835 ISD::ArgFlagsTy ArgFlags = In.Flags;
22836
22837 if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
22838 In.OrigTy)) {
22839 LLVM_DEBUG(dbgs() << "InputArg #" << Idx << " has unhandled type "
22840 << ArgVT << '\n');
22841 llvm_unreachable(nullptr);
22842 }
22843 }
22844}
22845
22846void RISCVTargetLowering::analyzeOutputArgs(
22847 MachineFunction &MF, CCState &CCInfo,
22848 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
22849 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
22850 for (const auto &[Idx, Out] : enumerate(Outs)) {
22851 MVT ArgVT = Out.VT;
22852 ISD::ArgFlagsTy ArgFlags = Out.Flags;
22853
22854 if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
22855 Out.OrigTy)) {
22856 LLVM_DEBUG(dbgs() << "OutputArg #" << Idx << " has unhandled type "
22857 << ArgVT << "\n");
22858 llvm_unreachable(nullptr);
22859 }
22860 }
22861}
22862
22863// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
22864// values.
22866 const CCValAssign &VA, const SDLoc &DL,
22867 const RISCVSubtarget &Subtarget) {
22868 if (VA.needsCustom()) {
22869 if (VA.getLocVT().isInteger() &&
22870 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
22871 return DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
22872 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
22873 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
22875 return convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
22876 llvm_unreachable("Unexpected Custom handling.");
22877 }
22878
22879 switch (VA.getLocInfo()) {
22880 default:
22881 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22882 case CCValAssign::Full:
22883 break;
22884 case CCValAssign::BCvt:
22885 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
22886 break;
22887 }
22888 return Val;
22889}
22890
22891// The caller is responsible for loading the full value if the argument is
22892// passed with CCValAssign::Indirect.
22894 const CCValAssign &VA, const SDLoc &DL,
22895 const ISD::InputArg &In,
22896 const RISCVTargetLowering &TLI) {
22899 EVT LocVT = VA.getLocVT();
22900 SDValue Val;
22901 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
22902 Register VReg = RegInfo.createVirtualRegister(RC);
22903 RegInfo.addLiveIn(VA.getLocReg(), VReg);
22904 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
22905
22906 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
22907 if (In.isOrigArg()) {
22908 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
22909 if (OrigArg->getType()->isIntegerTy()) {
22910 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
22911 // An input zero extended from i31 can also be considered sign extended.
22912 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
22913 (BitWidth < 32 && In.Flags.isZExt())) {
22915 RVFI->addSExt32Register(VReg);
22916 }
22917 }
22918 }
22919
22921 return Val;
22922
22923 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
22924}
22925
22927 const CCValAssign &VA, const SDLoc &DL,
22928 const RISCVSubtarget &Subtarget) {
22929 EVT LocVT = VA.getLocVT();
22930
22931 if (VA.needsCustom()) {
22932 if (LocVT.isInteger() &&
22933 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
22934 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
22935 if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)
22936 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
22937 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
22938 return convertToScalableVector(LocVT, Val, DAG, Subtarget);
22939 llvm_unreachable("Unexpected Custom handling.");
22940 }
22941
22942 switch (VA.getLocInfo()) {
22943 default:
22944 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22945 case CCValAssign::Full:
22946 break;
22947 case CCValAssign::BCvt:
22948 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
22949 break;
22950 }
22951 return Val;
22952}
22953
22954// The caller is responsible for loading the full value if the argument is
22955// passed with CCValAssign::Indirect.
22957 const CCValAssign &VA, const SDLoc &DL) {
22959 MachineFrameInfo &MFI = MF.getFrameInfo();
22960 EVT LocVT = VA.getLocVT();
22961 EVT ValVT = VA.getValVT();
22963 if (VA.getLocInfo() == CCValAssign::Indirect) {
22964 // When the value is a scalable vector, the stack slot holds a pointer to
22965 // the scalable vector value, so ValVT will be the pointer type instead of
22966 // the scalable vector type.
22967 ValVT = LocVT;
22968 }
22969 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
22970 /*IsImmutable=*/true);
22971 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
22972 SDValue Val;
22973
22975 switch (VA.getLocInfo()) {
22976 default:
22977 llvm_unreachable("Unexpected CCValAssign::LocInfo");
22978 case CCValAssign::Full:
22980 case CCValAssign::BCvt:
22981 break;
22982 }
22983 Val = DAG.getExtLoad(
22984 ExtType, DL, LocVT, Chain, FIN,
22986 return Val;
22987}
22988
22990 const CCValAssign &VA,
22991 const CCValAssign &HiVA,
22992 const SDLoc &DL) {
22993 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
22994 "Unexpected VA");
22996 MachineFrameInfo &MFI = MF.getFrameInfo();
22998
22999 assert(VA.isRegLoc() && "Expected register VA assignment");
23000
23001 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
23002 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
23003 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
23004 SDValue Hi;
23005 if (HiVA.isMemLoc()) {
23006 // Second half of f64 is passed on the stack.
23007 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
23008 /*IsImmutable=*/true);
23009 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
23010 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
23012 } else {
23013 // Second half of f64 is passed in another GPR.
23014 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
23015 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
23016 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
23017 }
23018 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
23019}
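// On RV32 with a soft-float (non-D) ABI, an f64 argument assigned as above
// arrives split: the low half in a GPR and the high half either in the next
// GPR or in a 4-byte stack slot, and the two halves are rejoined with
// BuildPairF64.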
23020
23021// Transform physical registers into virtual registers.
23023 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
23024 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
23025 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
23026
23028
23029 switch (CallConv) {
23030 default:
23031 reportFatalUsageError("Unsupported calling convention");
23032 case CallingConv::C:
23033 case CallingConv::Fast:
23036 case CallingConv::GRAAL:
23038#define CC_VLS_CASE(ABI_VLEN) case CallingConv::RISCV_VLSCall_##ABI_VLEN:
23039 CC_VLS_CASE(32)
23040 CC_VLS_CASE(64)
23041 CC_VLS_CASE(128)
23042 CC_VLS_CASE(256)
23043 CC_VLS_CASE(512)
23044 CC_VLS_CASE(1024)
23045 CC_VLS_CASE(2048)
23046 CC_VLS_CASE(4096)
23047 CC_VLS_CASE(8192)
23048 CC_VLS_CASE(16384)
23049 CC_VLS_CASE(32768)
23050 CC_VLS_CASE(65536)
23051#undef CC_VLS_CASE
23052 break;
23053 case CallingConv::GHC:
23054 if (Subtarget.hasStdExtE())
23055 reportFatalUsageError("GHC calling convention is not supported on RVE!");
23056 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
23057 reportFatalUsageError("GHC calling convention requires the (Zfinx/F) and "
23058 "(Zdinx/D) instruction set extensions");
23059 }
23060
23061 const Function &Func = MF.getFunction();
23062 if (Func.hasFnAttribute("interrupt")) {
23063 if (!Func.arg_empty())
23065 "Functions with the interrupt attribute cannot have arguments!");
23066
23067 StringRef Kind =
23068 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
23069
23070 constexpr StringLiteral SupportedInterruptKinds[] = {
23071 "machine",
23072 "supervisor",
23073 "rnmi",
23074 "qci-nest",
23075 "qci-nonest",
23076 "SiFive-CLIC-preemptible",
23077 "SiFive-CLIC-stack-swap",
23078 "SiFive-CLIC-preemptible-stack-swap",
23079 };
23080 if (!llvm::is_contained(SupportedInterruptKinds, Kind))
23082 "Function interrupt attribute argument not supported!");
23083
23084 if (Kind.starts_with("qci-") && !Subtarget.hasVendorXqciint())
23086 "'qci-*' interrupt kinds require Xqciint extension");
23087
23088 if (Kind.starts_with("SiFive-CLIC-") && !Subtarget.hasVendorXSfmclic())
23090 "'SiFive-CLIC-*' interrupt kinds require XSfmclic extension");
23091
23092 if (Kind == "rnmi" && !Subtarget.hasStdExtSmrnmi())
23093 reportFatalUsageError("'rnmi' interrupt kind requires Srnmi extension");
23094 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
23095 if (Kind.starts_with("SiFive-CLIC-preemptible") && TFI->hasFP(MF))
23096 reportFatalUsageError("'SiFive-CLIC-preemptible' interrupt kinds cannot "
23097 "have a frame pointer");
23098 }
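// Illustrative source-level example (a sketch, not from this file): in C the
// attribute is written as
//   __attribute__((interrupt("machine"))) void handler(void);
// and the frontend lowers it to the "interrupt"="machine" string attribute
// validated above.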
23099
23100 EVT PtrVT = getPointerTy(DAG.getDataLayout());
23101 MVT XLenVT = Subtarget.getXLenVT();
23102 unsigned XLenInBytes = Subtarget.getXLen() / 8;
23103 // Used with varargs to accumulate store chains.
23104 std::vector<SDValue> OutChains;
23105
23106 // Assign locations to all of the incoming arguments.
23108 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
23109
23110 if (CallConv == CallingConv::GHC)
23112 else
23113 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
23115 : CC_RISCV);
23116
23117 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
23118 CCValAssign &VA = ArgLocs[i];
23119 SDValue ArgValue;
23120 // Passing f64 on RV32D with a soft float ABI must be handled as a special
23121 // case.
23122 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23123 assert(VA.needsCustom());
23124 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
23125 } else if (VA.isRegLoc())
23126 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
23127 else
23128 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
23129
23130 if (VA.getLocInfo() == CCValAssign::Indirect) {
23131 // If the original argument was split and passed by reference (e.g. i128
23132 // on RV32), we need to load all parts of it here (using the same
23133 // address). Vectors may be partly split to registers and partly to the
23134 // stack, in which case the base address is partly offset and subsequent
23135 // stores are relative to that.
23136 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
23138 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
23139 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
23140 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
23141 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
23142 CCValAssign &PartVA = ArgLocs[i + 1];
23143 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
23144 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
23145 if (PartVA.getValVT().isScalableVector())
23146 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
23147 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
23148 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
23150 ++i;
23151 ++InsIdx;
23152 }
23153 continue;
23154 }
23155 InVals.push_back(ArgValue);
23156 }
23157
23158 if (any_of(ArgLocs,
23159 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
23161
23162 if (IsVarArg) {
23163 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
23164 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
23165 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
23166 MachineFrameInfo &MFI = MF.getFrameInfo();
23167 MachineRegisterInfo &RegInfo = MF.getRegInfo();
23169
23170 // Size of the vararg save area. For now, the varargs save area is either
23171 // zero or large enough to hold a0-a7.
23172 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
23173 int FI;
23174
23175 // If all registers are allocated, then all varargs must be passed on the
23176 // stack and we don't need to save any argregs.
23177 if (VarArgsSaveSize == 0) {
23178 int VaArgOffset = CCInfo.getStackSize();
23179 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
23180 } else {
23181 int VaArgOffset = -VarArgsSaveSize;
23182 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
23183
23184 // If saving an odd number of registers, create an extra stack slot to
23185 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
23186 // offsets to even-numbered registers remain 2*XLEN-aligned.
23187 if (Idx % 2) {
23189 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
23190 VarArgsSaveSize += XLenInBytes;
23191 }
23192
23193 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
23194
23195 // Copy the integer registers that may have been used for passing varargs
23196 // to the vararg save area.
23197 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
23198 const Register Reg = RegInfo.createVirtualRegister(RC);
23199 RegInfo.addLiveIn(ArgRegs[I], Reg);
23200 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
23201 SDValue Store = DAG.getStore(
23202 Chain, DL, ArgValue, FIN,
23203 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
23204 OutChains.push_back(Store);
23205 FIN =
23206 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
23207 }
23208 }
23209
23210 // Record the frame index of the first variable argument,
23211 // which is needed by the VASTART lowering.
23212 RVFI->setVarArgsFrameIndex(FI);
23213 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
23214 }
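// For example (a sketch): an RV64 vararg callee with one named argument in
// a0 spills a1-a7 into the save area created above, plus one padding slot to
// keep the area 2*XLEN-aligned, so va_arg can walk register- and
// stack-passed varargs through a single frame index.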
23215
23216 // All stores are grouped in one node to allow the matching between
23217 // the size of Ins and InVals. This only happens for vararg functions.
23218 if (!OutChains.empty()) {
23219 OutChains.push_back(Chain);
23220 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
23221 }
23222
23223 return Chain;
23224}
23225
23226/// isEligibleForTailCallOptimization - Check whether the call is eligible
23227/// for tail call optimization.
23228/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
23229bool RISCVTargetLowering::isEligibleForTailCallOptimization(
23230 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
23231 const SmallVector<CCValAssign, 16> &ArgLocs) const {
23232
23233 auto CalleeCC = CLI.CallConv;
23234 auto &Outs = CLI.Outs;
23235 auto &Caller = MF.getFunction();
23236 auto CallerCC = Caller.getCallingConv();
23237
23238 // Exception-handling functions need a special set of instructions to
23239 // indicate a return to the hardware. Tail-calling another function would
23240 // probably break this.
23241 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
23242 // should be expanded as new function attributes are introduced.
23243 if (Caller.hasFnAttribute("interrupt"))
23244 return false;
23245
23246 // Do not tail call opt if the stack is used to pass parameters.
23247 if (CCInfo.getStackSize() != 0)
23248 return false;
23249
23250 // Do not tail call opt if any parameters need to be passed indirectly.
23251 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
23252 // passed indirectly. So the address of the value will be passed in a
23253 // register, or if not available, then the address is put on the stack. In
23254 // order to pass indirectly, space on the stack often needs to be allocated
23255 // in order to store the value. In this case the CCInfo.getStackSize()
23256 // != 0 check is not enough and we need to check whether any CCValAssign in
23257 // ArgLocs is passed CCValAssign::Indirect.
23258 for (auto &VA : ArgLocs)
23259 if (VA.getLocInfo() == CCValAssign::Indirect)
23260 return false;
23261
23262 // Do not tail call opt if either caller or callee uses struct return
23263 // semantics.
23264 auto IsCallerStructRet = Caller.hasStructRetAttr();
23265 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
23266 if (IsCallerStructRet || IsCalleeStructRet)
23267 return false;
23268
23269 // The callee has to preserve all registers the caller needs to preserve.
23270 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
23271 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
23272 if (CalleeCC != CallerCC) {
23273 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
23274 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
23275 return false;
23276 }
23277
23278 // Byval parameters hand the function a pointer directly into the stack area
23279 // we want to reuse during a tail call. Working around this *is* possible
23280 // but less efficient and uglier in LowerCall.
23281 for (auto &Arg : Outs)
23282 if (Arg.Flags.isByVal())
23283 return false;
23284
23285 return true;
23286}
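// In short: a call is accepted for tail-call optimization only when nothing
// is passed on the stack or indirectly, no byval or sret arguments are
// involved, the caller is not an interrupt handler, and (when the calling
// conventions differ) the callee preserves at least the caller's preserved
// registers.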
23287
23289 return DAG.getDataLayout().getPrefTypeAlign(
23290 VT.getTypeForEVT(*DAG.getContext()));
23291}
23292
23293// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
23294// and output parameter nodes.
23296 SmallVectorImpl<SDValue> &InVals) const {
23297 SelectionDAG &DAG = CLI.DAG;
23298 SDLoc &DL = CLI.DL;
23300 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
23302 SDValue Chain = CLI.Chain;
23303 SDValue Callee = CLI.Callee;
23304 bool &IsTailCall = CLI.IsTailCall;
23305 CallingConv::ID CallConv = CLI.CallConv;
23306 bool IsVarArg = CLI.IsVarArg;
23307 EVT PtrVT = getPointerTy(DAG.getDataLayout());
23308 MVT XLenVT = Subtarget.getXLenVT();
23309 const CallBase *CB = CLI.CB;
23310
23313
23314 // Set type id for call site info.
23315 if (MF.getTarget().Options.EmitCallGraphSection && CB && CB->isIndirectCall())
23316 CSInfo = MachineFunction::CallSiteInfo(*CB);
23317
23318 // Analyze the operands of the call, assigning locations to each operand.
23320 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
23321
23322 if (CallConv == CallingConv::GHC) {
23323 if (Subtarget.hasStdExtE())
23324 reportFatalUsageError("GHC calling convention is not supported on RVE!");
23325 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
23326 } else
23327 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
23329 : CC_RISCV);
23330
23331 // Check if it's really possible to do a tail call.
23332 if (IsTailCall)
23333 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
23334
23335 if (IsTailCall)
23336 ++NumTailCalls;
23337 else if (CLI.CB && CLI.CB->isMustTailCall())
23338 reportFatalInternalError("failed to perform tail call elimination on a "
23339 "call site marked musttail");
23340
23341 // Get a count of how many bytes are to be pushed on the stack.
23342 unsigned NumBytes = ArgCCInfo.getStackSize();
23343
23344 // Create local copies for byval args
23345 SmallVector<SDValue, 8> ByValArgs;
23346 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
23347 ISD::ArgFlagsTy Flags = Outs[i].Flags;
23348 if (!Flags.isByVal())
23349 continue;
23350
23351 SDValue Arg = OutVals[i];
23352 unsigned Size = Flags.getByValSize();
23353 Align Alignment = Flags.getNonZeroByValAlign();
23354
23355 int FI =
23356 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
23357 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
23358 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
23359
23360 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
23361 /*IsVolatile=*/false,
23362 /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
23364 ByValArgs.push_back(FIPtr);
23365 }
23366
23367 if (!IsTailCall)
23368 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
23369
23370 // Copy argument values to their designated locations.
23372 SmallVector<SDValue, 8> MemOpChains;
23373 SDValue StackPtr;
23374 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
23375 ++i, ++OutIdx) {
23376 CCValAssign &VA = ArgLocs[i];
23377 SDValue ArgValue = OutVals[OutIdx];
23378 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
23379
23380 // Handle passing f64 on RV32D with a soft float ABI as a special case.
23381 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23382 assert(VA.isRegLoc() && "Expected register VA assignment");
23383 assert(VA.needsCustom());
23384 SDValue SplitF64 = DAG.getNode(
23385 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
23386 SDValue Lo = SplitF64.getValue(0);
23387 SDValue Hi = SplitF64.getValue(1);
23388
23389 Register RegLo = VA.getLocReg();
23390 RegsToPass.push_back(std::make_pair(RegLo, Lo));
23391
23392 // Get the CCValAssign for the Hi part.
23393 CCValAssign &HiVA = ArgLocs[++i];
23394
23395 if (HiVA.isMemLoc()) {
23396 // Second half of f64 is passed on the stack.
23397 if (!StackPtr.getNode())
23398 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
23400 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
23401 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
23402 // Emit the store.
23403 MemOpChains.push_back(DAG.getStore(
23404 Chain, DL, Hi, Address,
23406 } else {
23407 // Second half of f64 is passed in another GPR.
23408 Register RegHigh = HiVA.getLocReg();
23409 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
23410 }
23411 continue;
23412 }
23413
23414 // Promote the value if needed.
23415 // For now, only handle fully promoted and indirect arguments.
23416 if (VA.getLocInfo() == CCValAssign::Indirect) {
23417 // Store the argument in a stack slot and pass its address.
23418 Align StackAlign =
23419 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
23420 getPrefTypeAlign(ArgValue.getValueType(), DAG));
23421 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
23422 // If the original argument was split (e.g. i128), we need
23423 // to store the required parts of it here (and pass just one address).
23424 // Vectors may be partly split to registers and partly to the stack, in
23425 // which case the base address is partly offset and subsequent stores are
23426 // relative to that.
23427 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
23428 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
23429 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
23430 // Calculate the total size to store. We don't know what we are
23431 // actually storing until we walk the parts in the loop below and
23432 // collect the info.
23434 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
23435 SDValue PartValue = OutVals[OutIdx + 1];
23436 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
23437 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
23438 EVT PartVT = PartValue.getValueType();
23439 if (PartVT.isScalableVector())
23440 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
23441 StoredSize += PartVT.getStoreSize();
23442 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
23443 Parts.push_back(std::make_pair(PartValue, Offset));
23444 ++i;
23445 ++OutIdx;
23446 }
23447 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
23448 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
23449 MemOpChains.push_back(
23450 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
23452 for (const auto &Part : Parts) {
23453 SDValue PartValue = Part.first;
23454 SDValue PartOffset = Part.second;
23456 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
23457 MemOpChains.push_back(
23458 DAG.getStore(Chain, DL, PartValue, Address,
23460 }
23461 ArgValue = SpillSlot;
23462 } else {
23463 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
23464 }
23465
23466 // Use local copy if it is a byval arg.
23467 if (Flags.isByVal())
23468 ArgValue = ByValArgs[j++];
23469
23470 if (VA.isRegLoc()) {
23471 // Queue up the argument copies and emit them at the end.
23472 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
23473
23474 const TargetOptions &Options = DAG.getTarget().Options;
23475 if (Options.EmitCallSiteInfo)
23476 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), i);
23477 } else {
23478 assert(VA.isMemLoc() && "Argument not register or memory");
23479 assert(!IsTailCall && "Tail call not allowed if stack is used "
23480 "for passing parameters");
23481
23482 // Work out the address of the stack slot.
23483 if (!StackPtr.getNode())
23484 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
23486 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
23488
23489 // Emit the store.
23490 MemOpChains.push_back(
23491 DAG.getStore(Chain, DL, ArgValue, Address,
23493 }
23494 }
23495
23496 // Join the stores, which are independent of one another.
23497 if (!MemOpChains.empty())
23498 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
23499
23500 SDValue Glue;
23501
23502 // Build a sequence of copy-to-reg nodes, chained and glued together.
23503 for (auto &Reg : RegsToPass) {
23504 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
23505 Glue = Chain.getValue(1);
23506 }
23507
23508 // Validate that none of the argument registers have been marked as
23509 // reserved; if any have, report an error. Do the same for the return address
23510 // if this is not a tail call.
23511 validateCCReservedRegs(RegsToPass, MF);
23512 if (!IsTailCall && MF.getSubtarget().isRegisterReservedByUser(RISCV::X1))
23514 MF.getFunction(),
23515 "Return address register required, but has been reserved."});
23516
23517 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
23518 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
23519 // split it and then direct call can be matched by PseudoCALL.
23520 bool CalleeIsLargeExternalSymbol = false;
23522 if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
23523 Callee = getLargeGlobalAddress(S, DL, PtrVT, DAG);
23524 else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
23525 Callee = getLargeExternalSymbol(S, DL, PtrVT, DAG);
23526 CalleeIsLargeExternalSymbol = true;
23527 }
23528 } else if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
23529 const GlobalValue *GV = S->getGlobal();
23530 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
23531 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
23532 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
23533 }
23534
23535 // The first call operand is the chain and the second is the target address.
23537 Ops.push_back(Chain);
23538 Ops.push_back(Callee);
23539
23540 // Add argument registers to the end of the list so that they are
23541 // known live into the call.
23542 for (auto &Reg : RegsToPass)
23543 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
23544
23545 // Add a register mask operand representing the call-preserved registers.
23546 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
23547 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
23548 assert(Mask && "Missing call preserved mask for calling convention");
23549 Ops.push_back(DAG.getRegisterMask(Mask));
23550
23551 // Glue the call to the argument copies, if any.
23552 if (Glue.getNode())
23553 Ops.push_back(Glue);
23554
23555 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
23556 "Unexpected CFI type for a direct call");
23557
23558 // Emit the call.
23559 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
23560
23561 // Use a software-guarded branch for large code model non-indirect calls.
23562 // A tail call to an external symbol will have a null CLI.CB, so we need
23563 // another way to determine the callsite type.
23564 bool NeedSWGuarded = false;
23566 Subtarget.hasStdExtZicfilp() &&
23567 ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol))
23568 NeedSWGuarded = true;
23569
23570 if (IsTailCall) {
23572 unsigned CallOpc =
23573 NeedSWGuarded ? RISCVISD::SW_GUARDED_TAIL : RISCVISD::TAIL;
23574 SDValue Ret = DAG.getNode(CallOpc, DL, NodeTys, Ops);
23575 if (CLI.CFIType)
23576 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
23577 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
23578 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
23579 return Ret;
23580 }
23581
23582 unsigned CallOpc = NeedSWGuarded ? RISCVISD::SW_GUARDED_CALL : RISCVISD::CALL;
23583 Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
23584 if (CLI.CFIType)
23585 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
23586
23587 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
23588 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
23589 Glue = Chain.getValue(1);
23590
23591 // Mark the end of the call, which is glued to the call itself.
23592 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
23593 Glue = Chain.getValue(1);
23594
23595 // Assign locations to each value returned by this call.
23597 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
23598 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
23599
23600 // Copy all of the result registers out of their specified physreg.
23601 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
23602 auto &VA = RVLocs[i];
23603 // Copy the value out
23604 SDValue RetValue =
23605 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
23606 // Glue the RetValue to the end of the call sequence
23607 Chain = RetValue.getValue(1);
23608 Glue = RetValue.getValue(2);
23609
23610 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23611 assert(VA.needsCustom());
23612 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
23613 MVT::i32, Glue);
23614 Chain = RetValue2.getValue(1);
23615 Glue = RetValue2.getValue(2);
23616 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
23617 RetValue2);
23618 } else
23619 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
23620
23621 InVals.push_back(RetValue);
23622 }
23623
23624 return Chain;
23625}
23626
23628 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
23629 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
23630 const Type *RetTy) const {
23632 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
23633
23634 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
23635 MVT VT = Outs[i].VT;
23636 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
23637 if (CC_RISCV(i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo,
23638 /*IsRet=*/true, Outs[i].OrigTy))
23639 return false;
23640 }
23641 return true;
23642}
23643
23644SDValue
23646 bool IsVarArg,
23648 const SmallVectorImpl<SDValue> &OutVals,
23649 const SDLoc &DL, SelectionDAG &DAG) const {
23651
23652 // Stores the assignment of the return value to a location.
23654
23655 // Info about the registers and stack slot.
23656 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
23657 *DAG.getContext());
23658
23659 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
23660 nullptr, CC_RISCV);
23661
23662 if (CallConv == CallingConv::GHC && !RVLocs.empty())
23663 reportFatalUsageError("GHC functions return void only");
23664
23665 SDValue Glue;
23666 SmallVector<SDValue, 4> RetOps(1, Chain);
23667
23668 // Copy the result values into the output registers.
23669 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
23670 SDValue Val = OutVals[OutIdx];
23671 CCValAssign &VA = RVLocs[i];
23672 assert(VA.isRegLoc() && "Can only return in registers!");
23673
23674 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23675 // Handle returning f64 on RV32D with a soft float ABI.
23676 assert(VA.isRegLoc() && "Expected return via registers");
23677 assert(VA.needsCustom());
23678 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
23679 DAG.getVTList(MVT::i32, MVT::i32), Val);
23680 SDValue Lo = SplitF64.getValue(0);
23681 SDValue Hi = SplitF64.getValue(1);
23682 Register RegLo = VA.getLocReg();
23683 Register RegHi = RVLocs[++i].getLocReg();
23684
23685 if (Subtarget.isRegisterReservedByUser(RegLo) ||
23686 Subtarget.isRegisterReservedByUser(RegHi))
23688 MF.getFunction(),
23689 "Return value register required, but has been reserved."});
23690
23691 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
23692 Glue = Chain.getValue(1);
23693 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
23694 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
23695 Glue = Chain.getValue(1);
23696 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
23697 } else {
23698 // Handle a 'normal' return.
23699 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
23700 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
23701
23702 if (Subtarget.isRegisterReservedByUser(VA.getLocReg()))
23704 MF.getFunction(),
23705 "Return value register required, but has been reserved."});
23706
23707 // Guarantee that all emitted copies are stuck together.
23708 Glue = Chain.getValue(1);
23709 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
23710 }
23711 }
23712
23713 RetOps[0] = Chain; // Update chain.
23714
23715 // Add the glue node if we have it.
23716 if (Glue.getNode()) {
23717 RetOps.push_back(Glue);
23718 }
23719
23720 if (any_of(RVLocs,
23721 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
23723
23724 unsigned RetOpc = RISCVISD::RET_GLUE;
23725 // Interrupt service routines use different return instructions.
23726 const Function &Func = DAG.getMachineFunction().getFunction();
23727 if (Func.hasFnAttribute("interrupt")) {
23728 if (!Func.getReturnType()->isVoidTy())
23730 "Functions with the interrupt attribute must have void return type!");
23731
23733 StringRef Kind =
23734 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
23735
23736 if (Kind == "supervisor")
23737 RetOpc = RISCVISD::SRET_GLUE;
23738 else if (Kind == "rnmi") {
23739 assert(Subtarget.hasFeature(RISCV::FeatureStdExtSmrnmi) &&
23740 "Need Smrnmi extension for rnmi");
23741 RetOpc = RISCVISD::MNRET_GLUE;
23742 } else if (Kind == "qci-nest" || Kind == "qci-nonest") {
23743 assert(Subtarget.hasFeature(RISCV::FeatureVendorXqciint) &&
23744 "Need Xqciint for qci-(no)nest");
23745 RetOpc = RISCVISD::QC_C_MILEAVERET_GLUE;
23746 } else
23747 RetOpc = RISCVISD::MRET_GLUE;
23748 }
23749
23750 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
23751}
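// Return opcodes selected above for interrupt handlers: "supervisor" uses
// SRET, "rnmi" uses MNRET (Smrnmi), "qci-nest"/"qci-nonest" use
// QC.C.MILEAVERET (Xqciint), and the remaining kinds use MRET.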
23752
23753void RISCVTargetLowering::validateCCReservedRegs(
23754 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
23755 MachineFunction &MF) const {
23756 const Function &F = MF.getFunction();
23757
23758 if (llvm::any_of(Regs, [this](auto Reg) {
23759 return Subtarget.isRegisterReservedByUser(Reg.first);
23760 }))
23761 F.getContext().diagnose(DiagnosticInfoUnsupported{
23762 F, "Argument register required, but has been reserved."});
23763}
23764
23765// Check if the result of the node is only used as a return value, as
23766// otherwise we can't perform a tail-call.
23768 if (N->getNumValues() != 1)
23769 return false;
23770 if (!N->hasNUsesOfValue(1, 0))
23771 return false;
23772
23773 SDNode *Copy = *N->user_begin();
23774
23775 if (Copy->getOpcode() == ISD::BITCAST) {
23776 return isUsedByReturnOnly(Copy, Chain);
23777 }
23778
23779 // TODO: Handle additional opcodes in order to support tail-calling libcalls
23780 // with soft float ABIs.
23781 if (Copy->getOpcode() != ISD::CopyToReg) {
23782 return false;
23783 }
23784
23785 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
23786 // isn't safe to perform a tail call.
23787 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
23788 return false;
23789
23790 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
23791 bool HasRet = false;
23792 for (SDNode *Node : Copy->users()) {
23793 if (Node->getOpcode() != RISCVISD::RET_GLUE)
23794 return false;
23795 HasRet = true;
23796 }
23797 if (!HasRet)
23798 return false;
23799
23800 Chain = Copy->getOperand(0);
23801 return true;
23802}
23803
23805 return CI->isTailCall();
23806}
23807
23808/// getConstraintType - Given a constraint letter, return the type of
23809/// constraint it is for this target.
23812 if (Constraint.size() == 1) {
23813 switch (Constraint[0]) {
23814 default:
23815 break;
23816 case 'f':
23817 case 'R':
23818 return C_RegisterClass;
23819 case 'I':
23820 case 'J':
23821 case 'K':
23822 return C_Immediate;
23823 case 'A':
23824 return C_Memory;
23825 case 's':
23826 case 'S': // A symbolic address
23827 return C_Other;
23828 }
23829 } else {
23830 if (Constraint == "vr" || Constraint == "vd" || Constraint == "vm")
23831 return C_RegisterClass;
23832 if (Constraint == "cr" || Constraint == "cR" || Constraint == "cf")
23833 return C_RegisterClass;
23834 }
23835 return TargetLowering::getConstraintType(Constraint);
23836}
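// Illustrative inline-asm usage (a sketch, not from the source tree): the
// 'A' constraint supplies an address held in a GPR for AMO-style assembly,
// e.g.
//   asm volatile("amoswap.w %0, %2, %1" : "=r"(old), "+A"(*ptr) : "r"(val));
// while 'f' selects an FP register and 'R' selects a GPR pair.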
23837
23838std::pair<unsigned, const TargetRegisterClass *>
23840 StringRef Constraint,
23841 MVT VT) const {
23842 // First, see if this is a constraint that directly corresponds to a RISC-V
23843 // register class.
23844 if (Constraint.size() == 1) {
23845 switch (Constraint[0]) {
23846 case 'r':
23847 // TODO: Support fixed vectors up to XLen for P extension?
23848 if (VT.isVector())
23849 break;
23850 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
23851 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
23852 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
23853 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
23854 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23855 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
23856 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
23857 case 'f':
23858 if (VT == MVT::f16) {
23859 if (Subtarget.hasStdExtZfhmin())
23860 return std::make_pair(0U, &RISCV::FPR16RegClass);
23861 if (Subtarget.hasStdExtZhinxmin())
23862 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
23863 } else if (VT == MVT::f32) {
23864 if (Subtarget.hasStdExtF())
23865 return std::make_pair(0U, &RISCV::FPR32RegClass);
23866 if (Subtarget.hasStdExtZfinx())
23867 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
23868 } else if (VT == MVT::f64) {
23869 if (Subtarget.hasStdExtD())
23870 return std::make_pair(0U, &RISCV::FPR64RegClass);
23871 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23872 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
23873 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
23874 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
23875 }
23876 break;
23877 case 'R':
23878 if (((VT == MVT::i64 || VT == MVT::f64) && !Subtarget.is64Bit()) ||
23879 (VT == MVT::i128 && Subtarget.is64Bit()))
23880 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
23881 break;
23882 default:
23883 break;
23884 }
23885 } else if (Constraint == "vr") {
23886 for (const auto *RC :
23887 {&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
23888 &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN3M1RegClass,
23889 &RISCV::VRN4M1RegClass, &RISCV::VRN5M1RegClass,
23890 &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass,
23891 &RISCV::VRN8M1RegClass, &RISCV::VRN2M2RegClass,
23892 &RISCV::VRN3M2RegClass, &RISCV::VRN4M2RegClass,
23893 &RISCV::VRN2M4RegClass}) {
23894 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
23895 return std::make_pair(0U, RC);
23896
23897 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
23898 MVT ContainerVT = getContainerForFixedLengthVector(VT);
23899 if (TRI->isTypeLegalForClass(*RC, ContainerVT))
23900 return std::make_pair(0U, RC);
23901 }
23902 }
23903 } else if (Constraint == "vd") {
23904 for (const auto *RC :
23905 {&RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
23906 &RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
23907 &RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
23908 &RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
23909 &RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
23910 &RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
23911 &RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
23912 &RISCV::VRN2M4NoV0RegClass}) {
23913 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
23914 return std::make_pair(0U, RC);
23915
23916 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
23917 MVT ContainerVT = getContainerForFixedLengthVector(VT);
23918 if (TRI->isTypeLegalForClass(*RC, ContainerVT))
23919 return std::make_pair(0U, RC);
23920 }
23921 }
23922 } else if (Constraint == "vm") {
23923 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
23924 return std::make_pair(0U, &RISCV::VMV0RegClass);
23925
23926 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
23927 MVT ContainerVT = getContainerForFixedLengthVector(VT);
24028 // VT here might be coerced to a vector with i8 elements, so we need to
24029 // check if this is an M1 register here instead of checking VMV0RegClass.
23930 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, ContainerVT))
23931 return std::make_pair(0U, &RISCV::VMV0RegClass);
23932 }
23933 } else if (Constraint == "cr") {
23934 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
23935 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
23936 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
23937 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
23938 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23939 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
23940 if (!VT.isVector())
23941 return std::make_pair(0U, &RISCV::GPRCRegClass);
23942 } else if (Constraint == "cR") {
23943 if (((VT == MVT::i64 || VT == MVT::f64) && !Subtarget.is64Bit()) ||
23944 (VT == MVT::i128 && Subtarget.is64Bit()))
23945 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
23946 } else if (Constraint == "cf") {
23947 if (VT == MVT::f16) {
23948 if (Subtarget.hasStdExtZfhmin())
23949 return std::make_pair(0U, &RISCV::FPR16CRegClass);
23950 if (Subtarget.hasStdExtZhinxmin())
23951 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
23952 } else if (VT == MVT::f32) {
23953 if (Subtarget.hasStdExtF())
23954 return std::make_pair(0U, &RISCV::FPR32CRegClass);
23955 if (Subtarget.hasStdExtZfinx())
23956 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
23957 } else if (VT == MVT::f64) {
23958 if (Subtarget.hasStdExtD())
23959 return std::make_pair(0U, &RISCV::FPR64CRegClass);
23960 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
23961 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
23962 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
23963 return std::make_pair(0U, &RISCV::GPRCRegClass);
23964 }
23965 }
23966
23967 // Clang will correctly decode the usage of register name aliases into their
23968 // official names. However, other frontends like `rustc` do not. This allows
23969 // users of these frontends to use the ABI names for registers in LLVM-style
23970 // register constraints.
23971 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
23972 .Case("{zero}", RISCV::X0)
23973 .Case("{ra}", RISCV::X1)
23974 .Case("{sp}", RISCV::X2)
23975 .Case("{gp}", RISCV::X3)
23976 .Case("{tp}", RISCV::X4)
23977 .Case("{t0}", RISCV::X5)
23978 .Case("{t1}", RISCV::X6)
23979 .Case("{t2}", RISCV::X7)
23980 .Cases({"{s0}", "{fp}"}, RISCV::X8)
23981 .Case("{s1}", RISCV::X9)
23982 .Case("{a0}", RISCV::X10)
23983 .Case("{a1}", RISCV::X11)
23984 .Case("{a2}", RISCV::X12)
23985 .Case("{a3}", RISCV::X13)
23986 .Case("{a4}", RISCV::X14)
23987 .Case("{a5}", RISCV::X15)
23988 .Case("{a6}", RISCV::X16)
23989 .Case("{a7}", RISCV::X17)
23990 .Case("{s2}", RISCV::X18)
23991 .Case("{s3}", RISCV::X19)
23992 .Case("{s4}", RISCV::X20)
23993 .Case("{s5}", RISCV::X21)
23994 .Case("{s6}", RISCV::X22)
23995 .Case("{s7}", RISCV::X23)
23996 .Case("{s8}", RISCV::X24)
23997 .Case("{s9}", RISCV::X25)
23998 .Case("{s10}", RISCV::X26)
23999 .Case("{s11}", RISCV::X27)
24000 .Case("{t3}", RISCV::X28)
24001 .Case("{t4}", RISCV::X29)
24002 .Case("{t5}", RISCV::X30)
24003 .Case("{t6}", RISCV::X31)
24004 .Default(RISCV::NoRegister);
24005 if (XRegFromAlias != RISCV::NoRegister)
24006 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
24007
24008 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
24009 // TableGen record rather than the AsmName to choose registers for InlineAsm
24010 // constraints, and since we want to match those names to the widest floating
24011 // point register type available, manually select floating point registers here.
24012 //
24013 // The second case is the ABI name of the register, so that frontends can also
24014 // use the ABI names in register constraint lists.
24015 if (Subtarget.hasStdExtF()) {
24016 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
24017 .Cases({"{f0}", "{ft0}"}, RISCV::F0_F)
24018 .Cases({"{f1}", "{ft1}"}, RISCV::F1_F)
24019 .Cases({"{f2}", "{ft2}"}, RISCV::F2_F)
24020 .Cases({"{f3}", "{ft3}"}, RISCV::F3_F)
24021 .Cases({"{f4}", "{ft4}"}, RISCV::F4_F)
24022 .Cases({"{f5}", "{ft5}"}, RISCV::F5_F)
24023 .Cases({"{f6}", "{ft6}"}, RISCV::F6_F)
24024 .Cases({"{f7}", "{ft7}"}, RISCV::F7_F)
24025 .Cases({"{f8}", "{fs0}"}, RISCV::F8_F)
24026 .Cases({"{f9}", "{fs1}"}, RISCV::F9_F)
24027 .Cases({"{f10}", "{fa0}"}, RISCV::F10_F)
24028 .Cases({"{f11}", "{fa1}"}, RISCV::F11_F)
24029 .Cases({"{f12}", "{fa2}"}, RISCV::F12_F)
24030 .Cases({"{f13}", "{fa3}"}, RISCV::F13_F)
24031 .Cases({"{f14}", "{fa4}"}, RISCV::F14_F)
24032 .Cases({"{f15}", "{fa5}"}, RISCV::F15_F)
24033 .Cases({"{f16}", "{fa6}"}, RISCV::F16_F)
24034 .Cases({"{f17}", "{fa7}"}, RISCV::F17_F)
24035 .Cases({"{f18}", "{fs2}"}, RISCV::F18_F)
24036 .Cases({"{f19}", "{fs3}"}, RISCV::F19_F)
24037 .Cases({"{f20}", "{fs4}"}, RISCV::F20_F)
24038 .Cases({"{f21}", "{fs5}"}, RISCV::F21_F)
24039 .Cases({"{f22}", "{fs6}"}, RISCV::F22_F)
24040 .Cases({"{f23}", "{fs7}"}, RISCV::F23_F)
24041 .Cases({"{f24}", "{fs8}"}, RISCV::F24_F)
24042 .Cases({"{f25}", "{fs9}"}, RISCV::F25_F)
24043 .Cases({"{f26}", "{fs10}"}, RISCV::F26_F)
24044 .Cases({"{f27}", "{fs11}"}, RISCV::F27_F)
24045 .Cases({"{f28}", "{ft8}"}, RISCV::F28_F)
24046 .Cases({"{f29}", "{ft9}"}, RISCV::F29_F)
24047 .Cases({"{f30}", "{ft10}"}, RISCV::F30_F)
24048 .Cases({"{f31}", "{ft11}"}, RISCV::F31_F)
24049 .Default(RISCV::NoRegister);
24050 if (FReg != RISCV::NoRegister) {
24051 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
24052 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
24053 unsigned RegNo = FReg - RISCV::F0_F;
24054 unsigned DReg = RISCV::F0_D + RegNo;
24055 return std::make_pair(DReg, &RISCV::FPR64RegClass);
24056 }
24057 if (VT == MVT::f32 || VT == MVT::Other)
24058 return std::make_pair(FReg, &RISCV::FPR32RegClass);
24059 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
24060 unsigned RegNo = FReg - RISCV::F0_F;
24061 unsigned HReg = RISCV::F0_H + RegNo;
24062 return std::make_pair(HReg, &RISCV::FPR16RegClass);
24063 }
24064 }
24065 }
24066
24067 if (Subtarget.hasVInstructions()) {
24068 Register VReg = StringSwitch<Register>(Constraint.lower())
24069 .Case("{v0}", RISCV::V0)
24070 .Case("{v1}", RISCV::V1)
24071 .Case("{v2}", RISCV::V2)
24072 .Case("{v3}", RISCV::V3)
24073 .Case("{v4}", RISCV::V4)
24074 .Case("{v5}", RISCV::V5)
24075 .Case("{v6}", RISCV::V6)
24076 .Case("{v7}", RISCV::V7)
24077 .Case("{v8}", RISCV::V8)
24078 .Case("{v9}", RISCV::V9)
24079 .Case("{v10}", RISCV::V10)
24080 .Case("{v11}", RISCV::V11)
24081 .Case("{v12}", RISCV::V12)
24082 .Case("{v13}", RISCV::V13)
24083 .Case("{v14}", RISCV::V14)
24084 .Case("{v15}", RISCV::V15)
24085 .Case("{v16}", RISCV::V16)
24086 .Case("{v17}", RISCV::V17)
24087 .Case("{v18}", RISCV::V18)
24088 .Case("{v19}", RISCV::V19)
24089 .Case("{v20}", RISCV::V20)
24090 .Case("{v21}", RISCV::V21)
24091 .Case("{v22}", RISCV::V22)
24092 .Case("{v23}", RISCV::V23)
24093 .Case("{v24}", RISCV::V24)
24094 .Case("{v25}", RISCV::V25)
24095 .Case("{v26}", RISCV::V26)
24096 .Case("{v27}", RISCV::V27)
24097 .Case("{v28}", RISCV::V28)
24098 .Case("{v29}", RISCV::V29)
24099 .Case("{v30}", RISCV::V30)
24100 .Case("{v31}", RISCV::V31)
24101 .Default(RISCV::NoRegister);
24102 if (VReg != RISCV::NoRegister) {
24103 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
24104 return std::make_pair(VReg, &RISCV::VMRegClass);
24105 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
24106 return std::make_pair(VReg, &RISCV::VRRegClass);
24107 for (const auto *RC :
24108 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
24109 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
24110 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
24111 return std::make_pair(VReg, RC);
24112 }
24113 }
24114 }
24115 }
24116
24117 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
24118}
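// Illustrative example (a sketch): an explicit register constraint such as
// "{a0}" or "{fs0}" resolves through the StringSwitch tables above to X10 or
// F8_F (F8_D with the D extension and an f64 operand), so ABI names work the
// same way as architectural names.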
24119
24122 // Currently only support length 1 constraints.
24123 if (ConstraintCode.size() == 1) {
24124 switch (ConstraintCode[0]) {
24125 case 'A':
24127 default:
24128 break;
24129 }
24130 }
24131
24132 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
24133}
24134
24136 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
24137 SelectionDAG &DAG) const {
24138 // Currently only support length 1 constraints.
24139 if (Constraint.size() == 1) {
24140 switch (Constraint[0]) {
24141 case 'I':
24142 // Validate & create a 12-bit signed immediate operand.
24143 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
24144 uint64_t CVal = C->getSExtValue();
24145 if (isInt<12>(CVal))
24146 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
24147 Subtarget.getXLenVT()));
24148 }
24149 return;
24150 case 'J':
24151 // Validate & create an integer zero operand.
24152 if (isNullConstant(Op))
24153 Ops.push_back(
24154 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
24155 return;
24156 case 'K':
24157 // Validate & create a 5-bit unsigned immediate operand.
24158 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
24159 uint64_t CVal = C->getZExtValue();
24160 if (isUInt<5>(CVal))
24161 Ops.push_back(
24162 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
24163 }
24164 return;
24165 case 'S':
24167 return;
24168 default:
24169 break;
24170 }
24171 }
24173}
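// Illustrative immediate constraints (a sketch): "I"(2047) and "I"(-2048)
// are accepted as 12-bit signed immediates, "K"(31) as a 5-bit unsigned
// immediate, and "J" only matches the constant zero.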
24174
24176 Instruction *Inst,
24177 AtomicOrdering Ord) const {
24178 if (Subtarget.hasStdExtZtso()) {
24180 return Builder.CreateFence(Ord);
24181 return nullptr;
24182 }
24183
24185 return Builder.CreateFence(Ord);
24186 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
24187 return Builder.CreateFence(AtomicOrdering::Release);
24188 return nullptr;
24189}
24190
24192 Instruction *Inst,
24193 AtomicOrdering Ord) const {
24194 if (Subtarget.hasStdExtZtso()) {
24196 return Builder.CreateFence(Ord);
24197 return nullptr;
24198 }
24199
24200 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
24201 return Builder.CreateFence(AtomicOrdering::Acquire);
24202 if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
24204 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
24205 return nullptr;
24206}
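// Taken together, the two hooks above implement the fence mapping: without
// Ztso, release-or-stronger stores get a leading release fence and
// acquire-or-stronger loads get a trailing acquire fence (plus an optional
// trailing seq_cst fence for stores); with Ztso only sequentially consistent
// accesses still need explicit fences.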
24207
24210 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
24211 // point operations can't be used in an lr/sc sequence without breaking the
24212 // forward-progress guarantee.
24213 if (AI->isFloatingPointOperation() ||
24219
24220 // Don't expand forced atomics, we want to have __sync libcalls instead.
24221 if (Subtarget.hasForcedAtomics())
24223
24224 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
24225 if (AI->getOperation() == AtomicRMWInst::Nand) {
24226 if (Subtarget.hasStdExtZacas() &&
24227 (Size >= 32 || Subtarget.hasStdExtZabha()))
24229 if (Size < 32)
24231 }
24232
24233 if (Size < 32 && !Subtarget.hasStdExtZabha())
24235
24237}
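// As the comments above explain: floating-point atomicrmw is expanded via
// compare-exchange, sub-word and nand forms may need Zacas/Zabha support or
// a masked LR/SC intrinsic, and forced atomics are left for __sync libcalls.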
24238
24239static Intrinsic::ID
24241 switch (BinOp) {
24242 default:
24243 llvm_unreachable("Unexpected AtomicRMW BinOp");
24245 return Intrinsic::riscv_masked_atomicrmw_xchg;
24246 case AtomicRMWInst::Add:
24247 return Intrinsic::riscv_masked_atomicrmw_add;
24248 case AtomicRMWInst::Sub:
24249 return Intrinsic::riscv_masked_atomicrmw_sub;
24251 return Intrinsic::riscv_masked_atomicrmw_nand;
24252 case AtomicRMWInst::Max:
24253 return Intrinsic::riscv_masked_atomicrmw_max;
24254 case AtomicRMWInst::Min:
24255 return Intrinsic::riscv_masked_atomicrmw_min;
 24256 case AtomicRMWInst::UMax:
 24257 return Intrinsic::riscv_masked_atomicrmw_umax;
 24258 case AtomicRMWInst::UMin:
 24259 return Intrinsic::riscv_masked_atomicrmw_umin;
24260 }
24261}
24262
 24263Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
 24264 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
24265 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
24266 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
24267 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
24268 // mask, as this produces better code than the LR/SC loop emitted by
24269 // int_riscv_masked_atomicrmw_xchg.
 24270 if (AI->getOperation() == AtomicRMWInst::Xchg &&
 24271 isa<ConstantInt>(AI->getValOperand())) {
 24272 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
 24273 if (CVal->isZero())
24274 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
24275 Builder.CreateNot(Mask, "Inv_Mask"),
24276 AI->getAlign(), Ord);
24277 if (CVal->isMinusOne())
24278 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
24279 AI->getAlign(), Ord);
24280 }
24281
24282 unsigned XLen = Subtarget.getXLen();
24283 Value *Ordering =
24284 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
24285 Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()};
 24286 Function *LrwOpScwLoop = Intrinsic::getOrInsertDeclaration(
 24287 AI->getModule(),
 24288 getIntrinsicForMaskedAtomicRMWBinOp(AI->getOperation()), Tys);
24289
24290 if (XLen == 64) {
24291 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
24292 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
24293 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
24294 }
24295
24296 Value *Result;
24297
24298 // Must pass the shift amount needed to sign extend the loaded value prior
24299 // to performing a signed comparison for min/max. ShiftAmt is the number of
24300 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
24301 // is the number of bits to left+right shift the value in order to
24302 // sign-extend.
24303 if (AI->getOperation() == AtomicRMWInst::Min ||
24305 const DataLayout &DL = AI->getDataLayout();
24306 unsigned ValWidth =
24307 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
24308 Value *SextShamt =
24309 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
24310 Result = Builder.CreateCall(LrwOpScwLoop,
24311 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
24312 } else {
24313 Result =
24314 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
24315 }
24316
24317 if (XLen == 64)
24318 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
24319 return Result;
24320}
24321
 24322TargetLowering::AtomicExpansionKind
 24323RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
 24324 AtomicCmpXchgInst *CI) const {
24325 // Don't expand forced atomics, we want to have __sync libcalls instead.
 24326 if (Subtarget.hasForcedAtomics())
 24327 return AtomicExpansionKind::None;
 24328
 24329 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
 24330 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
 24331 (Size == 8 || Size == 16))
 24332 return AtomicExpansionKind::MaskedIntrinsic;
 24333 return AtomicExpansionKind::None;
 24334}
24335
 24336Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
 24337 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
24338 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
24339 unsigned XLen = Subtarget.getXLen();
24340 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
24341 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg;
24342 if (XLen == 64) {
24343 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
24344 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
24345 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
24346 }
24347 Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()};
24348 Value *Result = Builder.CreateIntrinsic(
24349 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
24350 if (XLen == 64)
24351 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
24352 return Result;
24353}
24354
 24355bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
 24356 EVT DataVT) const {
24357 // We have indexed loads for all supported EEW types. Indices are always
24358 // zero extended.
24359 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
24360 isTypeLegal(Extend.getValueType()) &&
24361 isTypeLegal(Extend.getOperand(0).getValueType()) &&
24362 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
24363}
24364
 24365bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
 24366 EVT VT) const {
24367 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
24368 return false;
24369
24370 switch (FPVT.getSimpleVT().SimpleTy) {
24371 case MVT::f16:
24372 return Subtarget.hasStdExtZfhmin();
24373 case MVT::f32:
24374 return Subtarget.hasStdExtF();
24375 case MVT::f64:
24376 return Subtarget.hasStdExtD();
24377 default:
24378 return false;
24379 }
24380}
24381
 24382unsigned RISCVTargetLowering::getJumpTableEncoding() const {
 24383 // If we are using the small code model, we can reduce size of jump table
 24384 // entry to 4 bytes.
 24385 if (Subtarget.is64Bit() && !isPositionIndependent() &&
 24386 getTargetMachine().getCodeModel() == CodeModel::Small) {
 24387 return MachineJumpTableInfo::EK_Custom32;
 24388 }
 24389 return TargetLowering::getJumpTableEncoding();
 24390}
24391
 24392const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
 24393 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
24394 unsigned uid, MCContext &Ctx) const {
 24395 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
 24396 "Unexpected custom jump table type");
 24397 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
24398}
24399
 24400bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
 24401 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
24402 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
24403 // a power of two as well.
24404 // FIXME: This doesn't work for zve32, but that's already broken
24405 // elsewhere for the same reason.
24406 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
24407 static_assert(RISCV::RVVBitsPerBlock == 64,
24408 "RVVBitsPerBlock changed, audit needed");
24409 return true;
24410}
24411
 24412bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
 24413 SDValue &Offset,
 24414 ISD::MemIndexedMode &AM,
24415 SelectionDAG &DAG) const {
24416 // Target does not support indexed loads.
24417 if (!Subtarget.hasVendorXTHeadMemIdx())
24418 return false;
24419
24420 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
24421 return false;
24422
24423 Base = Op->getOperand(0);
24424 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
24425 int64_t RHSC = RHS->getSExtValue();
24426 if (Op->getOpcode() == ISD::SUB)
24427 RHSC = -(uint64_t)RHSC;
24428
24429 // The constants that can be encoded in the THeadMemIdx instructions
24430 // are of the form (sign_extend(imm5) << imm2).
24431 bool isLegalIndexedOffset = false;
24432 for (unsigned i = 0; i < 4; i++)
24433 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
24434 isLegalIndexedOffset = true;
24435 break;
24436 }
24437
24438 if (!isLegalIndexedOffset)
24439 return false;
24440
24441 Offset = Op->getOperand(1);
24442 return true;
24443 }
24444
24445 return false;
24446}
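// Worked examples for the (sign_extend(imm5) << imm2) form checked above:
// offsets such as 8 (1 << 3), -16 (-2 << 3) and 60 (15 << 2) are accepted,
// while 17 (odd and larger than 15) or 2048 cannot be encoded, so the
// pre/post-increment combine is rejected for them.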
24447
 24448bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
 24449 SDValue &Offset,
 24450 ISD::MemIndexedMode &AM,
24451 SelectionDAG &DAG) const {
24452 EVT VT;
24453 SDValue Ptr;
24454 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
24455 VT = LD->getMemoryVT();
24456 Ptr = LD->getBasePtr();
24457 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
24458 VT = ST->getMemoryVT();
24459 Ptr = ST->getBasePtr();
24460 } else
24461 return false;
24462
24463 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
24464 return false;
24465
24466 AM = ISD::PRE_INC;
24467 return true;
24468}
24469
 24470bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
 24471 SDValue &Base,
 24472 SDValue &Offset,
 24473 ISD::MemIndexedMode &AM,
24474 SelectionDAG &DAG) const {
24475 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
24476 if (Op->getOpcode() != ISD::ADD)
24477 return false;
24478
 24479 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N))
 24480 Base = LS->getBasePtr();
24481 else
24482 return false;
24483
24484 if (Base == Op->getOperand(0))
24485 Offset = Op->getOperand(1);
24486 else if (Base == Op->getOperand(1))
24487 Offset = Op->getOperand(0);
24488 else
24489 return false;
24490
24491 AM = ISD::POST_INC;
24492 return true;
24493 }
24494
24495 EVT VT;
24496 SDValue Ptr;
24497 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
24498 VT = LD->getMemoryVT();
24499 Ptr = LD->getBasePtr();
24500 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
24501 VT = ST->getMemoryVT();
24502 Ptr = ST->getBasePtr();
24503 } else
24504 return false;
24505
24506 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
24507 return false;
24508 // Post-indexing updates the base, so it's not a valid transform
24509 // if that's not the same as the load's pointer.
24510 if (Ptr != Base)
24511 return false;
24512
24513 AM = ISD::POST_INC;
24514 return true;
24515}
24516
 24517bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
 24518 EVT VT) const {
24519 EVT SVT = VT.getScalarType();
24520
24521 if (!SVT.isSimple())
24522 return false;
24523
24524 switch (SVT.getSimpleVT().SimpleTy) {
24525 case MVT::f16:
24526 return VT.isVector() ? Subtarget.hasVInstructionsF16()
24527 : Subtarget.hasStdExtZfhOrZhinx();
24528 case MVT::f32:
24529 return Subtarget.hasStdExtFOrZfinx();
24530 case MVT::f64:
24531 return Subtarget.hasStdExtDOrZdinx();
24532 default:
24533 break;
24534 }
24535
24536 return false;
24537}
24538
 24539ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
 24540 // Zacas will use amocas.w which does not require extension.
24541 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
24542}
24543
24545 // Zaamo will use amo<op>.w which does not require extension.
24546 if (Subtarget.hasStdExtZaamo() || Subtarget.hasForcedAtomics())
24547 return ISD::ANY_EXTEND;
24548
24549 // Zalrsc pseudo expansions with comparison require sign-extension.
24550 assert(Subtarget.hasStdExtZalrsc());
24551 switch (Op) {
24552 case ISD::ATOMIC_LOAD_MIN:
24553 case ISD::ATOMIC_LOAD_MAX:
24554 case ISD::ATOMIC_LOAD_UMIN:
24555 case ISD::ATOMIC_LOAD_UMAX:
24556 return ISD::SIGN_EXTEND;
24557 default:
24558 break;
24559 }
24560 return ISD::ANY_EXTEND;
24561}
24562
 24563Register RISCVTargetLowering::getExceptionPointerRegister(
 24564 const Constant *PersonalityFn) const {
24565 return RISCV::X10;
24566}
24567
 24568Register RISCVTargetLowering::getExceptionSelectorRegister(
 24569 const Constant *PersonalityFn) const {
24570 return RISCV::X11;
24571}
24572
 24573bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
 24574 // Return false to suppress the unnecessary extensions if the LibCall
24575 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
24576 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
24577 Type.getSizeInBits() < Subtarget.getXLen()))
24578 return false;
24579
24580 return true;
24581}
24582
 24583bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(Type *Ty,
 24584 bool IsSigned) const {
24585 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
24586 return true;
24587
24588 return IsSigned;
24589}
24590
 24591bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
 24592 SDValue C) const {
24593 // Check integral scalar types.
24594 if (!VT.isScalarInteger())
24595 return false;
24596
24597 // Omit the optimization if the sub target has the M extension and the data
24598 // size exceeds XLen.
24599 const bool HasZmmul = Subtarget.hasStdExtZmmul();
24600 if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
24601 return false;
24602
24603 auto *ConstNode = cast<ConstantSDNode>(C);
24604 const APInt &Imm = ConstNode->getAPIntValue();
24605
24606 // Don't do this if the Xqciac extension is enabled and the Imm in simm12.
24607 if (Subtarget.hasVendorXqciac() && Imm.isSignedIntN(12))
24608 return false;
24609
24610 // Break the MUL to a SLLI and an ADD/SUB.
24611 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
24612 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
24613 return true;
24614
24615 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
24616 if (Subtarget.hasShlAdd(3) && !Imm.isSignedIntN(12) &&
24617 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
24618 (Imm - 8).isPowerOf2()))
24619 return true;
24620
24621 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
24622 // a pair of LUI/ADDI.
24623 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
24624 ConstNode->hasOneUse()) {
24625 APInt ImmS = Imm.ashr(Imm.countr_zero());
24626 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
24627 (1 - ImmS).isPowerOf2())
24628 return true;
24629 }
24630
24631 return false;
24632}
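// Worked examples for the checks above: x * 17 can become (x << 4) + x and
// x * 15 can become (x << 4) - x, since Imm -/+ 1 is a power of two. With a
// shNadd-capable extension such as Zba, x * 4104 (4096 + 8, not a simm12)
// can become sh3add x, (slli x, 12).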
24633
 24634bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
 24635 SDValue ConstNode) const {
24636 // Let the DAGCombiner decide for vectors.
24637 EVT VT = AddNode.getValueType();
24638 if (VT.isVector())
24639 return true;
24640
24641 // Let the DAGCombiner decide for larger types.
24642 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
24643 return true;
24644
24645 // It is worse if c1 is simm12 while c1*c2 is not.
24646 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
24647 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
24648 const APInt &C1 = C1Node->getAPIntValue();
24649 const APInt &C2 = C2Node->getAPIntValue();
24650 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
24651 return false;
24652
24653 // Default to true and let the DAGCombiner decide.
24654 return true;
24655}
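// Worked example for the simm12 check above: for (x + 2000) * 100, c1 = 2000
// fits in a simm12 but c1 * c2 = 200000 does not, so distributing the multiply
// would force an extra LUI/ADDI materialization and the fold is rejected;
// (x + 3) * 5 is fine because both 3 and 15 are simm12.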
24656
 24657bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
 24658 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
24659 unsigned *Fast) const {
24660 if (!VT.isVector()) {
24661 if (Fast)
24662 *Fast = Subtarget.enableUnalignedScalarMem();
24663 return Subtarget.enableUnalignedScalarMem();
24664 }
24665
24666 // All vector implementations must support element alignment
24667 EVT ElemVT = VT.getVectorElementType();
24668 if (Alignment >= ElemVT.getStoreSize()) {
24669 if (Fast)
24670 *Fast = 1;
24671 return true;
24672 }
24673
24674 // Note: We lower an unmasked unaligned vector access to an equally sized
24675 // e8 element type access. Given this, we effectively support all unmasked
24676 // misaligned accesses. TODO: Work through the codegen implications of
24677 // allowing such accesses to be formed, and considered fast.
24678 if (Fast)
24679 *Fast = Subtarget.enableUnalignedVectorMem();
24680 return Subtarget.enableUnalignedVectorMem();
24681}
24682
 24683EVT RISCVTargetLowering::getOptimalMemOpType(
 24684 LLVMContext &Context, const MemOp &Op,
24685 const AttributeList &FuncAttributes) const {
24686 if (!Subtarget.hasVInstructions())
24687 return MVT::Other;
24688
24689 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
24690 return MVT::Other;
24691
24692 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
24693 // has an expansion threshold, and we want the number of hardware memory
24694 // operations to correspond roughly to that threshold. LMUL>1 operations
24695 // are typically expanded linearly internally, and thus correspond to more
24696 // than one actual memory operation. Note that store merging and load
24697 // combining will typically form larger LMUL operations from the LMUL1
24698 // operations emitted here, and that's okay because combining isn't
24699 // introducing new memory operations; it's just merging existing ones.
24700 // NOTE: We limit to 1024 bytes to avoid creating an invalid MVT.
24701 const unsigned MinVLenInBytes =
24702 std::min(Subtarget.getRealMinVLen() / 8, 1024U);
24703
24704 if (Op.size() < MinVLenInBytes)
24705 // TODO: Figure out short memops. For the moment, do the default thing
24706 // which ends up using scalar sequences.
24707 return MVT::Other;
24708
24709 // If the minimum VLEN is less than RISCV::RVVBitsPerBlock we don't support
24710 // fixed vectors.
24711 if (MinVLenInBytes <= RISCV::RVVBytesPerBlock)
24712 return MVT::Other;
24713
24714 // Prefer i8 for non-zero memset as it allows us to avoid materializing
24715 // a large scalar constant and instead use vmv.v.x/i to do the
24716 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
24717 // maximize the chance we can encode the size in the vsetvli.
24718 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
24719 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
24720
24721 // Do we have sufficient alignment for our preferred VT? If not, revert
24722 // to largest size allowed by our alignment criteria.
24723 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
24724 Align RequiredAlign(PreferredVT.getStoreSize());
24725 if (Op.isFixedDstAlign())
24726 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
24727 if (Op.isMemcpy())
24728 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
24729 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
24730 }
24731 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
24732}
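// Illustrative example (assuming VLEN >= 128, ELEN = 64 and adequate
// alignment): a 64-byte memcpy is given MVT::v2i64 here, i.e. one LMUL1
// vector operation per 16 bytes, while a non-zero memset gets MVT::v16i8 so
// the fill byte can be broadcast with vmv.v.x.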
24733
 24734bool RISCVTargetLowering::splitValueIntoRegisterParts(
 24735 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
24736 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
24737 bool IsABIRegCopy = CC.has_value();
24738 EVT ValueVT = Val.getValueType();
24739
24740 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
24741 if ((ValueVT == PairVT ||
24742 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
24743 ValueVT == MVT::f64)) &&
24744 NumParts == 1 && PartVT == MVT::Untyped) {
24745 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
24746 MVT XLenVT = Subtarget.getXLenVT();
24747 if (ValueVT == MVT::f64)
24748 Val = DAG.getBitcast(MVT::i64, Val);
24749 auto [Lo, Hi] = DAG.SplitScalar(Val, DL, XLenVT, XLenVT);
24750 // Always creating an MVT::Untyped part, so always use
24751 // RISCVISD::BuildGPRPair.
24752 Parts[0] = DAG.getNode(RISCVISD::BuildGPRPair, DL, PartVT, Lo, Hi);
24753 return true;
24754 }
24755
24756 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
24757 PartVT == MVT::f32) {
24758 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
24759 // nan, and cast to f32.
24760 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
24761 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
24762 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
24763 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
24764 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
24765 Parts[0] = Val;
24766 return true;
24767 }
24768
24769 if (ValueVT.isRISCVVectorTuple() && PartVT.isRISCVVectorTuple()) {
24770#ifndef NDEBUG
24771 unsigned ValNF = ValueVT.getRISCVVectorTupleNumFields();
 24772 [[maybe_unused]] unsigned ValLMUL =
 24773 divideCeil(ValueVT.getSizeInBits().getKnownMinValue(),
 24774 ValNF * RISCV::RVVBitsPerBlock);
24775 unsigned PartNF = PartVT.getRISCVVectorTupleNumFields();
 24776 [[maybe_unused]] unsigned PartLMUL =
 24777 divideCeil(PartVT.getSizeInBits().getKnownMinValue(),
 24778 PartNF * RISCV::RVVBitsPerBlock);
24779 assert(ValNF == PartNF && ValLMUL == PartLMUL &&
24780 "RISC-V vector tuple type only accepts same register class type "
24781 "TUPLE_INSERT");
24782#endif
24783
24784 Val = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, PartVT, DAG.getUNDEF(PartVT),
24785 Val, DAG.getTargetConstant(0, DL, MVT::i32));
24786 Parts[0] = Val;
24787 return true;
24788 }
24789
24790 if ((ValueVT.isScalableVector() || ValueVT.isFixedLengthVector()) &&
24791 PartVT.isScalableVector()) {
24792 if (ValueVT.isFixedLengthVector()) {
24793 ValueVT = getContainerForFixedLengthVector(ValueVT.getSimpleVT());
24794 Val = convertToScalableVector(ValueVT, Val, DAG, Subtarget);
24795 }
24796 LLVMContext &Context = *DAG.getContext();
24797 EVT ValueEltVT = ValueVT.getVectorElementType();
24798 EVT PartEltVT = PartVT.getVectorElementType();
24799 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
24800 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
24801 if (PartVTBitSize % ValueVTBitSize == 0) {
24802 assert(PartVTBitSize >= ValueVTBitSize);
24803 // If the element types are different, bitcast to the same element type of
24804 // PartVT first.
 24805 // For example, to copy a <vscale x 1 x i8> value into a
 24806 // <vscale x 4 x i16>:
 24807 // we first widen <vscale x 1 x i8> to <vscale x 8 x i8> with an insert
 24808 // subvector, and can then bitcast to <vscale x 4 x i16>.
24809 if (ValueEltVT != PartEltVT) {
24810 if (PartVTBitSize > ValueVTBitSize) {
24811 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
24812 assert(Count != 0 && "The number of element should not be zero.");
24813 EVT SameEltTypeVT =
24814 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
24815 Val = DAG.getInsertSubvector(DL, DAG.getUNDEF(SameEltTypeVT), Val, 0);
24816 }
24817 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
24818 } else {
24819 Val = DAG.getInsertSubvector(DL, DAG.getUNDEF(PartVT), Val, 0);
24820 }
24821 Parts[0] = Val;
24822 return true;
24823 }
24824 }
24825
24826 return false;
24827}
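// Illustrative example for the [b]f16-in-f32 ABI copy above: the f16 value
// 1.0 (bits 0x3C00) is passed in an f32 register as the pattern 0xFFFF3C00,
// i.e. NaN-boxed with all-ones upper bits, and joinRegisterPartsIntoValue()
// below simply truncates those bits away when reading the value back.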
24828
 24829SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
 24830 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
24831 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
24832 bool IsABIRegCopy = CC.has_value();
24833
24834 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
24835 if ((ValueVT == PairVT ||
24836 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
24837 ValueVT == MVT::f64)) &&
24838 NumParts == 1 && PartVT == MVT::Untyped) {
24839 // Pairs in Inline Assembly, f64 in Inline assembly on rv32_zdinx
24840 MVT XLenVT = Subtarget.getXLenVT();
24841
24842 SDValue Val = Parts[0];
24843 // Always starting with an MVT::Untyped part, so always use
24844 // RISCVISD::SplitGPRPair
24845 Val = DAG.getNode(RISCVISD::SplitGPRPair, DL, DAG.getVTList(XLenVT, XLenVT),
24846 Val);
24847 Val = DAG.getNode(ISD::BUILD_PAIR, DL, PairVT, Val.getValue(0),
24848 Val.getValue(1));
24849 if (ValueVT == MVT::f64)
24850 Val = DAG.getBitcast(ValueVT, Val);
24851 return Val;
24852 }
24853
24854 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
24855 PartVT == MVT::f32) {
24856 SDValue Val = Parts[0];
24857
24858 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
24859 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
24860 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
24861 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
24862 return Val;
24863 }
24864
24865 if ((ValueVT.isScalableVector() || ValueVT.isFixedLengthVector()) &&
24866 PartVT.isScalableVector()) {
24867 LLVMContext &Context = *DAG.getContext();
24868 SDValue Val = Parts[0];
24869 EVT ValueEltVT = ValueVT.getVectorElementType();
24870 EVT PartEltVT = PartVT.getVectorElementType();
24871 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
24872 if (ValueVT.isFixedLengthVector())
24873 ValueVTBitSize = getContainerForFixedLengthVector(ValueVT.getSimpleVT())
 24874 .getSizeInBits()
 24875 .getKnownMinValue();
24876 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
24877 if (PartVTBitSize % ValueVTBitSize == 0) {
24878 assert(PartVTBitSize >= ValueVTBitSize);
24879 EVT SameEltTypeVT = ValueVT;
24880 // If the element types are different, convert it to the same element type
24881 // of PartVT.
 24882 // For example, to copy a <vscale x 1 x i8> value out of a
 24883 // <vscale x 4 x i16>:
 24884 // we first bitcast the <vscale x 4 x i16> to <vscale x 8 x i8>,
 24885 // then extract the <vscale x 1 x i8> subvector.
24886 if (ValueEltVT != PartEltVT) {
24887 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
24888 assert(Count != 0 && "The number of element should not be zero.");
24889 SameEltTypeVT =
24890 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
24891 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
24892 }
24893 if (ValueVT.isFixedLengthVector())
24894 Val = convertFromScalableVector(ValueVT, Val, DAG, Subtarget);
24895 else
24896 Val = DAG.getExtractSubvector(DL, ValueVT, Val, 0);
24897 return Val;
24898 }
24899 }
24900 return SDValue();
24901}
24902
24903bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
24904 // When aggressively optimizing for code size, we prefer to use a div
24905 // instruction, as it is usually smaller than the alternative sequence.
24906 // TODO: Add vector division?
24907 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
24908 return OptSize && !VT.isVector() &&
24910}
24911
 24912bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
 24913 // Scalarizing zero_ext and sign_ext splats can stop them from matching the
 24914 // widening instruction patterns in some situations.
 24915 unsigned Opc = N->getOpcode();
 24916 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
 24917 return false;
 24918 return true;
24919}
24920
24921static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
24922 Module *M = IRB.GetInsertBlock()->getModule();
24923 Function *ThreadPointerFunc = Intrinsic::getOrInsertDeclaration(
24924 M, Intrinsic::thread_pointer, IRB.getPtrTy());
24925 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
24926 IRB.CreateCall(ThreadPointerFunc), Offset);
24927}
24928
 24929Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
 24930 // Fuchsia provides a fixed TLS slot for the stack cookie.
24931 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
24932 if (Subtarget.isTargetFuchsia())
24933 return useTpOffset(IRB, -0x10);
24934
24935 // Android provides a fixed TLS slot for the stack cookie. See the definition
24936 // of TLS_SLOT_STACK_GUARD in
24937 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
24938 if (Subtarget.isTargetAndroid())
24939 return useTpOffset(IRB, -0x18);
24940
24941 Module *M = IRB.GetInsertBlock()->getModule();
24942
24943 if (M->getStackProtectorGuard() == "tls") {
24944 // Users must specify the offset explicitly
24945 int Offset = M->getStackProtectorGuardOffset();
24946 return useTpOffset(IRB, Offset);
24947 }
24948
 24949 return TargetLowering::getIRStackGuard(IRB);
 24950}
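// Illustrative usage (a sketch): when the module's stack-protector guard is
// set to "tls" (e.g. via the usual -mstack-protector-guard=tls and
// -mstack-protector-guard-offset=N frontend options), the pointer built above
// resolves to tp + N, so the cookie is loaded with an XLEN-sized access at
// N(tp); Android and Fuchsia hard-wire N to -0x18 and -0x10 respectively.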
24951
 24952bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
 24953 Align Alignment) const {
24954 if (!Subtarget.hasVInstructions())
24955 return false;
24956
24957 // Only support fixed vectors if we know the minimum vector size.
24958 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
24959 return false;
24960
24961 EVT ScalarType = DataType.getScalarType();
24962 if (!isLegalElementTypeForRVV(ScalarType))
24963 return false;
24964
24965 if (!Subtarget.enableUnalignedVectorMem() &&
24966 Alignment < ScalarType.getStoreSize())
24967 return false;
24968
24969 return true;
24970}
24971
 24972MachineInstr *
 24973RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
 24974 MachineBasicBlock::iterator &MBBI,
 24975 const TargetInstrInfo *TII) const {
24976 assert(MBBI->isCall() && MBBI->getCFIType() &&
24977 "Invalid call instruction for a KCFI check");
24978 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
24979 MBBI->getOpcode()));
24980
24981 MachineOperand &Target = MBBI->getOperand(0);
24982 Target.setIsRenamable(false);
24983
24984 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
24985 .addReg(Target.getReg())
24986 .addImm(MBBI->getCFIType())
24987 .getInstr();
24988}
24989
24990#define GET_REGISTER_MATCHER
24991#include "RISCVGenAsmMatcher.inc"
24992
 24993Register
 24994RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
 24995 const MachineFunction &MF) const {
 24996 Register Reg = MatchRegisterAltName(RegName);
 24997 if (!Reg)
 24998 Reg = MatchRegisterName(RegName);
 24999 if (!Reg)
25000 return Reg;
25001
25002 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
25003 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
25004 reportFatalUsageError(Twine("Trying to obtain non-reserved register \"" +
25005 StringRef(RegName) + "\"."));
25006 return Reg;
25007}
25008
 25009MachineMemOperand::Flags
 25010RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
 25011 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
25012
 25013 if (NontemporalInfo == nullptr)
 25014 return MachineMemOperand::MONone;
 25015
 25016 // 1 (the default value) works as __RISCV_NTLH_ALL
25017 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
25018 // 3 -> __RISCV_NTLH_ALL_PRIVATE
25019 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
25020 // 5 -> __RISCV_NTLH_ALL
25021 int NontemporalLevel = 5;
25022 const MDNode *RISCVNontemporalInfo =
25023 I.getMetadata("riscv-nontemporal-domain");
25024 if (RISCVNontemporalInfo != nullptr)
25025 NontemporalLevel =
25027 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
25028 ->getValue())
25029 ->getZExtValue();
25030
25031 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
25032 "RISC-V target doesn't support this non-temporal domain.");
25033
25034 NontemporalLevel -= 2;
 25035 MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
 25036 if (NontemporalLevel & 0b1)
25037 Flags |= MONontemporalBit0;
25038 if (NontemporalLevel & 0b10)
25039 Flags |= MONontemporalBit1;
25040
25041 return Flags;
25042}
25043
 25044MachineMemOperand::Flags
 25045RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
 25046
 25047 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
 25048 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
 25049 TargetFlags |= (NodeFlags & MONontemporalBit0);
25050 TargetFlags |= (NodeFlags & MONontemporalBit1);
25051 return TargetFlags;
25052}
25053
 25054bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
 25055 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
25056 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
25057}
25058
 25059bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
 25060 if (VT.isVector()) {
25061 EVT SVT = VT.getVectorElementType();
25062 // If the element type is legal we can use cpop.v if it is enabled.
25063 if (isLegalElementTypeForRVV(SVT))
25064 return Subtarget.hasStdExtZvbb();
25065 // Don't consider it fast if the type needs to be legalized or scalarized.
25066 return false;
25067 }
25068
25069 return Subtarget.hasCPOPLike() && (VT == MVT::i32 || VT == MVT::i64);
25070}
25071
 25072unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
 25073 ISD::CondCode Cond) const {
25074 return isCtpopFast(VT) ? 0 : 1;
25075}
25076
 25077bool RISCVTargetLowering::shouldInsertFencesForAtomic(
 25078 const Instruction *I) const {
25079 if (Subtarget.hasStdExtZalasr()) {
25080 if (Subtarget.hasStdExtZtso()) {
25081 // Zalasr + TSO means that atomic_load_acquire and atomic_store_release
25082 // should be lowered to plain load/store. The easiest way to do this is
25083 // to say we should insert fences for them, and the fence insertion code
25084 // will just not insert any fences
25085 auto *LI = dyn_cast<LoadInst>(I);
25086 auto *SI = dyn_cast<StoreInst>(I);
25087 if ((LI &&
25088 (LI->getOrdering() == AtomicOrdering::SequentiallyConsistent)) ||
25089 (SI &&
25090 (SI->getOrdering() == AtomicOrdering::SequentiallyConsistent))) {
25091 // Here, this is a load or store which is seq_cst, and needs a .aq or
25092 // .rl therefore we shouldn't try to insert fences
25093 return false;
25094 }
25095 // Here, we are a TSO inst that isn't a seq_cst load/store
25096 return isa<LoadInst>(I) || isa<StoreInst>(I);
25097 }
25098 return false;
25099 }
25100 // Note that one specific case requires fence insertion for an
25101 // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
25102 // than this hook due to limitations in the interface here.
25103 return isa<LoadInst>(I) || isa<StoreInst>(I);
25104}
25105
 25106bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
 25107
25108 // GISel support is in progress or complete for these opcodes.
25109 unsigned Op = Inst.getOpcode();
25110 if (Op == Instruction::Add || Op == Instruction::Sub ||
25111 Op == Instruction::And || Op == Instruction::Or ||
25112 Op == Instruction::Xor || Op == Instruction::InsertElement ||
25113 Op == Instruction::ShuffleVector || Op == Instruction::Load ||
25114 Op == Instruction::Freeze || Op == Instruction::Store)
25115 return false;
25116
25117 if (auto *II = dyn_cast<IntrinsicInst>(&Inst)) {
25118 // Mark RVV intrinsic as supported.
25119 if (RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(II->getIntrinsicID())) {
25120 // GISel doesn't support tuple types yet.
25121 if (Inst.getType()->isRISCVVectorTupleTy())
25122 return true;
25123
25124 for (unsigned i = 0; i < II->arg_size(); ++i)
25125 if (II->getArgOperand(i)->getType()->isRISCVVectorTupleTy())
25126 return true;
25127
25128 return false;
25129 }
25130 }
25131
25132 if (Inst.getType()->isScalableTy())
25133 return true;
25134
25135 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
25136 if (Inst.getOperand(i)->getType()->isScalableTy() &&
25137 !isa<ReturnInst>(&Inst))
25138 return true;
25139
25140 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
25141 if (AI->getAllocatedType()->isScalableTy())
25142 return true;
25143 }
25144
25145 return false;
25146}
25147
25148SDValue
25149RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
25150 SelectionDAG &DAG,
25151 SmallVectorImpl<SDNode *> &Created) const {
25152 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
25153 if (isIntDivCheap(N->getValueType(0), Attr))
25154 return SDValue(N, 0); // Lower SDIV as SDIV
25155
25156 // Only perform this transform if short forward branch opt is supported.
25157 if (!Subtarget.hasShortForwardBranchOpt())
25158 return SDValue();
25159 EVT VT = N->getValueType(0);
25160 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
25161 return SDValue();
25162
25163 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
25164 if (Divisor.sgt(2048) || Divisor.slt(-2048))
25165 return SDValue();
25166 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
25167}
25168
25169bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
25170 EVT VT, const APInt &AndMask) const {
25171 if (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())
 25172 return !Subtarget.hasBEXTILike() && AndMask.ugt(1024);
 25173 return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
 25174}
25175
 25176unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
 25177 return Subtarget.getMinimumJumpTableEntries();
25178}
25179
 25180SDValue RISCVTargetLowering::expandIndirectJTBranch(const SDLoc &dl,
 25181 SDValue Value, SDValue Addr,
25182 int JTI,
25183 SelectionDAG &DAG) const {
25184 if (Subtarget.hasStdExtZicfilp()) {
25185 // When Zicfilp enabled, we need to use software guarded branch for jump
25186 // table branch.
25187 SDValue Chain = Value;
25188 // Jump table debug info is only needed if CodeView is enabled.
 25189 if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF())
 25190 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
25191 return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, Chain, Addr);
25192 }
25193 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
25194}
25195
25196// If an output pattern produces multiple instructions tablegen may pick an
25197// arbitrary type from an instructions destination register class to use for the
25198// VT of that MachineSDNode. This VT may be used to look up the representative
25199// register class. If the type isn't legal, the default implementation will
25200// not find a register class.
25201//
25202// Some integer types smaller than XLen are listed in the GPR register class to
25203// support isel patterns for GISel, but are not legal in SelectionDAG. The
25204// arbitrary type tablegen picks may be one of these smaller types.
25205//
25206// f16 and bf16 are both valid for the FPR16 or GPRF16 register class. It's
25207// possible for tablegen to pick bf16 as the arbitrary type for an f16 pattern.
25208std::pair<const TargetRegisterClass *, uint8_t>
25209RISCVTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
25210 MVT VT) const {
25211 switch (VT.SimpleTy) {
25212 default:
25213 break;
25214 case MVT::i8:
25215 case MVT::i16:
25216 case MVT::i32:
25218 case MVT::bf16:
25219 case MVT::f16:
25221 }
25222
25224}
25225
 25226namespace llvm::RISCVVIntrinsicsTable {
 25227
25228#define GET_RISCVVIntrinsicsTable_IMPL
25229#include "RISCVGenSearchableTables.inc"
25230
25231} // namespace llvm::RISCVVIntrinsicsTable
25232
 25233bool RISCVTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
 25234
25235 // If the function specifically requests inline stack probes, emit them.
25236 if (MF.getFunction().hasFnAttribute("probe-stack"))
25237 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
25238 "inline-asm";
25239
25240 return false;
25241}
25242
 25243unsigned RISCVTargetLowering::getStackProbeSize(const MachineFunction &MF,
 25244 Align StackAlign) const {
25245 // The default stack probe size is 4096 if the function has no
25246 // stack-probe-size attribute.
25247 const Function &Fn = MF.getFunction();
25248 unsigned StackProbeSize =
25249 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
25250 // Round down to the stack alignment.
25251 StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
25252 return StackProbeSize ? StackProbeSize : StackAlign.value();
25253}
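// Worked example for the rounding above: "stack-probe-size"=1000 with a
// 16-byte stack alignment yields 992, while a requested size smaller than the
// alignment rounds down to 0 and the alignment itself (16) is used instead.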
25254
25255SDValue RISCVTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
25256 SelectionDAG &DAG) const {
 25257 MachineFunction &MF = DAG.getMachineFunction();
 25258 if (!hasInlineStackProbe(MF))
25259 return SDValue();
25260
25261 MVT XLenVT = Subtarget.getXLenVT();
25262 // Get the inputs.
25263 SDValue Chain = Op.getOperand(0);
25264 SDValue Size = Op.getOperand(1);
25265
 25266 MaybeAlign Align =
 25267 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
25268 SDLoc dl(Op);
25269 EVT VT = Op.getValueType();
25270
25271 // Construct the new SP value in a GPR.
25272 SDValue SP = DAG.getCopyFromReg(Chain, dl, RISCV::X2, XLenVT);
25273 Chain = SP.getValue(1);
25274 SP = DAG.getNode(ISD::SUB, dl, XLenVT, SP, Size);
25275 if (Align)
25276 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
25277 DAG.getSignedConstant(-Align->value(), dl, VT));
25278
25279 // Set the real SP to the new value with a probing loop.
25280 Chain = DAG.getNode(RISCVISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
25281 return DAG.getMergeValues({SP, Chain}, dl);
25282}
25283
 25284MachineBasicBlock *
 25285RISCVTargetLowering::emitDynamicProbedAlloc(MachineInstr &MI,
 25286 MachineBasicBlock *MBB) const {
25287 MachineFunction &MF = *MBB->getParent();
25288 MachineBasicBlock::iterator MBBI = MI.getIterator();
25289 DebugLoc DL = MBB->findDebugLoc(MBBI);
25290 Register TargetReg = MI.getOperand(0).getReg();
25291
25292 const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
25293 bool IsRV64 = Subtarget.is64Bit();
25294 Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
25295 const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
25296 uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
25297
25298 MachineFunction::iterator MBBInsertPoint = std::next(MBB->getIterator());
25299 MachineBasicBlock *LoopTestMBB =
25300 MF.CreateMachineBasicBlock(MBB->getBasicBlock());
25301 MF.insert(MBBInsertPoint, LoopTestMBB);
25302 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
25303 MF.insert(MBBInsertPoint, ExitMBB);
25304 Register SPReg = RISCV::X2;
25305 Register ScratchReg =
25306 MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass);
25307
25308 // ScratchReg = ProbeSize
25309 TII->movImm(*MBB, MBBI, DL, ScratchReg, ProbeSize, MachineInstr::NoFlags);
25310
25311 // LoopTest:
25312 // SUB SP, SP, ProbeSize
25313 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg)
25314 .addReg(SPReg)
25315 .addReg(ScratchReg);
25316
25317 // s[d|w] zero, 0(sp)
25318 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
25319 TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
25320 .addReg(RISCV::X0)
25321 .addReg(SPReg)
25322 .addImm(0);
25323
25324 // BLT TargetReg, SP, LoopTest
25325 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BLT))
25326 .addReg(TargetReg)
25327 .addReg(SPReg)
25328 .addMBB(LoopTestMBB);
25329
25330 // Adjust with: MV SP, TargetReg.
25331 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(RISCV::ADDI), SPReg)
25332 .addReg(TargetReg)
25333 .addImm(0);
25334
25335 ExitMBB->splice(ExitMBB->end(), MBB, std::next(MBBI), MBB->end());
 25336 ExitMBB->transferSuccessorsAndUpdatePHIs(MBB);
 25337
25338 LoopTestMBB->addSuccessor(ExitMBB);
25339 LoopTestMBB->addSuccessor(LoopTestMBB);
25340 MBB->addSuccessor(LoopTestMBB);
25341
25342 MI.eraseFromParent();
25343 MF.getInfo<RISCVMachineFunctionInfo>()->setDynamicAllocation();
25344 return ExitMBB->begin()->getParent();
25345}
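// Illustrative shape of the emitted probing loop (a sketch; physical register
// names are chosen for illustration, ProbeSize = 4096, rv64):
//
//   lui   t0, 1              # ScratchReg = 4096
// LoopTest:
//   sub   sp, sp, t0         # step the stack pointer down one page
//   sd    zero, 0(sp)        # touch the newly exposed page
//   blt   t1, sp, LoopTest   # t1 holds TargetReg, the desired final SP
//   mv    sp, t1             # final adjustment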
25346
 25347ArrayRef<MCPhysReg> RISCVTargetLowering::getRoundingControlRegisters() const {
 25348 if (Subtarget.hasStdExtFOrZfinx()) {
25349 static const MCPhysReg RCRegs[] = {RISCV::FRM, RISCV::FFLAGS};
25350 return RCRegs;
25351 }
25352 return {};
25353}
25354
 25355bool RISCVTargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
 25356 EVT VT = Y.getValueType();
25357
25358 if (VT.isVector())
25359 return false;
25360
25361 return VT.getSizeInBits() <= Subtarget.getXLen();
25362}
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue foldConcatVector(SDValue V1, SDValue V2)
If concat_vector(V1,V2) could be folded away to some existing vector source, return it.
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Double the element size of the build vector to reduce the number of vslide1down in the build vector c...
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static bool isCompressMask(ArrayRef< int > Mask)
static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isZipEven(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned &Factor)
Given a shuffle which can be represented as a pair of two slides, see if it is a zipeven idiom.
static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt, unsigned Shift)
static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try to map an integer comparison with size > XLEN to vector instructions before type legalization spl...
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static SDValue combineOrAndToBitfieldInsert(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static LLT getMaskTypeFor(LLT VecTy)
Return the mask type suitable for masking the provided vector type.
static unsigned getRISCVWOpcode(unsigned Opcode)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
#define ROTR(x, n)
Definition SHA256.cpp:32
static bool isCommutative(Instruction *I, Value *ValWithUses)
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static constexpr int Concat[]
Value * RHS
Value * LHS
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:290
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1329
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1314
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1091
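The APFloat entries above are what the constant-folding paths in this file use when materializing FP values from integers. A minimal stand-alone sketch of that usage, assuming LLVM's ADT headers are available; the helper name foldSIToFPConstant is hypothetical and not a function in this file:

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"

using namespace llvm;

// foldSIToFPConstant is a hypothetical helper, not part of this file.
static APFloat foldSIToFPConstant(const APInt &Int) {
  // Start from +0.0 in IEEE single precision, then convert the signed
  // integer into it with round-to-nearest-even.
  APFloat FP(APFloat::IEEEsingle());
  APFloat::opStatus St =
      FP.convertFromAPInt(Int, /*IsSigned=*/true,
                          APFloat::rmNearestTiesToEven);
  (void)St; // inexact conversions are acceptable for this illustration
  return FP;
}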
Class for arbitrary precision integers.
Definition APInt.h:78
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1386
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1513
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set to 1 the bit whose position is given as "bitPosition".
Definition APInt.h:1331
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1202
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1183
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
LLVM_ABI APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition APInt.cpp:1644
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1397
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1640
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1532
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:511
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition APInt.cpp:1736
bool isMask(unsigned numBits) const
Definition APInt.h:489
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1131
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1389
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1563
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1222
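The APInt predicates listed above carry most of the constant analysis in the mask and bitfield-insert combines. A small self-contained sketch, not taken from this file, that checks whether a 64-bit constant is a single contiguous run of ones and recovers its position and width:

#include "llvm/ADT/APInt.h"
#include <cassert>
#include <cstdio>

using namespace llvm;

int main() {
  // 0x000000FFFF000000 is a contiguous run of 16 ones starting at bit 24.
  APInt Mask(64, 0x000000FFFF000000ULL);
  if (Mask.isShiftedMask()) {
    unsigned Lo = Mask.countr_zero();   // position of the lowest set bit
    unsigned Width = Mask.popcount();   // number of set bits in the run
    std::printf("shifted mask: lsb=%u width=%u\n", Lo, Width);
    // The mask can be rebuilt from (Lo, Width), as the combines effectively do.
    assert(APInt::getLowBitsSet(64, Width).shl(Lo) == Mask);
  }
  return 0;
}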
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
an instruction to allocate memory on the stack
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:143
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ USubSat
*p = usub.sat(old, v); usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
bool isFloatingPointOperation() const
BinOp getOperation() const
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
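These AtomicRMWInst queries are what an atomic-expansion hook inspects when deciding how the IR-level AtomicExpand pass should lower an atomicrmw. A hedged sketch of that shape, with an illustrative policy that is not the actual RISC-V one; classifyAtomicRMW is a hypothetical helper:

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Instructions.h"
#include <cstdint>

using namespace llvm;

// classifyAtomicRMW is a hypothetical helper; the real policy lives in
// RISCVTargetLowering::shouldExpandAtomicRMWInIR and differs in detail.
static TargetLowering::AtomicExpansionKind
classifyAtomicRMW(const AtomicRMWInst *AI) {
  if (AI->isFloatingPointOperation())
    return TargetLowering::AtomicExpansionKind::CmpXChg;
  // Sub-word integer RMWs typically take the masked-intrinsic path.
  uint64_t Size = AI->getType()->getPrimitiveSizeInBits().getFixedValue();
  switch (AI->getOperation()) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::And:
  case AtomicRMWInst::Nand:
    return Size < 32 ? TargetLowering::AtomicExpansionKind::MaskedIntrinsic
                     : TargetLowering::AtomicExpansionKind::None;
  default:
    return TargetLowering::AtomicExpansionKind::CmpXChg;
  }
}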
This is an SDNode representing atomic operations.
const SDValue & getBasePtr() const
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
bool test(unsigned Idx) const
Definition BitVector.h:480
BitVector & set()
Definition BitVector.h:370
bool all() const
all - Returns true if all bits are set.
Definition BitVector.h:194
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
int64_t getLocMemOffset() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:226
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:479
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:248
unsigned size() const
Definition DenseMap.h:110
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:224
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition TypeSize.h:313
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:310
Tagged union holding either a T or a Error.
Definition Error.h:485
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:762
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:774
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
Argument * getArg(unsigned i) const
Definition Function.h:884
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
Helper struct to store a base, index and offset that forms an address.
bool isDSOLocal() const
bool hasExternalWeakLinkage() const
Module * getParent()
Get the module that this global value is contained inside of...
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition IRBuilder.h:1939
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:201
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2511
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:605
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition IRBuilder.h:552
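The IRBuilderBase calls above compose into byte-offset address computations of the kind useTpOffset performs. A minimal sketch under that assumption; emitByteOffset is a hypothetical name:

#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// emitByteOffset is a hypothetical helper in the spirit of useTpOffset above.
static Value *emitByteOffset(IRBuilderBase &IRB, Value *Base, unsigned Offset) {
  // An i8 GEP expresses plain byte arithmetic regardless of the pointee type.
  return IRB.CreateConstGEP1_32(IRB.getInt8Ty(), Base, Offset, "byte.offset");
}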
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
MCContext & getContext() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Metadata node.
Definition Metadata.h:1078
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1442
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
bool isRISCVVectorTuple() const
Return true if this is a RISCV vector tuple type where the runtime length is machine dependent.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
static MVT getRISCVVectorTupleVT(unsigned Sz, unsigned NFields)
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
unsigned getRISCVVectorTupleNumFields() const
Given a RISC-V vector tuple type, return the num_fields.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
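The MVT helpers above are used throughout this file to move between fixed-length vector types and their scalable container types. A short stand-alone sketch, with the element count hard-coded instead of derived from VLEN/LMUL as the real code does; exampleContainer is a hypothetical helper:

#include "llvm/CodeGenTypes/MachineValueType.h"
#include <cassert>

using namespace llvm;

// exampleContainer is a hypothetical helper; the in-tree code derives the
// element count from VLEN and LMUL, which is elided here.
static MVT exampleContainer(MVT FixedVT) {
  assert(FixedVT.isFixedLengthVector() && "expected a fixed-length vector");
  MVT EltVT = FixedVT.getVectorElementType();
  // Keep the element type but switch to a scalable layout (vscale x 2 elts).
  return MVT::getScalableVectorVT(EltVT, /*NumElements=*/2);
}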
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
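The MachineInstrBuilder interface above is the workhorse of the emit*Pseudo custom inserters. A hedged sketch of the BuildMI pattern they follow; the helper name and the caller-supplied opcode are placeholders rather than real RISC-V details:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

using namespace llvm;

// emitAddiSketch is a hypothetical helper; the opcode is supplied by the
// caller rather than hard-coding a RISC-V instruction here.
static void emitAddiSketch(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           const DebugLoc &DL, const TargetInstrInfo &TII,
                           Register Dst, Register Src, int64_t Imm,
                           unsigned AddiOpc) {
  // Dst = Src + Imm, using whatever ADDI-like opcode the caller selected.
  BuildMI(MBB, MBBI, DL, TII.get(AddiOpc), Dst).addReg(Src).addImm(Imm);
}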
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
This is an abstract virtual class for memory operations.
Align getAlign() const
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
Metadata * getModuleFlag(StringRef Key) const
Return the corresponding value if Key appears in module flags, otherwise return null.
Definition Module.cpp:353
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:299
A RISCV-specific constant pool value.
static RISCVConstantPoolValue * Create(const GlobalValue *GV)
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
bool hasStdExtZfhOrZhinx() const
bool hasShlAdd(int64_t ShAmt) const
unsigned getRealMinVLen() const
bool useRVVForFixedLengthVectors() const
bool hasVInstructionsBF16Minimal() const
bool hasVInstructionsF16Minimal() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool hasVInstructionsF16() const
unsigned getMaxBuildIntsCost() const
bool hasVInstructions() const
bool isRegisterReservedByUser(Register i) const override
std::optional< unsigned > getRealVLen() const
bool useConstantPoolForLargeInts() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
bool hasBEXTILike() const
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
bool hasCZEROLike() const
unsigned getELen() const
unsigned getFLen() const
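Lowering decisions in this file are routinely gated on the RISCVSubtarget feature queries listed above. A hedged sketch of that guard pattern; the helper and its thresholds are illustrative, not the in-tree policy:

#include "RISCVSubtarget.h"

using namespace llvm;

// canUseRVVForElement is a hypothetical helper; the checks are illustrative
// and do not reproduce the exact in-tree policy.
static bool canUseRVVForElement(MVT EltVT, const RISCVSubtarget &ST) {
  if (!ST.hasVInstructions())
    return false;
  if (EltVT == MVT::i64)
    return ST.hasVInstructionsI64();
  if (EltVT == MVT::f64)
    return ST.hasVInstructionsF64();
  if (EltVT == MVT::f16)
    return ST.hasVInstructionsF16Minimal();
  return EltVT.getFixedSizeInBits() <= ST.getELen();
}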
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
ArrayRef< MCPhysReg > getRoundingControlRegisters() const override
Returns a 0 terminated array of rounding control registers that can be attached into strict FP call.
static MVT getM1VT(MVT VT)
Given a vector (either fixed or scalable), return the scalable vector corresponding to a vector regis...
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool preferScalarizeSplat(SDNode *N) const override
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
ISD::NodeType getExtendForAtomicRMWArg(unsigned Op) const override
Returns how the platform's atomic rmw operations expect their input argument to be extended (ZERO_EXT...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineBasicBlock * emitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
static unsigned getRegClassIDForLMUL(RISCVVType::VLMUL LMul)
unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const override
Return the number of registers for a given MVT, for inline assembly.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
static bool isSpreadMask(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Match a mask which "spreads" the leading elements of a vector evenly across the result.
static RISCVVType::VLMUL getLMUL(MVT VT)
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
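Many of the simpler hooks above reduce to small range or feature checks. As one hedged illustration, an add-immediate legality test comes down to the signed 12-bit range of ADDI; the sketch below is a simplified stand-in, not necessarily the exact in-tree body:

#include "llvm/Support/MathExtras.h"

// A simplified stand-in; the in-tree override may include additional checks.
static bool isLegalAddImmediateSketch(int64_t Imm) {
  return llvm::isInt<12>(Imm);
}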
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
std::optional< APInt > bitcastToAPInt() const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
iterator_range< user_iterator > users()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
virtual bool isTargetStrictFPOpcode(unsigned Opcode) const
Returns true if a node with the given target-specific opcode has strict floating-point semantics.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
LLVM_ABI SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
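A small sketch of the equivalence this helper provides (DL, V and VT assumed in scope):
// These two expressions build equivalent nodes: getNOT is XOR with all-ones.
SDValue NotA = DAG.getNOT(DL, V, VT);
SDValue NotB = DAG.getNode(ISD::XOR, DL, VT, V, DAG.getAllOnesConstant(DL, VT));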
LLVM_ABI SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
LLVM_ABI SDValue getStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
LLVM_ABI SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
LLVM_ABI SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
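A hedged sketch, assuming Ptr and DL are in scope, computing the address of the second 8-byte half of a split memory access:
SDValue HiPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(8), DL);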
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
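A minimal sketch of the usual pairing with GetSplitDestVTs, assuming Vec, its value type VT, and DL are in scope:
auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
auto [Lo, Hi] = DAG.SplitVector(Vec, DL, LoVT, HiVT);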
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getInsertVectorElt(const SDLoc &DL, SDValue Vec, SDValue Elt, unsigned Idx)
Insert Elt into Vec at offset Idx.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
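For illustration, a sketch that folds a compare and a select into one SELECT_CC node (A, B, X, Y and DL assumed in scope):
// (A < B) ? X : Y, signed compare.
SDValue Res = DAG.getSelectCC(DL, A, B, X, Y, ISD::SETLT);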
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
LLVM_ABI std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
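A sketch of a typical known-bits query in a combine, assuming Op is an SDValue in scope:
KnownBits Known = DAG.computeKnownBits(Op);
// True when the top 32 bits of Op are provably zero.
bool High32Zero = Known.countMinLeadingZeros() >= 32;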
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
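A brief sketch (VT, DL and Scalar assumed in scope); the helper emits BUILD_VECTOR for fixed-length vector types and SPLAT_VECTOR for scalable ones:
SDValue Splat = DAG.getSplat(VT, DL, Scalar);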
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
LLVM_ABI SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
LLVM_ABI SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
static LLVM_ABI bool isSelectMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
static LLVM_ABI bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
static LLVM_ABI bool isSingleSourceMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static LLVM_ABI bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the LLVM IR shufflevector instruction...
ArrayRef< int > getMask() const
static LLVM_ABI bool isSplatMask(ArrayRef< int > Mask)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
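A short usage sketch of the insert/count pattern (RegNo is an assumed unsigned variable; requires llvm/ADT/SmallSet.h):
SmallSet<unsigned, 4> SeenRegs;
if (SeenRegs.insert(RegNo).second) {
  // First time this register number is seen.
}
bool AlreadySeen = SeenRegs.count(RegNo);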
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:854
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
LLVM_ABI std::string lower() const
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
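A purely illustrative sketch of the Case/Default chaining (ExtName, the strings, and the values are made up for this example):
unsigned FLen = StringSwitch<unsigned>(ExtName)
                    .Case("zfh", 16)
                    .Case("f", 32)
                    .Case("d", 64)
                    .Default(0);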
Information about stack frame layout on the target.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
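A sketch of the typical configuration pattern inside a target's TargetLowering constructor; the opcodes, types and actions chosen here are illustrative only, and XLenVT is an assumed alias for the target's integer register type:
setOperationAction(ISD::BSWAP, XLenVT, Expand);  // expand via generic legalization
setOperationAction(ISD::ROTL, XLenVT, Custom);   // route to LowerOperation
setOperationAction(ISD::SMAX, XLenVT, Legal);    // natively selectable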
void setMaxDivRemBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum div/rem the backend supports.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual unsigned getMinimumJumpTableEntries() const
Return lower limit for number of blocks in a jump table.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const
Return the number of registers that this ValueType will eventually require.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
unsigned getMaxDivRemBitWidthSupported() const
Returns the size in bits of the maximum div/rem the backend supports.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT, LegalizeAction Action)
Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treate...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual MVT getVPExplicitVectorLengthTy() const
Returns the type to be used for the EVL/AVL operand of VP nodes: ISD::VP_ADD, ISD::VP_SUB,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
const MCSubtargetInfo * getMCSubtargetInfo() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
TargetOptions Options
unsigned EmitCallGraphSection
Emit section containing call graph metadata.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual bool isRegisterReservedByUser(Register R) const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition Triple.h:776
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:344
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:347
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI Type * getStructElementType(unsigned N) const
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:62
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:261
LLVM_ABI bool isRISCVVectorTupleTy() const
Definition Type.cpp:147
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
bool isTargetExtTy() const
Return true if this is a target extension type.
Definition Type.h:203
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:181
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:201
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:231
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition TypeSize.h:257
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
constexpr bool isZero() const
Definition TypeSize.h:154
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:253
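A small sketch of how fixed and scalable quantities are queried (the values are arbitrary):
TypeSize FixedBits = TypeSize::getFixed(128);      // exactly 128 bits
TypeSize ScalableBits = TypeSize::getScalable(64); // 64 * vscale bits
uint64_t MinBits = ScalableBits.getKnownMinValue();   // 64
bool Mult32 = ScalableBits.isKnownMultipleOf(32);     // true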
self_iterator getIterator()
Definition ilist_node.h:123
#define INT64_MIN
Definition DataTypes.h:74
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:780
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:593
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:771
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:868
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:577
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:744
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:431
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:712
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:478
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:662
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:779
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor to...
Definition ISDOpcodes.h:628
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition ISDOpcodes.h:688
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:534
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:541
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:784
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:669
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition ISDOpcodes.h:958
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:701
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:642
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:799
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:379
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:724
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition ISDOpcodes.h:633
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:793
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:477
@ STRICT_FROUNDEVEN
Definition ISDOpcodes.h:457
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:471
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:493
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:470
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:498
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:707
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:420
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition ISDOpcodes.h:678
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:558
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition ISDOpcodes.h:654
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:947
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively places vector elements based on mask e....
Definition ISDOpcodes.h:696
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:451
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:933
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:821
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:527
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor ...
Definition ISDOpcodes.h:617
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition ISDOpcodes.h:859
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:719
@ TRUNCATE_USAT_U
Definition ISDOpcodes.h:863
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:549
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isExtOpcode(unsigned Opcode)
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
LLVM_ABI std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LLVM_ABI bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
LLVM_ABI bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
match_combine_or< BinaryOp_match< LHS, RHS, Instruction::Add >, DisjointOr_match< LHS, RHS > > m_AddLike(const LHS &L, const RHS &R)
Match either "add" or "or disjoint".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
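A minimal IR-level sketch of these matchers, assuming V is a Value* in scope:
using namespace llvm::PatternMatch;
Value *X;
// Matches "shl X, 1" (or a vector shift by an all-ones-of-1 splat).
if (match(V, m_Shl(m_Value(X), m_One()))) {
  // X is bound to the shifted operand.
}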
unsigned getBrCond(CondCode CC, unsigned SelectOpc=0)
static RISCVVType::VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
SmallVector< Inst, 8 > InstSeq
Definition RISCVMatInt.h:43
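A hedged sketch of the constant-materialization query, assuming Imm is an int64_t and Subtarget provides the MCSubtargetInfo:
RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
// The sequence length is a common proxy for how expensive Imm is to build.
bool CheapImm = Seq.size() <= 2;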
static VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned decodeVSEW(unsigned VSEW)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
static unsigned encodeSEW(unsigned SEW)
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
static constexpr unsigned FPMASK_Positive_Infinity
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
static constexpr unsigned RVVBytesPerBlock
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
BinaryOpc_match< LHS, RHS > m_Srl(const LHS &L, const RHS &R)
auto m_SpecificVT(EVT RefVT, const Pattern &P)
Match a specific ValueType.
Or< Preds... > m_AnyOf(const Preds &...preds)
auto m_Node(unsigned Opcode, const OpndPreds &...preds)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
ConstantInt_match m_ConstInt()
Match any integer constants or splat of an integer constant.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
initializer< Ty > init(const Ty &Val)
uint32_t read32le(const void *P)
Definition Endian.h:432
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
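A short sketch of typical immediate-range checks with these helpers (Imm and ShAmt assumed in scope):
bool FitsSImm12 = isInt<12>(Imm);      // e.g. an ADDI-style signed immediate
bool FitsUImm6 = isUInt<6>(ShAmt);     // e.g. an RV64 shift amount
int64_t Low32 = SignExtend64<32>(Imm); // sign-extend the low 32 bits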
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2472
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
MCCodeEmitter * createRISCVMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx)
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:303
static const MachineMemOperand::Flags MONontemporalBit0
bool RISCVCCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
RISCVCCAssignFn - This target-specific function extends the default CCValAssign with additional infor...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and congruent to Skew modulo Align.
Definition MathExtras.h:546
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1588
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:177
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:1968
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
bool CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
int isShifted359(T Value, int &Shift)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ Other
Any other memory.
Definition ModRef.h:68
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
CombineLevel
Definition DAGCombine.h:15
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
@ Xor
Bitwise or logical XOR of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
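A brief illustration of the isAcquireOrStronger query; the orderings chosen here are arbitrary examples:
#include "llvm/Support/AtomicOrdering.h"
static void orderingSketch() {
  using llvm::AtomicOrdering;
  bool A = llvm::isAcquireOrStronger(AtomicOrdering::Acquire);    // true
  bool B = llvm::isAcquireOrStronger(AtomicOrdering::Monotonic);  // false
  (void)A; (void)B;
}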
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1961
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1758
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
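The range wrappers above (any_of, count_if, find_if, is_contained) avoid spelling out begin/end iterators; a minimal sketch over made-up data:
#include "llvm/ADT/STLExtras.h"
#include <vector>
static void rangeHelperSketch() {
  std::vector<int> Ops = {1, 2, 4, 8};
  bool AnyOdd = llvm::any_of(Ops, [](int V) { return V % 2 != 0; });  // true (1 is odd)
  auto BigCount = llvm::count_if(Ops, [](int V) { return V > 2; });   // 2 (4 and 8)
  bool HasFour = llvm::is_contained(Ops, 4);                          // true
  auto It = llvm::find_if(Ops, [](int V) { return V > 3; });          // iterator to 4
  (void)AnyOdd; (void)BigCount; (void)HasFour; (void)It;
}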
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
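A short sketch combining seq with the drop_end wrapper documented earlier; the bounds are illustrative:
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallVector.h"
static void seqSketch() {
  llvm::SmallVector<unsigned> Indices;
  for (unsigned I : llvm::seq(0u, 8u))           // visits 0 .. 7
    Indices.push_back(I);
  for (unsigned I : llvm::drop_end(Indices, 2))  // skips the last two entries
    (void)I;
}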
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
constexpr bool isShiftedUInt(uint64_t x)
Checks if an unsigned integer is an N-bit number shifted left by S.
Definition MathExtras.h:198
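These helpers (maskTrailingOnes, SignExtend64, isShiftedUInt) often appear when checking whether a constant fits an immediate field; a minimal sketch with illustrative values:
#include "llvm/Support/MathExtras.h"
static void immediateSketch() {
  using namespace llvm;
  uint64_t Low12 = maskTrailingOnes<uint64_t>(12);  // 0xFFF
  int64_t Simm = SignExtend64<12>(0xFFF);           // -1: bit 11 is the sign bit
  bool Shifted = isShiftedUInt<8, 4>(0x0FF0);       // true: an 8-bit value shifted left by 4
  (void)Low12; (void)Simm; (void)Shifted;
}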
LLVM_ABI bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
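A sketch of how these SelectionDAG constant queries (isOneConstant, isOneOrOneSplat, isAllOnesConstant, isConstOrConstSplat) are typically combined; the node N and the helper name are assumptions for illustration, not code from this file:
#include "llvm/CodeGen/SelectionDAGNodes.h"
static void constantQuerySketch(llvm::SDNode *N) {
  using namespace llvm;
  SDValue RHS = N->getOperand(1);    // assumes N has at least two operands
  if (isOneConstant(RHS) || isOneOrOneSplat(RHS)) {
    // RHS is the scalar constant 1 or a splat of 1.
  }
  if (isAllOnesConstant(RHS)) {
    // RHS has every bit set (-1).
  }
  if (ConstantSDNode *C = isConstOrConstSplat(RHS)) {
    uint64_t Imm = C->getZExtValue();  // the (splatted) constant value
    (void)Imm;
  }
}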
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
#define NC
Definition regutils.h:42
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
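A small illustration of Align together with the Log2(Align) helper documented above; the 16-byte alignment is an arbitrary example:
#include "llvm/Support/Alignment.h"
static void alignSketch() {
  llvm::Align A(16);                  // must be a non-zero power of two
  unsigned ShiftAmt = llvm::Log2(A);  // 4
  uint64_t Bytes = A.value();         // 16
  (void)ShiftAmt; (void)Bytes;
}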
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
uint64_t getScalarStoreSize() const
Definition ValueTypes.h:402
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:243
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
unsigned getRISCVVectorTupleNumFields() const
Given a RISCV vector tuple type, return the num_fields.
Definition ValueTypes.h:364
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition ValueTypes.h:430
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
bool isRISCVVectorTuple() const
Return true if this is a RISC-V vector tuple value type.
Definition ValueTypes.h:179
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isFixedLengthVector() const
Definition ValueTypes.h:181
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition ValueTypes.h:419
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition ValueTypes.h:292
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:308
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
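A brief sketch exercising several of the EVT queries listed above; the element width, element count, and the helper wrapping them are assumptions for illustration:
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
static void evtSketch(llvm::LLVMContext &Ctx) {
  using namespace llvm;
  EVT EltVT = EVT::getIntegerVT(Ctx, 32);
  EVT VecVT = EVT::getVectorVT(Ctx, EltVT, 4, /*IsScalable=*/true); // <vscale x 4 x i32>
  bool Scalable = VecVT.isScalableVector();            // true
  unsigned MinElts = VecVT.getVectorMinNumElements();  // 4
  uint64_t EltBits = VecVT.getScalarSizeInBits();      // 32
  EVT HalfVT = EltVT.getHalfSizedIntegerVT(Ctx);       // i16
  (void)Scalable; (void)MinElts; (void)EltBits; (void)HalfVT;
}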
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static LLVM_ABI KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for ashr(LHS, RHS).
static LLVM_ABI KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition KnownBits.h:274
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:161
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:172
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
static LLVM_ABI KnownBits lshr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for lshr(LHS, RHS).
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:296
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:311
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:180
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute known bits resulting from the addition of LHS and RHS.
Definition KnownBits.h:347
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:145
static LLVM_ABI KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition KnownBits.h:280
static LLVM_ABI KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
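A compact sketch of the KnownBits transfer functions listed above, using a hand-built 32-bit value whose low nibble is unknown; the helper name is hypothetical:
#include "llvm/Support/KnownBits.h"
static void knownBitsSketch() {
  using namespace llvm;
  KnownBits LHS(32);
  LHS.Zero.setBitsFrom(4);                          // bits [31:4] known zero, [3:0] unknown
  KnownBits RHS = KnownBits::makeConstant(APInt(32, 2));
  KnownBits Sh = KnownBits::shl(LHS, RHS);          // value shifted left by exactly 2
  unsigned MaxActive = Sh.countMaxActiveBits();     // at most 6 significant bits
  KnownBits Widened = Sh.zext(64);                  // zero-extended known bits
  (void)MaxActive; (void)Widened;
}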
Matching combinators.
SmallVector< ArgRegPair, 1 > ArgRegPairs
Vector of call argument and its forwarding register.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
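A short sketch of building MachinePointerInfo records; the MachineFunction, frame index, and helper name are assumed to be available at the call site:
#include "llvm/CodeGen/MachineMemOperand.h"
static void pointerInfoSketch(llvm::MachineFunction &MF, int FI) {
  using namespace llvm;
  MachinePointerInfo Base = MachinePointerInfo::getFixedStack(MF, FI);
  MachinePointerInfo Plus8 = Base.getWithOffset(8);  // same slot, 8 bytes further in
  MachinePointerInfo CP = MachinePointerInfo::getConstantPool(MF);
  (void)Plus8; (void)CP;
}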
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI bool recursivelyDeleteUnusedNodes(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...