1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
18#include "LoongArchSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/IntrinsicsLoongArch.h"
33#include "llvm/Support/Debug.h"
38
39using namespace llvm;
40
41#define DEBUG_TYPE "loongarch-isel-lowering"
42
43STATISTIC(NumTailCalls, "Number of tail calls");
44
53
55 "loongarch-materialize-float-imm", cl::Hidden,
56 cl::desc("Maximum number of instructions used (including code sequence "
57 "to generate the value and moving the value to FPR) when "
58 "materializing floating-point immediates (default = 3)"),
60 cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
62 "Materialize FP immediate within 2 instructions"),
64 "Materialize FP immediate within 3 instructions"),
66 "Materialize FP immediate within 4 instructions"),
68 "Materialize FP immediate within 5 instructions"),
70 "Materialize FP immediate within 6 instructions "
71 "(behaves same as 5 on loongarch64)")));
72
73static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
74 cl::desc("Trap on integer division by zero."),
75 cl::init(false));
76
78 const LoongArchSubtarget &STI)
79 : TargetLowering(TM), Subtarget(STI) {
80
81 MVT GRLenVT = Subtarget.getGRLenVT();
82
83 // Set up the register classes.
84
85 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
86 if (Subtarget.hasBasicF())
87 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
88 if (Subtarget.hasBasicD())
89 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
90
91 static const MVT::SimpleValueType LSXVTs[] = {
92 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
93 static const MVT::SimpleValueType LASXVTs[] = {
94 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
95
96 if (Subtarget.hasExtLSX())
97 for (MVT VT : LSXVTs)
98 addRegisterClass(VT, &LoongArch::LSX128RegClass);
99
100 if (Subtarget.hasExtLASX())
101 for (MVT VT : LASXVTs)
102 addRegisterClass(VT, &LoongArch::LASX256RegClass);
103
104 // Set operations for LA32 and LA64.
105
107 MVT::i1, Promote);
108
115
118 GRLenVT, Custom);
119
121
122 setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
123 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
124 setOperationAction(ISD::VASTART, MVT::Other, Custom);
125 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
126
127 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
128 setOperationAction(ISD::TRAP, MVT::Other, Legal);
129
133
134 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
135
136 // BITREV/REVB requires the 32S feature.
137 if (STI.has32S()) {
138 // Expand bitreverse.i16 with native-width bitrev and shift for now, until
139 // we know which of sll and revb.2h is faster.
142
143 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
144 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
145 // and i32 could still be byte-swapped relatively cheaply.
147 } else {
155 }
156
157 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
158 setOperationAction(ISD::BR_CC, GRLenVT, Expand);
159 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
163
166
167 // Set operations for LA64 only.
168
169 if (Subtarget.is64Bit()) {
176 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
187
191 Custom);
192 setOperationAction(ISD::LROUND, MVT::i32, Custom);
193 }
194
195 // Set operations for LA32 only.
196
197 if (!Subtarget.is64Bit()) {
203 if (Subtarget.hasBasicD())
204 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
205 }
206
207 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
208
209 static const ISD::CondCode FPCCToExpand[] = {
212
213 // Set operations for 'F' feature.
214
215 if (Subtarget.hasBasicF()) {
216 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
217 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
218 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
219 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
220 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
221
224 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
226 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
227 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
228 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
229 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
234 setOperationAction(ISD::FSIN, MVT::f32, Expand);
235 setOperationAction(ISD::FCOS, MVT::f32, Expand);
236 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
237 setOperationAction(ISD::FPOW, MVT::f32, Expand);
239 setOperationAction(ISD::FP16_TO_FP, MVT::f32,
240 Subtarget.isSoftFPABI() ? LibCall : Custom);
241 setOperationAction(ISD::FP_TO_FP16, MVT::f32,
242 Subtarget.isSoftFPABI() ? LibCall : Custom);
243 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
244 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
245 Subtarget.isSoftFPABI() ? LibCall : Custom);
246
247 if (Subtarget.is64Bit()) {
248 setOperationAction(ISD::FRINT, MVT::f32, Legal);
249 setOperationAction(ISD::FLOG2, MVT::f32, Legal);
250 }
251
252 if (!Subtarget.hasBasicD()) {
254 if (Subtarget.is64Bit()) {
257 }
258 }
259 }
260
261 // Set operations for 'D' feature.
262
263 if (Subtarget.hasBasicD()) {
264 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
265 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
266 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
267 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
268 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
269 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
270 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
271
274 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
278 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
279 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
280 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
282 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
284 setOperationAction(ISD::FSIN, MVT::f64, Expand);
285 setOperationAction(ISD::FCOS, MVT::f64, Expand);
286 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
287 setOperationAction(ISD::FPOW, MVT::f64, Expand);
289 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
290 setOperationAction(ISD::FP_TO_FP16, MVT::f64,
291 Subtarget.isSoftFPABI() ? LibCall : Custom);
292 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
293 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
294 Subtarget.isSoftFPABI() ? LibCall : Custom);
295
296 if (Subtarget.is64Bit()) {
297 setOperationAction(ISD::FRINT, MVT::f64, Legal);
298 setOperationAction(ISD::FLOG2, MVT::f64, Legal);
299 }
300 }
301
302 // Set operations for 'LSX' feature.
303
304 if (Subtarget.hasExtLSX()) {
306 // Expand all truncating stores and extending loads.
307 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
308 setTruncStoreAction(VT, InnerVT, Expand);
311 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
312 }
313 // By default everything must be expanded. Then we will selectively turn
314 // on ones that can be effectively codegen'd.
315 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
317 }
318
319 for (MVT VT : LSXVTs) {
320 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
321 setOperationAction(ISD::BITCAST, VT, Legal);
323
327
332 }
333 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
336 Legal);
338 VT, Legal);
345 Expand);
354 }
355 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
357 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
359 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
362 }
363 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
367 setOperationAction(ISD::FSQRT, VT, Legal);
368 setOperationAction(ISD::FNEG, VT, Legal);
369 setOperationAction(ISD::FLOG2, VT, Legal);
372 VT, Expand);
374 setOperationAction(ISD::FCEIL, VT, Legal);
375 setOperationAction(ISD::FFLOOR, VT, Legal);
376 setOperationAction(ISD::FTRUNC, VT, Legal);
377 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
378 setOperationAction(ISD::FMINNUM, VT, Legal);
379 setOperationAction(ISD::FMAXNUM, VT, Legal);
380 }
382 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
383 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
384 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
385 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
386
387 for (MVT VT :
388 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
389 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
391 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
392 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
393 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
394 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
395 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
396 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
397 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
398 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
399 }
400 }
401
402 // Set operations for 'LASX' feature.
403
404 if (Subtarget.hasExtLASX()) {
405 for (MVT VT : LASXVTs) {
406 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
407 setOperationAction(ISD::BITCAST, VT, Legal);
409
415
419 }
420 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
423 Legal);
425 VT, Legal);
432 Expand);
441 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
442 }
443 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
445 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
447 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
450 }
451 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
455 setOperationAction(ISD::FSQRT, VT, Legal);
456 setOperationAction(ISD::FNEG, VT, Legal);
457 setOperationAction(ISD::FLOG2, VT, Legal);
460 VT, Expand);
462 setOperationAction(ISD::FCEIL, VT, Legal);
463 setOperationAction(ISD::FFLOOR, VT, Legal);
464 setOperationAction(ISD::FTRUNC, VT, Legal);
465 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
466 setOperationAction(ISD::FMINNUM, VT, Legal);
467 setOperationAction(ISD::FMAXNUM, VT, Legal);
468 }
469 }
470
471 // Set DAG combine for LA32 and LA64.
472
477
478 // Set DAG combine for 'LSX' feature.
479
480 if (Subtarget.hasExtLSX()) {
482 setTargetDAGCombine(ISD::BITCAST);
483 }
484
485 // Set DAG combine for 'LASX' feature.
486
487 if (Subtarget.hasExtLASX())
489
490 // Compute derived properties from the register classes.
491 computeRegisterProperties(Subtarget.getRegisterInfo());
492
494
497
498 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
499
501
502 // Function alignments.
504 // Set preferred alignments.
505 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
506 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
507 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
508
509 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
510 if (Subtarget.hasLAMCAS())
512
513 if (Subtarget.hasSCQ()) {
515 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
516 }
517}
518
520 const GlobalAddressSDNode *GA) const {
521 // In order to maximise the opportunity for common subexpression elimination,
522 // keep a separate ADD node for the global address offset instead of folding
523 // it in the global address node. Later peephole optimisations may choose to
524 // fold it back in when profitable.
525 return false;
526}
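// For example, with the offset kept as a separate ADD, accesses to (@g + 4)
// and (@g + 8) can both reuse a single node that materializes @g, whereas
// folding the offsets would produce two distinct global-address nodes and
// duplicate the address-materialization sequence.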
527
529 SelectionDAG &DAG) const {
530 switch (Op.getOpcode()) {
531 case ISD::ATOMIC_FENCE:
532 return lowerATOMIC_FENCE(Op, DAG);
534 return lowerEH_DWARF_CFA(Op, DAG);
536 return lowerGlobalAddress(Op, DAG);
538 return lowerGlobalTLSAddress(Op, DAG);
540 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
542 return lowerINTRINSIC_W_CHAIN(Op, DAG);
544 return lowerINTRINSIC_VOID(Op, DAG);
546 return lowerBlockAddress(Op, DAG);
547 case ISD::JumpTable:
548 return lowerJumpTable(Op, DAG);
549 case ISD::SHL_PARTS:
550 return lowerShiftLeftParts(Op, DAG);
551 case ISD::SRA_PARTS:
552 return lowerShiftRightParts(Op, DAG, true);
553 case ISD::SRL_PARTS:
554 return lowerShiftRightParts(Op, DAG, false);
556 return lowerConstantPool(Op, DAG);
557 case ISD::FP_TO_SINT:
558 return lowerFP_TO_SINT(Op, DAG);
559 case ISD::BITCAST:
560 return lowerBITCAST(Op, DAG);
561 case ISD::UINT_TO_FP:
562 return lowerUINT_TO_FP(Op, DAG);
563 case ISD::SINT_TO_FP:
564 return lowerSINT_TO_FP(Op, DAG);
565 case ISD::VASTART:
566 return lowerVASTART(Op, DAG);
567 case ISD::FRAMEADDR:
568 return lowerFRAMEADDR(Op, DAG);
569 case ISD::RETURNADDR:
570 return lowerRETURNADDR(Op, DAG);
572 return lowerWRITE_REGISTER(Op, DAG);
574 return lowerINSERT_VECTOR_ELT(Op, DAG);
576 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
578 return lowerBUILD_VECTOR(Op, DAG);
580 return lowerCONCAT_VECTORS(Op, DAG);
582 return lowerVECTOR_SHUFFLE(Op, DAG);
583 case ISD::BITREVERSE:
584 return lowerBITREVERSE(Op, DAG);
586 return lowerSCALAR_TO_VECTOR(Op, DAG);
587 case ISD::PREFETCH:
588 return lowerPREFETCH(Op, DAG);
589 case ISD::SELECT:
590 return lowerSELECT(Op, DAG);
591 case ISD::BRCOND:
592 return lowerBRCOND(Op, DAG);
593 case ISD::FP_TO_FP16:
594 return lowerFP_TO_FP16(Op, DAG);
595 case ISD::FP16_TO_FP:
596 return lowerFP16_TO_FP(Op, DAG);
597 case ISD::FP_TO_BF16:
598 return lowerFP_TO_BF16(Op, DAG);
599 case ISD::BF16_TO_FP:
600 return lowerBF16_TO_FP(Op, DAG);
601 case ISD::VECREDUCE_ADD:
602 return lowerVECREDUCE_ADD(Op, DAG);
603 case ISD::VECREDUCE_AND:
604 case ISD::VECREDUCE_OR:
605 case ISD::VECREDUCE_XOR:
606 case ISD::VECREDUCE_SMAX:
607 case ISD::VECREDUCE_SMIN:
608 case ISD::VECREDUCE_UMAX:
609 case ISD::VECREDUCE_UMIN:
610 return lowerVECREDUCE(Op, DAG);
611 case ISD::ConstantFP:
612 return lowerConstantFP(Op, DAG);
613 }
614 return SDValue();
615}
616
617SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
618 SelectionDAG &DAG) const {
619 EVT VT = Op.getValueType();
621 const APFloat &FPVal = CFP->getValueAPF();
622 SDLoc DL(CFP);
623
624 assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
625 (VT == MVT::f64 && Subtarget.hasBasicD()));
626
627 // If value is 0.0 or -0.0, just ignore it.
628 if (FPVal.isZero())
629 return SDValue();
630
631 // If LSX is enabled, use the cheaper 'vldi' instruction if possible.
632 if (isFPImmVLDILegal(FPVal, VT))
633 return SDValue();
634
635 // Construct the value as an integer and move it to a float register.
636 APInt INTVal = FPVal.bitcastToAPInt();
637
638 // If more than MaterializeFPImmInsNum instructions would be needed to
639 // generate INTVal and move it to a float register, fall back to a
640 // floating-point load from the constant pool.
642 int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 2 : 1);
643 if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue(+1.0))
644 return SDValue();
645
646 switch (VT.getSimpleVT().SimpleTy) {
647 default:
648 llvm_unreachable("Unexpected floating point type!");
649 break;
650 case MVT::f32: {
651 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i32);
652 if (Subtarget.is64Bit())
653 NewVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, NewVal);
654 return DAG.getNode(Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
656 DL, VT, NewVal);
657 }
658 case MVT::f64: {
659 if (Subtarget.is64Bit()) {
660 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i64);
661 return DAG.getNode(LoongArchISD::MOVGR2FR_D, DL, VT, NewVal);
662 }
663 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
664 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
665 return DAG.getNode(LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi);
666 }
667 }
668
669 return SDValue();
670}
671
672// Lower vecreduce_add using vhaddw instructions.
673// For Example:
674// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
675// can be lowered to:
676// VHADDW_D_W vr0, vr0, vr0
677// VHADDW_Q_D vr0, vr0, vr0
678// VPICKVE2GR_D a0, vr0, 0
679// ADDI_W a0, a0, 0
680SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
681 SelectionDAG &DAG) const {
682
683 SDLoc DL(Op);
684 MVT OpVT = Op.getSimpleValueType();
685 SDValue Val = Op.getOperand(0);
686
687 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
688 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
689 unsigned ResBits = OpVT.getScalarSizeInBits();
690
691 unsigned LegalVecSize = 128;
692 bool isLASX256Vector =
693 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
694
695 // Ensure the operand type is legal, widening the vector until it is.
696 while (!isTypeLegal(Val.getSimpleValueType())) {
697 Val = DAG.WidenVector(Val, DL);
698 }
699
700 // NumEles controls the iteration count; v4i32 for LSX and v8i32 for
701 // LASX should use the same count.
702 if (isLASX256Vector) {
703 NumEles /= 2;
704 LegalVecSize = 256;
705 }
706
707 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
708 MVT IntTy = MVT::getIntegerVT(EleBits);
709 MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
710 Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
711 }
712
713 if (isLASX256Vector) {
714 SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
715 DAG.getConstant(2, DL, Subtarget.getGRLenVT()));
716 Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
717 }
718
719 Val = DAG.getBitcast(MVT::getVectorVT(OpVT, LegalVecSize / ResBits), Val);
720 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
721 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
722}
723
724// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
725// For Example:
726// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
727// can be lowered to:
728// VBSRL_V vr1, vr0, 8
729// VMAX_W vr0, vr1, vr0
730// VBSRL_V vr1, vr0, 4
731// VMAX_W vr0, vr1, vr0
732// VPICKVE2GR_W a0, vr0, 0
733 // A 256-bit vector type is illegal here and is split into two 128-bit
734 // vectors by default before being processed by this.
735SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
736 SelectionDAG &DAG) const {
737 SDLoc DL(Op);
738
739 MVT OpVT = Op.getSimpleValueType();
740 SDValue Val = Op.getOperand(0);
741
742 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
743 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
744
746 // Ensure the operand type is legal, widening the vector until it is.
746 while (!isTypeLegal(Val.getSimpleValueType())) {
747 Val = DAG.WidenVector(Val, DL);
748 }
749
750 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
751 MVT VecTy = Val.getSimpleValueType();
752 MVT GRLenVT = Subtarget.getGRLenVT();
753
754 for (int i = NumEles; i > 1; i /= 2) {
755 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, GRLenVT);
756 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
757 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
758 }
759
760 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
761 DAG.getConstant(0, DL, GRLenVT));
762}
763
764SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
765 SelectionDAG &DAG) const {
766 unsigned IsData = Op.getConstantOperandVal(4);
767
768 // We don't support non-data prefetch.
769 // Just preserve the chain.
770 if (!IsData)
771 return Op.getOperand(0);
772
773 return Op;
774}
775
776// Return true if Val is equal to (setcc LHS, RHS, CC).
777// Return false if Val is the inverse of (setcc LHS, RHS, CC).
778// Otherwise, return std::nullopt.
779static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
780 ISD::CondCode CC, SDValue Val) {
781 assert(Val->getOpcode() == ISD::SETCC);
782 SDValue LHS2 = Val.getOperand(0);
783 SDValue RHS2 = Val.getOperand(1);
784 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
785
786 if (LHS == LHS2 && RHS == RHS2) {
787 if (CC == CC2)
788 return true;
789 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
790 return false;
791 } else if (LHS == RHS2 && RHS == LHS2) {
793 if (CC == CC2)
794 return true;
795 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
796 return false;
797 }
798
799 return std::nullopt;
800}
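// For example, matchSetCC(a, b, SETNE, (setcc a, b, SETEQ)) returns false,
// since SETEQ is the inverse of SETNE on the same operands, while an
// unrelated comparison such as (setcc a, c, SETEQ) yields std::nullopt.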
801
803 const LoongArchSubtarget &Subtarget) {
804 SDValue CondV = N->getOperand(0);
805 SDValue TrueV = N->getOperand(1);
806 SDValue FalseV = N->getOperand(2);
807 MVT VT = N->getSimpleValueType(0);
808 SDLoc DL(N);
809
810 // (select c, -1, y) -> -c | y
811 if (isAllOnesConstant(TrueV)) {
812 SDValue Neg = DAG.getNegative(CondV, DL, VT);
813 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
814 }
815 // (select c, y, -1) -> (c-1) | y
816 if (isAllOnesConstant(FalseV)) {
817 SDValue Neg =
818 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
819 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
820 }
821
822 // (select c, 0, y) -> (c-1) & y
823 if (isNullConstant(TrueV)) {
824 SDValue Neg =
825 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
826 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
827 }
828 // (select c, y, 0) -> -c & y
829 if (isNullConstant(FalseV)) {
830 SDValue Neg = DAG.getNegative(CondV, DL, VT);
831 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
832 }
833
834 // select c, ~x, x --> xor -c, x
835 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
836 const APInt &TrueVal = TrueV->getAsAPIntVal();
837 const APInt &FalseVal = FalseV->getAsAPIntVal();
838 if (~TrueVal == FalseVal) {
839 SDValue Neg = DAG.getNegative(CondV, DL, VT);
840 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
841 }
842 }
843
844 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
845 // when both truev and falsev are also setcc.
846 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
847 FalseV.getOpcode() == ISD::SETCC) {
848 SDValue LHS = CondV.getOperand(0);
849 SDValue RHS = CondV.getOperand(1);
850 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
851
852 // (select x, x, y) -> x | y
853 // (select !x, x, y) -> x & y
854 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
855 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
856 DAG.getFreeze(FalseV));
857 }
858 // (select x, y, x) -> x & y
859 // (select !x, y, x) -> x | y
860 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
861 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
862 DAG.getFreeze(TrueV), FalseV);
863 }
864 }
865
866 return SDValue();
867}
868
869// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
870// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
871// For now we only consider the transformation profitable if `binOp(c0, c1)` ends up
872// being `0` or `-1`. In such cases we can replace `select` with `and`.
873// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
874// than `c0`?
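// For example, (add (select cond, x, -1), 1) can be rewritten as
// (select cond, (add x, 1), 0), since add(-1, 1) == 0; a select with a zero
// arm is then cheap to lower as a masking AND.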
875static SDValue
877 const LoongArchSubtarget &Subtarget) {
878 unsigned SelOpNo = 0;
879 SDValue Sel = BO->getOperand(0);
880 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
881 SelOpNo = 1;
882 Sel = BO->getOperand(1);
883 }
884
885 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
886 return SDValue();
887
888 unsigned ConstSelOpNo = 1;
889 unsigned OtherSelOpNo = 2;
890 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
891 ConstSelOpNo = 2;
892 OtherSelOpNo = 1;
893 }
894 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
895 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
896 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
897 return SDValue();
898
899 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
900 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
901 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
902 return SDValue();
903
904 SDLoc DL(Sel);
905 EVT VT = BO->getValueType(0);
906
907 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
908 if (SelOpNo == 1)
909 std::swap(NewConstOps[0], NewConstOps[1]);
910
911 SDValue NewConstOp =
912 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
913 if (!NewConstOp)
914 return SDValue();
915
916 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
917 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
918 return SDValue();
919
920 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
921 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
922 if (SelOpNo == 1)
923 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
924 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
925
926 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
927 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
928 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
929}
930
931// Changes the condition code and swaps operands if necessary, so the SetCC
932// operation matches one of the comparisons supported directly by branches
933// in the LoongArch ISA. May adjust compares to favor compare with 0 over
934// compare with 1/-1.
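// For example, on LA64 the single-bit test (seteq (and x, 0x1000), 0), whose
// mask does not fit in a 12-bit immediate for ANDI, can become
// (setge (shl x, 51), 0): shifting bit 12 into the sign position lets the
// branch simply test the sign of the shifted value against zero.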
936 ISD::CondCode &CC, SelectionDAG &DAG) {
937 // If this is a single bit test that can't be handled by ANDI, shift the
938 // bit to be tested to the MSB and perform a signed compare with 0.
939 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
940 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
941 isa<ConstantSDNode>(LHS.getOperand(1))) {
942 uint64_t Mask = LHS.getConstantOperandVal(1);
943 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
944 unsigned ShAmt = 0;
945 if (isPowerOf2_64(Mask)) {
946 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
947 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
948 } else {
949 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
950 }
951
952 LHS = LHS.getOperand(0);
953 if (ShAmt != 0)
954 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
955 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
956 return;
957 }
958 }
959
960 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
961 int64_t C = RHSC->getSExtValue();
962 switch (CC) {
963 default:
964 break;
965 case ISD::SETGT:
966 // Convert X > -1 to X >= 0.
967 if (C == -1) {
968 RHS = DAG.getConstant(0, DL, RHS.getValueType());
969 CC = ISD::SETGE;
970 return;
971 }
972 break;
973 case ISD::SETLT:
974 // Convert X < 1 to 0 >= X.
975 if (C == 1) {
976 RHS = LHS;
977 LHS = DAG.getConstant(0, DL, RHS.getValueType());
978 CC = ISD::SETGE;
979 return;
980 }
981 break;
982 }
983 }
984
985 switch (CC) {
986 default:
987 break;
988 case ISD::SETGT:
989 case ISD::SETLE:
990 case ISD::SETUGT:
991 case ISD::SETULE:
993 std::swap(LHS, RHS);
994 break;
995 }
996}
997
998SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
999 SelectionDAG &DAG) const {
1000 SDValue CondV = Op.getOperand(0);
1001 SDValue TrueV = Op.getOperand(1);
1002 SDValue FalseV = Op.getOperand(2);
1003 SDLoc DL(Op);
1004 MVT VT = Op.getSimpleValueType();
1005 MVT GRLenVT = Subtarget.getGRLenVT();
1006
1007 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
1008 return V;
1009
1010 if (Op.hasOneUse()) {
1011 unsigned UseOpc = Op->user_begin()->getOpcode();
1012 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
1013 SDNode *BinOp = *Op->user_begin();
1014 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
1015 DAG, Subtarget)) {
1016 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
1017 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
1018 // may return a constant node and cause a crash in lowerSELECT.
1019 if (NewSel.getOpcode() == ISD::SELECT)
1020 return lowerSELECT(NewSel, DAG);
1021 return NewSel;
1022 }
1023 }
1024 }
1025
1026 // If the condition is not an integer SETCC which operates on GRLenVT, we need
1027 // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
1028 // (select condv, truev, falsev)
1029 // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
1030 if (CondV.getOpcode() != ISD::SETCC ||
1031 CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
1032 SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
1033 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
1034
1035 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
1036
1037 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1038 }
1039
1040 // If the CondV is the output of a SETCC node which operates on GRLenVT
1041 // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
1042 // to take advantage of the integer compare+branch instructions. i.e.: (select
1043 // (setcc lhs, rhs, cc), truev, falsev)
1044 // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
1045 SDValue LHS = CondV.getOperand(0);
1046 SDValue RHS = CondV.getOperand(1);
1047 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1048
1049 // Special case for a select of 2 constants that have a difference of 1.
1050 // Normally this is done by DAGCombine, but if the select is introduced by
1051 // type legalization or op legalization, we miss it. Restricting to SETLT
1052 // case for now because that is what signed saturating add/sub need.
1053 // FIXME: We don't need the condition to be SETLT or even a SETCC,
1054 // but we would probably want to swap the true/false values if the condition
1055 // is SETGE/SETLE to avoid an XORI.
1056 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
1057 CCVal == ISD::SETLT) {
1058 const APInt &TrueVal = TrueV->getAsAPIntVal();
1059 const APInt &FalseVal = FalseV->getAsAPIntVal();
1060 if (TrueVal - 1 == FalseVal)
1061 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
1062 if (TrueVal + 1 == FalseVal)
1063 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
1064 }
1065
1066 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1067 // 1 < x ? x : 1 -> 0 < x ? x : 1
1068 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
1069 RHS == TrueV && LHS == FalseV) {
1070 LHS = DAG.getConstant(0, DL, VT);
1071 // 0 <u x is the same as x != 0.
1072 if (CCVal == ISD::SETULT) {
1073 std::swap(LHS, RHS);
1074 CCVal = ISD::SETNE;
1075 }
1076 }
1077
1078 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
1079 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
1080 RHS == FalseV) {
1081 RHS = DAG.getConstant(0, DL, VT);
1082 }
1083
1084 SDValue TargetCC = DAG.getCondCode(CCVal);
1085
1086 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
1087 // (select (setcc lhs, rhs, CC), constant, falsev)
1088 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
1089 std::swap(TrueV, FalseV);
1090 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
1091 }
1092
1093 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
1094 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1095}
1096
1097SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
1098 SelectionDAG &DAG) const {
1099 SDValue CondV = Op.getOperand(1);
1100 SDLoc DL(Op);
1101 MVT GRLenVT = Subtarget.getGRLenVT();
1102
1103 if (CondV.getOpcode() == ISD::SETCC) {
1104 if (CondV.getOperand(0).getValueType() == GRLenVT) {
1105 SDValue LHS = CondV.getOperand(0);
1106 SDValue RHS = CondV.getOperand(1);
1107 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1108
1109 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1110
1111 SDValue TargetCC = DAG.getCondCode(CCVal);
1112 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1113 Op.getOperand(0), LHS, RHS, TargetCC,
1114 Op.getOperand(2));
1115 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
1116 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
1117 Op.getOperand(0), CondV, Op.getOperand(2));
1118 }
1119 }
1120
1121 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1122 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
1123 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1124}
1125
1126SDValue
1127LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1128 SelectionDAG &DAG) const {
1129 SDLoc DL(Op);
1130 MVT OpVT = Op.getSimpleValueType();
1131
1132 SDValue Vector = DAG.getUNDEF(OpVT);
1133 SDValue Val = Op.getOperand(0);
1134 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1135
1136 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1137}
1138
1139SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1140 SelectionDAG &DAG) const {
1141 EVT ResTy = Op->getValueType(0);
1142 SDValue Src = Op->getOperand(0);
1143 SDLoc DL(Op);
1144
1145 // LoongArchISD::BITREV_8B is not supported on LA32.
1146 if (!Subtarget.is64Bit() && (ResTy == MVT::v16i8 || ResTy == MVT::v32i8))
1147 return SDValue();
1148
1149 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1150 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1151 unsigned int NewEltNum = NewVT.getVectorNumElements();
1152
1153 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
1154
1156 for (unsigned int i = 0; i < NewEltNum; i++) {
1157 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1158 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1159 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1160 ? (unsigned)LoongArchISD::BITREV_8B
1161 : (unsigned)ISD::BITREVERSE;
1162 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1163 }
1164 SDValue Res =
1165 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1166
1167 switch (ResTy.getSimpleVT().SimpleTy) {
1168 default:
1169 return SDValue();
1170 case MVT::v16i8:
1171 case MVT::v32i8:
1172 return Res;
1173 case MVT::v8i16:
1174 case MVT::v16i16:
1175 case MVT::v4i32:
1176 case MVT::v8i32: {
1178 for (unsigned int i = 0; i < NewEltNum; i++)
1179 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1180 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1181 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1182 }
1183 }
1184}
1185
1186// Widen element type to get a new mask value (if possible).
1187// For example:
1188// shufflevector <4 x i32> %a, <4 x i32> %b,
1189// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1190// is equivalent to:
1191// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1192// can be lowered to:
1193// VPACKOD_D vr0, vr0, vr1
1195 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1196 unsigned EltBits = VT.getScalarSizeInBits();
1197
1198 if (EltBits > 32 || EltBits == 1)
1199 return SDValue();
1200
1201 SmallVector<int, 8> NewMask;
1202 if (widenShuffleMaskElts(Mask, NewMask)) {
1203 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
1204 : MVT::getIntegerVT(EltBits * 2);
1205 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
1206 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
1207 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
1208 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
1209 return DAG.getBitcast(
1210 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
1211 }
1212 }
1213
1214 return SDValue();
1215}
1216
1217/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1218/// instructions.
1219// The function matches elements from one of the input vectors shuffled to the
1220// left or right with zeroable elements 'shifted in'. It handles both the
1221// strictly bit-wise element shifts and the byte shift across an entire 128-bit
1222// lane.
1223// Mostly copied from X86.
1224static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
1225 unsigned ScalarSizeInBits, ArrayRef<int> Mask,
1226 int MaskOffset, const APInt &Zeroable) {
1227 int Size = Mask.size();
1228 unsigned SizeInBits = Size * ScalarSizeInBits;
1229
1230 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
1231 for (int i = 0; i < Size; i += Scale)
1232 for (int j = 0; j < Shift; ++j)
1233 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
1234 return false;
1235
1236 return true;
1237 };
1238
1239 auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
1240 int Step = 1) {
1241 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
1242 if (!(Mask[i] == -1 || Mask[i] == Low))
1243 return false;
1244 return true;
1245 };
1246
1247 auto MatchShift = [&](int Shift, int Scale, bool Left) {
1248 for (int i = 0; i != Size; i += Scale) {
1249 unsigned Pos = Left ? i + Shift : i;
1250 unsigned Low = Left ? i : i + Shift;
1251 unsigned Len = Scale - Shift;
1252 if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
1253 return -1;
1254 }
1255
1256 int ShiftEltBits = ScalarSizeInBits * Scale;
1257 bool ByteShift = ShiftEltBits > 64;
1258 Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
1259 : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
1260 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
1261
1262 // Normalize the scale for byte shifts to still produce an i64 element
1263 // type.
1264 Scale = ByteShift ? Scale / 2 : Scale;
1265
1266 // We need to round trip through the appropriate type for the shift.
1267 MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
1268 ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
1269 : MVT::getVectorVT(ShiftSVT, Size / Scale);
1270 return (int)ShiftAmt;
1271 };
1272
1273 unsigned MaxWidth = 128;
1274 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1275 for (int Shift = 1; Shift != Scale; ++Shift)
1276 for (bool Left : {true, false})
1277 if (CheckZeros(Shift, Scale, Left)) {
1278 int ShiftAmt = MatchShift(Shift, Scale, Left);
1279 if (0 < ShiftAmt)
1280 return ShiftAmt;
1281 }
1282
1283 // no match
1284 return -1;
1285}
1286
1287/// Lower VECTOR_SHUFFLE as shift (if possible).
1288///
1289/// For example:
1290/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1291/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1292/// is lowered to:
1293/// (VBSLL_V $v0, $v0, 4)
1294///
1295/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1296/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1297/// is lowered to:
1298/// (VSLLI_D $v0, $v0, 32)
1300 MVT VT, SDValue V1, SDValue V2,
1301 SelectionDAG &DAG,
1302 const LoongArchSubtarget &Subtarget,
1303 const APInt &Zeroable) {
1304 int Size = Mask.size();
1305 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1306
1307 MVT ShiftVT;
1308 SDValue V = V1;
1309 unsigned Opcode;
1310
1311 // Try to match shuffle against V1 shift.
1312 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1313 Mask, 0, Zeroable);
1314
1315 // If V1 failed, try to match shuffle against V2 shift.
1316 if (ShiftAmt < 0) {
1317 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1318 Mask, Size, Zeroable);
1319 V = V2;
1320 }
1321
1322 if (ShiftAmt < 0)
1323 return SDValue();
1324
1325 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1326 "Illegal integer vector type");
1327 V = DAG.getBitcast(ShiftVT, V);
1328 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1329 DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
1330 return DAG.getBitcast(VT, V);
1331}
1332
1333/// Determine whether a range fits a regular pattern of values.
1334/// This function accounts for the possibility of jumping over the End iterator.
1335template <typename ValType>
1336static bool
1338 unsigned CheckStride,
1340 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1341 auto &I = Begin;
1342
1343 while (I != End) {
1344 if (*I != -1 && *I != ExpectedIndex)
1345 return false;
1346 ExpectedIndex += ExpectedIndexStride;
1347
1348 // Incrementing past End is undefined behaviour so we must increment one
1349 // step at a time and check for End at each step.
1350 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1351 ; // Empty loop body.
1352 }
1353 return true;
1354}
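// For example, fitsRegularPattern<int>(Mask.begin(), 2, Mask.end(), 0, 2)
// checks that the elements at even positions of the mask are either undef
// (-1) or the sequence 0, 2, 4, ..., i.e. they select the even elements of
// the first input vector.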
1355
1356/// Compute whether each element of a shuffle is zeroable.
1357///
1358/// A "zeroable" vector shuffle element is one which can be lowered to zero.
1360 SDValue V2, APInt &KnownUndef,
1361 APInt &KnownZero) {
1362 int Size = Mask.size();
1363 KnownUndef = KnownZero = APInt::getZero(Size);
1364
1365 V1 = peekThroughBitcasts(V1);
1366 V2 = peekThroughBitcasts(V2);
1367
1368 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1369 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1370
1371 int VectorSizeInBits = V1.getValueSizeInBits();
1372 int ScalarSizeInBits = VectorSizeInBits / Size;
1373 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1374 (void)ScalarSizeInBits;
1375
1376 for (int i = 0; i < Size; ++i) {
1377 int M = Mask[i];
1378 if (M < 0) {
1379 KnownUndef.setBit(i);
1380 continue;
1381 }
1382 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1383 KnownZero.setBit(i);
1384 continue;
1385 }
1386 }
1387}
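// For example, shuffling <4 x i32> %v with zeroinitializer using the mask
// <4, 0, 1, 2> marks element 0 as known-zero (it reads from the all-zero
// second operand), while elements 1..3 are neither zero nor undef.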
1388
1389/// Test whether a shuffle mask is equivalent within each sub-lane.
1390///
1391/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1392/// non-trivial to compute in the face of undef lanes. The representation is
1393/// suitable for use with existing 128-bit shuffles as entries from the second
1394/// vector have been remapped to [LaneSize, 2*LaneSize).
1395static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1396 ArrayRef<int> Mask,
1397 SmallVectorImpl<int> &RepeatedMask) {
1398 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1399 RepeatedMask.assign(LaneSize, -1);
1400 int Size = Mask.size();
1401 for (int i = 0; i < Size; ++i) {
1402 assert(Mask[i] == -1 || Mask[i] >= 0);
1403 if (Mask[i] < 0)
1404 continue;
1405 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1406 // This entry crosses lanes, so there is no way to model this shuffle.
1407 return false;
1408
1409 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1410 // Adjust second vector indices to start at LaneSize instead of Size.
1411 int LocalM =
1412 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1413 if (RepeatedMask[i % LaneSize] < 0)
1414 // This is the first non-undef entry in this slot of a 128-bit lane.
1415 RepeatedMask[i % LaneSize] = LocalM;
1416 else if (RepeatedMask[i % LaneSize] != LocalM)
1417 // Found a mismatch with the repeated mask.
1418 return false;
1419 }
1420 return true;
1421}
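// For example, for v8i32 with 128-bit lanes, the mask
// <0, 9, 2, 11, 4, 13, 6, 15> repeats the per-lane mask <0, 5, 2, 7>
// (second-vector indices remapped to start at LaneSize), so this returns
// true; a mask whose entries cross lanes returns false.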
1422
1423/// Attempts to match a vector shuffle as a byte rotation.
1425 ArrayRef<int> Mask) {
1426
1427 SDValue Lo, Hi;
1428 SmallVector<int, 16> RepeatedMask;
1429
1430 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1431 return -1;
1432
1433 int NumElts = RepeatedMask.size();
1434 int Rotation = 0;
1435 int Scale = 16 / NumElts;
1436
1437 for (int i = 0; i < NumElts; ++i) {
1438 int M = RepeatedMask[i];
1439 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1440 "Unexpected mask index.");
1441 if (M < 0)
1442 continue;
1443
1444 // Determine where a rotated vector would have started.
1445 int StartIdx = i - (M % NumElts);
1446 if (StartIdx == 0)
1447 return -1;
1448
1449 // If we found the tail of a vector the rotation must be the missing
1450 // front. If we found the head of a vector, it must be how much of the
1451 // head.
1452 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1453
1454 if (Rotation == 0)
1455 Rotation = CandidateRotation;
1456 else if (Rotation != CandidateRotation)
1457 return -1;
1458
1459 // Compute which value this mask is pointing at.
1460 SDValue MaskV = M < NumElts ? V1 : V2;
1461
1462 // Compute which of the two target values this index should be assigned
1463 // to. This reflects whether the high elements are remaining or the low
1464 // elements are remaining.
1465 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1466
1467 // Either set up this value if we've not encountered it before, or check
1468 // that it remains consistent.
1469 if (!TargetV)
1470 TargetV = MaskV;
1471 else if (TargetV != MaskV)
1472 return -1;
1473 }
1474
1475 // Check that we successfully analyzed the mask, and normalize the results.
1476 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1477 assert((Lo || Hi) && "Failed to find a rotated input vector!");
1478 if (!Lo)
1479 Lo = Hi;
1480 else if (!Hi)
1481 Hi = Lo;
1482
1483 V1 = Lo;
1484 V2 = Hi;
1485
1486 return Rotation * Scale;
1487}
1488
1489/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1490///
1491/// For example:
1492/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1493/// <2 x i32> <i32 3, i32 0>
1494/// is lowered to:
1495/// (VBSRL_V $v1, $v1, 8)
1496/// (VBSLL_V $v0, $v0, 8)
1497/// (VOR_V $v0, $V0, $v1)
1498static SDValue
1500 SDValue V1, SDValue V2, SelectionDAG &DAG,
1501 const LoongArchSubtarget &Subtarget) {
1502
1503 SDValue Lo = V1, Hi = V2;
1504 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1505 if (ByteRotation <= 0)
1506 return SDValue();
1507
1508 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1509 Lo = DAG.getBitcast(ByteVT, Lo);
1510 Hi = DAG.getBitcast(ByteVT, Hi);
1511
1512 int LoByteShift = 16 - ByteRotation;
1513 int HiByteShift = ByteRotation;
1514 MVT GRLenVT = Subtarget.getGRLenVT();
1515
1516 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1517 DAG.getConstant(LoByteShift, DL, GRLenVT));
1518 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1519 DAG.getConstant(HiByteShift, DL, GRLenVT));
1520 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1521}
1522
1523/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1524///
1525/// For example:
1526/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1527/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1528/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1529/// is lowered to:
1530/// (VREPLI $v1, 0)
1531/// (VILVL $v0, $v1, $v0)
1533 ArrayRef<int> Mask, MVT VT,
1534 SDValue V1, SDValue V2,
1535 SelectionDAG &DAG,
1536 const APInt &Zeroable) {
1537 int Bits = VT.getSizeInBits();
1538 int EltBits = VT.getScalarSizeInBits();
1539 int NumElements = VT.getVectorNumElements();
1540
1541 if (Zeroable.isAllOnes())
1542 return DAG.getConstant(0, DL, VT);
1543
1544 // Define a helper function to check a particular ext-scale and lower to it if
1545 // valid.
1546 auto Lower = [&](int Scale) -> SDValue {
1547 SDValue InputV;
1548 bool AnyExt = true;
1549 int Offset = 0;
1550 for (int i = 0; i < NumElements; i++) {
1551 int M = Mask[i];
1552 if (M < 0)
1553 continue;
1554 if (i % Scale != 0) {
1555 // Each of the extended elements needs to be zeroable.
1556 if (!Zeroable[i])
1557 return SDValue();
1558
1559 AnyExt = false;
1560 continue;
1561 }
1562
1563 // The base elements need to be consecutive indices into the
1564 // same input vector.
1565 SDValue V = M < NumElements ? V1 : V2;
1566 M = M % NumElements;
1567 if (!InputV) {
1568 InputV = V;
1569 Offset = M - (i / Scale);
1570
1571 // These offsets can't be handled.
1572 if (Offset % (NumElements / Scale))
1573 return SDValue();
1574 } else if (InputV != V)
1575 return SDValue();
1576
1577 if (M != (Offset + (i / Scale)))
1578 return SDValue(); // Non-consecutive strided elements.
1579 }
1580
1581 // If we fail to find an input, we have a zero-shuffle which should always
1582 // have already been handled.
1583 if (!InputV)
1584 return SDValue();
1585
1586 do {
1587 unsigned VilVLoHi = LoongArchISD::VILVL;
1588 if (Offset >= (NumElements / 2)) {
1589 VilVLoHi = LoongArchISD::VILVH;
1590 Offset -= (NumElements / 2);
1591 }
1592
1593 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1594 SDValue Ext =
1595 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1596 InputV = DAG.getBitcast(InputVT, InputV);
1597 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
1598 Scale /= 2;
1599 EltBits *= 2;
1600 NumElements /= 2;
1601 } while (Scale > 1);
1602 return DAG.getBitcast(VT, InputV);
1603 };
1604
1605 // Each iteration, try extending the elements half as much, but into twice as
1606 // many elements.
1607 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
1608 NumExtElements *= 2) {
1609 if (SDValue V = Lower(NumElements / NumExtElements))
1610 return V;
1611 }
1612 return SDValue();
1613}
1614
1615/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1616///
1617/// VREPLVEI performs vector broadcast based on an element specified by an
1618/// integer immediate, with its mask being similar to:
1619/// <x, x, x, ...>
1620/// where x is any valid index.
1621///
1622/// When undef's appear in the mask they are treated as if they were whatever
1623/// value is necessary in order to fit the above form.
1624static SDValue
1626 SDValue V1, SelectionDAG &DAG,
1627 const LoongArchSubtarget &Subtarget) {
1628 int SplatIndex = -1;
1629 for (const auto &M : Mask) {
1630 if (M != -1) {
1631 SplatIndex = M;
1632 break;
1633 }
1634 }
1635
1636 if (SplatIndex == -1)
1637 return DAG.getUNDEF(VT);
1638
1639 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1640 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
1641 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1642 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
1643 }
1644
1645 return SDValue();
1646}
1647
1648/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1649///
1650/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1651/// elements according to a <4 x i2> constant (encoded as an integer immediate).
1652///
1653/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1654/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1655/// When undef's appear they are treated as if they were whatever value is
1656/// necessary in order to fit the above forms.
1657///
1658/// For example:
1659/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1660/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1661/// i32 7, i32 6, i32 5, i32 4>
1662/// is lowered to:
1663/// (VSHUF4I_H $v0, $v1, 27)
1664/// where the 27 comes from:
1665/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
1666static SDValue
1668 SDValue V1, SDValue V2, SelectionDAG &DAG,
1669 const LoongArchSubtarget &Subtarget) {
1670
1671 unsigned SubVecSize = 4;
1672 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1673 SubVecSize = 2;
1674
1675 int SubMask[4] = {-1, -1, -1, -1};
1676 for (unsigned i = 0; i < SubVecSize; ++i) {
1677 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1678 int M = Mask[j];
1679
1680 // Convert from vector index to 4-element subvector index
1681 // If an index refers to an element outside of the subvector then give up
1682 if (M != -1) {
1683 M -= 4 * (j / SubVecSize);
1684 if (M < 0 || M >= 4)
1685 return SDValue();
1686 }
1687
1688 // If the mask has an undef, replace it with the current index.
1689 // Note that it might still be undef if the current index is also undef
1690 if (SubMask[i] == -1)
1691 SubMask[i] = M;
1692 // Check that non-undef values are the same as in the mask. If they
1693 // aren't then give up
1694 else if (M != -1 && M != SubMask[i])
1695 return SDValue();
1696 }
1697 }
1698
1699 // Calculate the immediate. Replace any remaining undefs with zero
1700 int Imm = 0;
1701 for (int i = SubVecSize - 1; i >= 0; --i) {
1702 int M = SubMask[i];
1703
1704 if (M == -1)
1705 M = 0;
1706
1707 Imm <<= 2;
1708 Imm |= M & 0x3;
1709 }
1710
1711 MVT GRLenVT = Subtarget.getGRLenVT();
1712
1713 // Return vshuf4i.d
1714 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1715 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, V2,
1716 DAG.getConstant(Imm, DL, GRLenVT));
1717
1718 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
1719 DAG.getConstant(Imm, DL, GRLenVT));
1720}
1721
1722/// Lower VECTOR_SHUFFLE whose result is the reversed source vector.
1723///
1724/// It is possible to optimize a VECTOR_SHUFFLE performing a vector
1725/// reverse, whose mask looks like:
1726/// <7, 6, 5, 4, 3, 2, 1, 0>
1727///
1728/// When undef's appear in the mask they are treated as if they were whatever
1729/// value is necessary in order to fit the above forms.
1730static SDValue
1732 SDValue V1, SelectionDAG &DAG,
1733 const LoongArchSubtarget &Subtarget) {
1734 // Only vectors with i8/i16 elements, which cannot match other patterns
1735 // directly, need to do this.
1736 if (VT != MVT::v16i8 && VT != MVT::v8i16 && VT != MVT::v32i8 &&
1737 VT != MVT::v16i16)
1738 return SDValue();
1739
1740 if (!ShuffleVectorInst::isReverseMask(Mask, Mask.size()))
1741 return SDValue();
1742
1743 int WidenNumElts = VT.getVectorNumElements() / 4;
1744 SmallVector<int, 16> WidenMask(WidenNumElts, -1);
1745 for (int i = 0; i < WidenNumElts; ++i)
1746 WidenMask[i] = WidenNumElts - 1 - i;
1747
1748 MVT WidenVT = MVT::getVectorVT(
1749 VT.getVectorElementType() == MVT::i8 ? MVT::i32 : MVT::i64, WidenNumElts);
1750 SDValue NewV1 = DAG.getBitcast(WidenVT, V1);
1751 SDValue WidenRev = DAG.getVectorShuffle(WidenVT, DL, NewV1,
1752 DAG.getUNDEF(WidenVT), WidenMask);
1753
1754 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT,
1755 DAG.getBitcast(VT, WidenRev),
1756 DAG.getConstant(27, DL, Subtarget.getGRLenVT()));
1757}
1758
1759/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
1760///
1761/// VPACKEV interleaves the even elements from each vector.
1762///
1763/// It is possible to lower into VPACKEV when the mask consists of two of the
1764/// following forms interleaved:
1765/// <0, 2, 4, ...>
1766/// <n, n+2, n+4, ...>
1767/// where n is the number of elements in the vector.
1768/// For example:
1769/// <0, 0, 2, 2, 4, 4, ...>
1770/// <0, n, 2, n+2, 4, n+4, ...>
1771///
1772/// When undef's appear in the mask they are treated as if they were whatever
1773/// value is necessary in order to fit the above forms.
1775 MVT VT, SDValue V1, SDValue V2,
1776 SelectionDAG &DAG) {
1777
1778 const auto &Begin = Mask.begin();
1779 const auto &End = Mask.end();
1780 SDValue OriV1 = V1, OriV2 = V2;
1781
1782 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
1783 V1 = OriV1;
1784 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
1785 V1 = OriV2;
1786 else
1787 return SDValue();
1788
1789 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
1790 V2 = OriV1;
1791 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
1792 V2 = OriV2;
1793 else
1794 return SDValue();
1795
1796 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
1797}
1798
1799/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
1800///
1801/// VPACKOD interleaves the odd elements from each vector.
1802///
1803/// It is possible to lower into VPACKOD when the mask consists of two of the
1804/// following forms interleaved:
1805/// <1, 3, 5, ...>
1806/// <n+1, n+3, n+5, ...>
1807/// where n is the number of elements in the vector.
1808/// For example:
1809/// <1, 1, 3, 3, 5, 5, ...>
1810/// <1, n+1, 3, n+3, 5, n+5, ...>
1811///
1812/// When undef's appear in the mask they are treated as if they were whatever
1813/// value is necessary in order to fit the above forms.
1815 MVT VT, SDValue V1, SDValue V2,
1816 SelectionDAG &DAG) {
1817
1818 const auto &Begin = Mask.begin();
1819 const auto &End = Mask.end();
1820 SDValue OriV1 = V1, OriV2 = V2;
1821
1822 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
1823 V1 = OriV1;
1824 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
1825 V1 = OriV2;
1826 else
1827 return SDValue();
1828
1829 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
1830 V2 = OriV1;
1831 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
1832 V2 = OriV2;
1833 else
1834 return SDValue();
1835
1836 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
1837}
1838
1839/// Lower VECTOR_SHUFFLE into VILVH (if possible).
1840///
1841/// VILVH interleaves consecutive elements from the left (highest-indexed) half
1842/// of each vector.
1843///
1844/// It is possible to lower into VILVH when the mask consists of two of the
1845/// following forms interleaved:
1846/// <x, x+1, x+2, ...>
1847/// <n+x, n+x+1, n+x+2, ...>
1848/// where n is the number of elements in the vector and x is half n.
1849/// For example:
1850/// <x, x, x+1, x+1, x+2, x+2, ...>
1851/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
1852///
1853/// When undef's appear in the mask they are treated as if they were whatever
1854/// value is necessary in order to fit the above forms.
1856 MVT VT, SDValue V1, SDValue V2,
1857 SelectionDAG &DAG) {
1858
1859 const auto &Begin = Mask.begin();
1860 const auto &End = Mask.end();
1861 unsigned HalfSize = Mask.size() / 2;
1862 SDValue OriV1 = V1, OriV2 = V2;
1863
1864 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
1865 V1 = OriV1;
1866 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
1867 V1 = OriV2;
1868 else
1869 return SDValue();
1870
1871 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
1872 V2 = OriV1;
1873 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
1874 1))
1875 V2 = OriV2;
1876 else
1877 return SDValue();
1878
1879 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1880}
1881
1882/// Lower VECTOR_SHUFFLE into VILVL (if possible).
1883///
1884/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
1885/// of each vector.
1886///
1887/// It is possible to lower into VILVL when the mask consists of two of the
1888/// following forms interleaved:
1889/// <0, 1, 2, ...>
1890/// <n, n+1, n+2, ...>
1891/// where n is the number of elements in the vector.
1892/// For example:
1893/// <0, 0, 1, 1, 2, 2, ...>
1894/// <0, n, 1, n+1, 2, n+2, ...>
1895///
1896/// When undef's appear in the mask they are treated as if they were whatever
1897/// value is necessary in order to fit the above forms.
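/// For instance, for v4i32 (n = 4) the mask <0, 4, 1, 5> fits and interleaves
/// the low halves of V1 and V2: <V1[0], V2[0], V1[1], V2[1]>.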
1898static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
1899 MVT VT, SDValue V1, SDValue V2,
1900 SelectionDAG &DAG) {
1901
1902 const auto &Begin = Mask.begin();
1903 const auto &End = Mask.end();
1904 SDValue OriV1 = V1, OriV2 = V2;
1905
1906 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
1907 V1 = OriV1;
1908 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
1909 V1 = OriV2;
1910 else
1911 return SDValue();
1912
1913 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
1914 V2 = OriV1;
1915 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
1916 V2 = OriV2;
1917 else
1918 return SDValue();
1919
1920 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1921}
1922
1923/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
1924///
1925/// VPICKEV copies the even elements of each vector into the result vector.
1926///
1927/// It is possible to lower into VPICKEV when the mask consists of two of the
1928/// following forms concatenated:
1929/// <0, 2, 4, ...>
1930/// <n, n+2, n+4, ...>
1931/// where n is the number of elements in the vector.
1932/// For example:
1933/// <0, 2, 4, ..., 0, 2, 4, ...>
1934/// <0, 2, 4, ..., n, n+2, n+4, ...>
1935///
1936/// When undef's appear in the mask they are treated as if they were whatever
1937/// value is necessary in order to fit the above forms.
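/// For instance, for v4i32 (n = 4) the mask <0, 2, 4, 6> fits: the first half
/// gathers the even elements of V1 and the second half gathers the even
/// elements of V2, giving <V1[0], V1[2], V2[0], V2[2]>.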
1938static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
1939 MVT VT, SDValue V1, SDValue V2,
1940 SelectionDAG &DAG) {
1941
1942 const auto &Begin = Mask.begin();
1943 const auto &Mid = Mask.begin() + Mask.size() / 2;
1944 const auto &End = Mask.end();
1945 SDValue OriV1 = V1, OriV2 = V2;
1946
1947 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
1948 V1 = OriV1;
1949 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
1950 V1 = OriV2;
1951 else
1952 return SDValue();
1953
1954 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
1955 V2 = OriV1;
1956 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
1957 V2 = OriV2;
1958
1959 else
1960 return SDValue();
1961
1962 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1963}
1964
1965/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
1966///
1967/// VPICKOD copies the odd elements of each vector into the result vector.
1968///
1969/// It is possible to lower into VPICKOD when the mask consists of two of the
1970/// following forms concatenated:
1971/// <1, 3, 5, ...>
1972/// <n+1, n+3, n+5, ...>
1973/// where n is the number of elements in the vector.
1974/// For example:
1975/// <1, 3, 5, ..., 1, 3, 5, ...>
1976/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
1977///
1978/// When undef's appear in the mask they are treated as if they were whatever
1979/// value is necessary in order to fit the above forms.
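/// For instance, for v4i32 (n = 4) the mask <1, 3, 5, 7> fits, giving
/// <V1[1], V1[3], V2[1], V2[3]>.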
1980static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
1981 MVT VT, SDValue V1, SDValue V2,
1982 SelectionDAG &DAG) {
1983
1984 const auto &Begin = Mask.begin();
1985 const auto &Mid = Mask.begin() + Mask.size() / 2;
1986 const auto &End = Mask.end();
1987 SDValue OriV1 = V1, OriV2 = V2;
1988
1989 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
1990 V1 = OriV1;
1991 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
1992 V1 = OriV2;
1993 else
1994 return SDValue();
1995
1996 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
1997 V2 = OriV1;
1998 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
1999 V2 = OriV2;
2000 else
2001 return SDValue();
2002
2003 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2004}
2005
2006/// Lower VECTOR_SHUFFLE into VSHUF.
2007///
2008/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
2009/// adding it as an operand to the resulting VSHUF.
2010static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2011 MVT VT, SDValue V1, SDValue V2,
2012 SelectionDAG &DAG,
2013 const LoongArchSubtarget &Subtarget) {
2014
2016 for (auto M : Mask)
2017 Ops.push_back(DAG.getSignedConstant(M, DL, Subtarget.getGRLenVT()));
2018
2019 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2020 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
2021
2022 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
2023 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
2024 // VSHUF concatenates the vectors in a bitwise fashion:
2025 // <0b00, 0b01> + <0b10, 0b11> ->
2026 // 0b0100 + 0b1110 -> 0b01001110
2027 // <0b10, 0b11, 0b00, 0b01>
2028 // We must therefore swap the operands to get the correct result.
2029 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2030}
2031
2032/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
2033///
2034/// This routine breaks down the specific type of 128-bit shuffle and
2035/// dispatches to the lowering routines accordingly.
2036static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2037 SDValue V1, SDValue V2, SelectionDAG &DAG,
2038 const LoongArchSubtarget &Subtarget) {
2039 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
2040 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
2041 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
2042 "Vector type is unsupported for lsx!");
2043 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
2044 "Two operands have different types!");
2045 assert(VT.getVectorNumElements() == Mask.size() &&
2046 "Unexpected mask size for shuffle!");
2047 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2048
2049 APInt KnownUndef, KnownZero;
2050 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2051 APInt Zeroable = KnownUndef | KnownZero;
2052
2053 SDValue Result;
2054 // TODO: Add more comparison patterns.
2055 if (V2.isUndef()) {
2056 if ((Result =
2057 lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2058 return Result;
2059 if ((Result =
2060 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2061 return Result;
2062 if ((Result =
2063 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2064 return Result;
2065
2066 // TODO: The commented-out assignment below may be enabled in the future
2067 // to better match the pattern for instruction selection.
2068 /* V2 = V1; */
2069 }
2070
2071 // It is recommended not to change the pattern comparison order for better
2072 // performance.
2073 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
2074 return Result;
2075 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
2076 return Result;
2077 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
2078 return Result;
2079 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
2080 return Result;
2081 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
2082 return Result;
2083 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
2084 return Result;
2085 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
2086 (Result =
2087 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2088 return Result;
2089 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
2090 Zeroable)))
2091 return Result;
2092 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2093 Zeroable)))
2094 return Result;
2095 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2096 Subtarget)))
2097 return Result;
2098 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2099 return NewShuffle;
2100 if ((Result =
2101 lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2102 return Result;
2103 return SDValue();
2104}
2105
2106/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
2107///
2108/// It is a XVREPLVEI when the mask is:
2109/// <x, x, x, ..., x+n, x+n, x+n, ...>
2110/// where x appears n times and n is half the length of the vector.
2111///
2112/// When undef's appear in the mask they are treated as if they were whatever
2113/// value is necessary in order to fit the above form.
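/// For instance, for v8i32 (n = 4) the mask <2, 2, 2, 2, 6, 6, 6, 6> fits and
/// is lowered to a VREPLVEI node with immediate 2, replicating element 2
/// within each 128-bit half of V1.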
2114static SDValue
2115lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2116 SDValue V1, SelectionDAG &DAG,
2117 const LoongArchSubtarget &Subtarget) {
2118 int SplatIndex = -1;
2119 for (const auto &M : Mask) {
2120 if (M != -1) {
2121 SplatIndex = M;
2122 break;
2123 }
2124 }
2125
2126 if (SplatIndex == -1)
2127 return DAG.getUNDEF(VT);
2128
2129 const auto &Begin = Mask.begin();
2130 const auto &End = Mask.end();
2131 int HalfSize = Mask.size() / 2;
2132
2133 if (SplatIndex >= HalfSize)
2134 return SDValue();
2135
2136 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
2137 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
2138 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
2139 0)) {
2140 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2141 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2142 }
2143
2144 return SDValue();
2145}
2146
2147/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
2148static SDValue
2149lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2150 SDValue V1, SDValue V2, SelectionDAG &DAG,
2151 const LoongArchSubtarget &Subtarget) {
2152 // When the size is less than or equal to 4, lower cost instructions may be
2153 // used.
2154 if (Mask.size() <= 4)
2155 return SDValue();
2156 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
2157}
2158
2159/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
2160static SDValue
2161lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2162 SDValue V1, SelectionDAG &DAG,
2163 const LoongArchSubtarget &Subtarget) {
2164 // Only consider XVPERMI_D.
2165 if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64))
2166 return SDValue();
2167
2168 unsigned MaskImm = 0;
2169 for (unsigned i = 0; i < Mask.size(); ++i) {
2170 if (Mask[i] == -1)
2171 continue;
2172 MaskImm |= Mask[i] << (i * 2);
2173 }
2174
2175 return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
2176 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2177}
2178
2179/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
2180static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
2181 MVT VT, SDValue V1, SelectionDAG &DAG,
2182 const LoongArchSubtarget &Subtarget) {
2183 // LoongArch LASX only has XVPERM_W.
2184 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2185 return SDValue();
2186
2187 unsigned NumElts = VT.getVectorNumElements();
2188 unsigned HalfSize = NumElts / 2;
2189 bool FrontLo = true, FrontHi = true;
2190 bool BackLo = true, BackHi = true;
2191
2192 auto inRange = [](int val, int low, int high) {
2193 return (val == -1) || (val >= low && val < high);
2194 };
2195
2196 for (unsigned i = 0; i < HalfSize; ++i) {
2197 int Fronti = Mask[i];
2198 int Backi = Mask[i + HalfSize];
2199
2200 FrontLo &= inRange(Fronti, 0, HalfSize);
2201 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2202 BackLo &= inRange(Backi, 0, HalfSize);
2203 BackHi &= inRange(Backi, HalfSize, NumElts);
2204 }
2205
2206 // If both the lower and upper 128-bit parts access only one half of the
2207 // vector (either lower or upper), avoid using xvperm.w. The latency of
2208 // xvperm.w (3) is higher than that of xvshuf (1) plus xvori (1).
2209 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2210 return SDValue();
2211
2213 MVT GRLenVT = Subtarget.getGRLenVT();
2214 for (unsigned i = 0; i < NumElts; ++i)
2215 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(GRLenVT)
2216 : DAG.getConstant(Mask[i], DL, GRLenVT));
2217 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2218
2219 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2220}
2221
2222/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
2223static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
2224 MVT VT, SDValue V1, SDValue V2,
2225 SelectionDAG &DAG) {
2226 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2227}
2228
2229/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
2230static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
2231 MVT VT, SDValue V1, SDValue V2,
2232 SelectionDAG &DAG) {
2233 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2234}
2235
2236/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
2237static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
2238 MVT VT, SDValue V1, SDValue V2,
2239 SelectionDAG &DAG) {
2240
2241 const auto &Begin = Mask.begin();
2242 const auto &End = Mask.end();
2243 unsigned HalfSize = Mask.size() / 2;
2244 unsigned LeftSize = HalfSize / 2;
2245 SDValue OriV1 = V1, OriV2 = V2;
2246
2247 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2248 1) &&
2249 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2250 V1 = OriV1;
2251 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2252 Mask.size() + HalfSize - LeftSize, 1) &&
2253 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2254 Mask.size() + HalfSize + LeftSize, 1))
2255 V1 = OriV2;
2256 else
2257 return SDValue();
2258
2259 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2260 1) &&
2261 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2262 1))
2263 V2 = OriV1;
2264 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2265 Mask.size() + HalfSize - LeftSize, 1) &&
2266 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2267 Mask.size() + HalfSize + LeftSize, 1))
2268 V2 = OriV2;
2269 else
2270 return SDValue();
2271
2272 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2273}
2274
2275/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
2276static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
2277 MVT VT, SDValue V1, SDValue V2,
2278 SelectionDAG &DAG) {
2279
2280 const auto &Begin = Mask.begin();
2281 const auto &End = Mask.end();
2282 unsigned HalfSize = Mask.size() / 2;
2283 SDValue OriV1 = V1, OriV2 = V2;
2284
2285 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2286 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2287 V1 = OriV1;
2288 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2289 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2290 Mask.size() + HalfSize, 1))
2291 V1 = OriV2;
2292 else
2293 return SDValue();
2294
2295 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2296 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2297 V2 = OriV1;
2298 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2299 1) &&
2300 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2301 Mask.size() + HalfSize, 1))
2302 V2 = OriV2;
2303 else
2304 return SDValue();
2305
2306 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2307}
2308
2309/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2310static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2311 MVT VT, SDValue V1, SDValue V2,
2312 SelectionDAG &DAG) {
2313
2314 const auto &Begin = Mask.begin();
2315 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2316 const auto &Mid = Mask.begin() + Mask.size() / 2;
2317 const auto &RightMid = Mask.end() - Mask.size() / 4;
2318 const auto &End = Mask.end();
2319 unsigned HalfSize = Mask.size() / 2;
2320 SDValue OriV1 = V1, OriV2 = V2;
2321
2322 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2323 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2324 V1 = OriV1;
2325 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2326 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2327 V1 = OriV2;
2328 else
2329 return SDValue();
2330
2331 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2332 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2333 V2 = OriV1;
2334 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2335 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2336 V2 = OriV2;
2337
2338 else
2339 return SDValue();
2340
2341 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2342}
2343
2344/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2345static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2346 MVT VT, SDValue V1, SDValue V2,
2347 SelectionDAG &DAG) {
2348
2349 const auto &Begin = Mask.begin();
2350 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2351 const auto &Mid = Mask.begin() + Mask.size() / 2;
2352 const auto &RightMid = Mask.end() - Mask.size() / 4;
2353 const auto &End = Mask.end();
2354 unsigned HalfSize = Mask.size() / 2;
2355 SDValue OriV1 = V1, OriV2 = V2;
2356
2357 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2358 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2359 V1 = OriV1;
2360 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2361 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2362 2))
2363 V1 = OriV2;
2364 else
2365 return SDValue();
2366
2367 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2368 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2369 V2 = OriV1;
2370 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2371 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2372 2))
2373 V2 = OriV2;
2374 else
2375 return SDValue();
2376
2377 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2378}
2379
2380/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
2381static SDValue
2382lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2383 SDValue V1, SDValue V2, SelectionDAG &DAG,
2384 const LoongArchSubtarget &Subtarget) {
2385 // LoongArch LASX only supports xvinsve0.{w/d}.
2386 if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
2387 VT != MVT::v4f64)
2388 return SDValue();
2389
2390 MVT GRLenVT = Subtarget.getGRLenVT();
2391 int MaskSize = Mask.size();
2392 assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
2393
2394 // Check if exactly one element of the Mask is replaced by 'Replaced', while
2395 // all other elements are either 'Base + i' or undef (-1). On success, return
2396 // the index of the replaced element. Otherwise, just return -1.
2397 auto checkReplaceOne = [&](int Base, int Replaced) -> int {
2398 int Idx = -1;
2399 for (int i = 0; i < MaskSize; ++i) {
2400 if (Mask[i] == Base + i || Mask[i] == -1)
2401 continue;
2402 if (Mask[i] != Replaced)
2403 return -1;
2404 if (Idx == -1)
2405 Idx = i;
2406 else
2407 return -1;
2408 }
2409 return Idx;
2410 };
2411
2412 // Case 1: the lowest element of V2 replaces one element in V1.
2413 int Idx = checkReplaceOne(0, MaskSize);
2414 if (Idx != -1)
2415 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2,
2416 DAG.getConstant(Idx, DL, GRLenVT));
2417
2418 // Case 2: the lowest element of V1 replaces one element in V2.
2419 Idx = checkReplaceOne(MaskSize, 0);
2420 if (Idx != -1)
2421 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1,
2422 DAG.getConstant(Idx, DL, GRLenVT));
2423
2424 return SDValue();
2425}
2426
2427/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
2428static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2429 MVT VT, SDValue V1, SDValue V2,
2430 SelectionDAG &DAG) {
2431
2432 int MaskSize = Mask.size();
2433 int HalfSize = Mask.size() / 2;
2434 const auto &Begin = Mask.begin();
2435 const auto &Mid = Mask.begin() + HalfSize;
2436 const auto &End = Mask.end();
2437
2438 // VECTOR_SHUFFLE concatenates the vectors:
2439 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2440 // shuffling ->
2441 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2442 //
2443 // XVSHUF concatenates the vectors:
2444 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2445 // shuffling ->
2446 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
2447 SmallVector<SDValue, 8> MaskAlloc;
2448 for (auto it = Begin; it < Mid; it++) {
2449 if (*it < 0) // UNDEF
2450 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2451 else if ((*it >= 0 && *it < HalfSize) ||
2452 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2453 int M = *it < HalfSize ? *it : *it - HalfSize;
2454 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2455 } else
2456 return SDValue();
2457 }
2458 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2459
2460 for (auto it = Mid; it < End; it++) {
2461 if (*it < 0) // UNDEF
2462 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2463 else if ((*it >= HalfSize && *it < MaskSize) ||
2464 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2465 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2466 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2467 } else
2468 return SDValue();
2469 }
2470 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2471
2472 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2473 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
2474 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2475}
2476
2477/// Shuffle vectors by lane to generate more optimized instructions.
2478/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2479///
2480/// Therefore, except for the following four cases, other cases are regarded
2481/// as cross-lane shuffles, where optimization is relatively limited.
2482///
2483/// - Shuffle high, low lanes of the two input vectors
2484/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2485/// - Shuffle low, high lanes of the two input vectors
2486/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2487/// - Shuffle low, low lanes of the two input vectors
2488/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2489/// - Shuffle high, high lanes of the two input vectors
2490/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2491///
2492/// The first case is the closest to LoongArch instructions and the other
2493/// cases need to be converted to it for processing.
2494///
2495/// This function will return true for the last three cases above and will
2496/// modify V1, V2 and Mask. Otherwise, return false for the first case and
2497/// cross-lane shuffle cases.
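/// For instance, for v8i32 the mask <4, 12, 6, 14, 0, 8, 2, 10> is a "low,
/// high" shuffle: both 128-bit halves of V1 and V2 are swapped (XVPERMI with
/// immediate 0b01001110) and the mask is rewritten to
/// <0, 8, 2, 10, 4, 12, 6, 14>, which matches the first case above.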
2498static bool canonicalizeShuffleVectorByLane(
2499 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
2500 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
2501
2502 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2503
2504 int MaskSize = Mask.size();
2505 int HalfSize = Mask.size() / 2;
2506 MVT GRLenVT = Subtarget.getGRLenVT();
2507
2508 HalfMaskType preMask = None, postMask = None;
2509
2510 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2511 return M < 0 || (M >= 0 && M < HalfSize) ||
2512 (M >= MaskSize && M < MaskSize + HalfSize);
2513 }))
2514 preMask = HighLaneTy;
2515 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2516 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2517 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2518 }))
2519 preMask = LowLaneTy;
2520
2521 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2522 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2523 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2524 }))
2525 postMask = LowLaneTy;
2526 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2527 return M < 0 || (M >= 0 && M < HalfSize) ||
2528 (M >= MaskSize && M < MaskSize + HalfSize);
2529 }))
2530 postMask = HighLaneTy;
2531
2532 // The pre-half of mask is high lane type, and the post-half of mask
2533 // is low lane type, which is closest to the LoongArch instructions.
2534 //
2535 // Note: In the LoongArch architecture, the high lane of the mask corresponds
2536 // to the lower 128 bits of the vector register, and the low lane of the mask
2537 // corresponds to the higher 128 bits of the vector register.
2538 if (preMask == HighLaneTy && postMask == LowLaneTy) {
2539 return false;
2540 }
2541 if (preMask == LowLaneTy && postMask == HighLaneTy) {
2542 V1 = DAG.getBitcast(MVT::v4i64, V1);
2543 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2544 DAG.getConstant(0b01001110, DL, GRLenVT));
2545 V1 = DAG.getBitcast(VT, V1);
2546
2547 if (!V2.isUndef()) {
2548 V2 = DAG.getBitcast(MVT::v4i64, V2);
2549 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2550 DAG.getConstant(0b01001110, DL, GRLenVT));
2551 V2 = DAG.getBitcast(VT, V2);
2552 }
2553
2554 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2555 *it = *it < 0 ? *it : *it - HalfSize;
2556 }
2557 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2558 *it = *it < 0 ? *it : *it + HalfSize;
2559 }
2560 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
2561 V1 = DAG.getBitcast(MVT::v4i64, V1);
2562 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2563 DAG.getConstant(0b11101110, DL, GRLenVT));
2564 V1 = DAG.getBitcast(VT, V1);
2565
2566 if (!V2.isUndef()) {
2567 V2 = DAG.getBitcast(MVT::v4i64, V2);
2568 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2569 DAG.getConstant(0b11101110, DL, GRLenVT));
2570 V2 = DAG.getBitcast(VT, V2);
2571 }
2572
2573 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2574 *it = *it < 0 ? *it : *it - HalfSize;
2575 }
2576 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
2577 V1 = DAG.getBitcast(MVT::v4i64, V1);
2578 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2579 DAG.getConstant(0b01000100, DL, GRLenVT));
2580 V1 = DAG.getBitcast(VT, V1);
2581
2582 if (!V2.isUndef()) {
2583 V2 = DAG.getBitcast(MVT::v4i64, V2);
2584 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2585 DAG.getConstant(0b01000100, DL, GRLenVT));
2586 V2 = DAG.getBitcast(VT, V2);
2587 }
2588
2589 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2590 *it = *it < 0 ? *it : *it + HalfSize;
2591 }
2592 } else { // cross-lane
2593 return false;
2594 }
2595
2596 return true;
2597}
2598
2599/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2600/// Only for 256-bit vector.
2601///
2602/// For example:
2603/// %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
2604/// <4 x i32> <i32 0, i32 3, i32 2, i32 0>
2605/// is lowered to:
2606/// (XVPERMI $xr2, $xr0, 78)
2607/// (XVSHUF $xr1, $xr2, $xr0)
2608/// (XVORI $xr0, $xr1, 0)
2609static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL,
2610 ArrayRef<int> Mask,
2611 MVT VT, SDValue V1,
2612 SDValue V2,
2613 SelectionDAG &DAG) {
2614 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2615 int Size = Mask.size();
2616 int LaneSize = Size / 2;
2617
2618 bool LaneCrossing[2] = {false, false};
2619 for (int i = 0; i < Size; ++i)
2620 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2621 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2622
2623 // Ensure that all lanes are involved.
2624 if (!LaneCrossing[0] && !LaneCrossing[1])
2625 return SDValue();
2626
2627 SmallVector<int> InLaneMask;
2628 InLaneMask.assign(Mask.begin(), Mask.end());
2629 for (int i = 0; i < Size; ++i) {
2630 int &M = InLaneMask[i];
2631 if (M < 0)
2632 continue;
2633 if (((M % Size) / LaneSize) != (i / LaneSize))
2634 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2635 }
2636
2637 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
2638 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
2639 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
2640 Flipped = DAG.getBitcast(VT, Flipped);
2641 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
2642}
2643
2644/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2645///
2646/// This routine breaks down the specific type of 256-bit shuffle and
2647/// dispatches to the lowering routines accordingly.
2648static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2649 SDValue V1, SDValue V2, SelectionDAG &DAG,
2650 const LoongArchSubtarget &Subtarget) {
2651 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2652 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2653 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2654 "Vector type is unsupported for lasx!");
2655 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
2656 "Two operands have different types!");
2657 assert(VT.getVectorNumElements() == Mask.size() &&
2658 "Unexpected mask size for shuffle!");
2659 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2660 assert(Mask.size() >= 4 && "Mask size is less than 4.");
2661
2662 APInt KnownUndef, KnownZero;
2663 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2664 APInt Zeroable = KnownUndef | KnownZero;
2665
2666 SDValue Result;
2667 // TODO: Add more comparison patterns.
2668 if (V2.isUndef()) {
2669 if ((Result =
2670 lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2671 return Result;
2672 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
2673 Subtarget)))
2674 return Result;
2675 // Try to widen vectors to gain more optimization opportunities.
2676 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2677 return NewShuffle;
2678 if ((Result =
2679 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
2680 return Result;
2681 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
2682 return Result;
2683 if ((Result =
2684 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2685 return Result;
2686
2687 // TODO: The commented-out assignment below may be enabled in the future
2688 // to better match the pattern for instruction selection.
2689 /* V2 = V1; */
2690 }
2691
2692 // It is recommended not to change the pattern comparison order for better
2693 // performance.
2694 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
2695 return Result;
2696 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
2697 return Result;
2698 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
2699 return Result;
2700 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
2701 return Result;
2702 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
2703 return Result;
2704 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
2705 return Result;
2706 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2707 Zeroable)))
2708 return Result;
2709 if ((Result =
2710 lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2711 return Result;
2712 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2713 Subtarget)))
2714 return Result;
2715
2716 // Canonicalize non-cross-lane shuffle vectors.
2717 SmallVector<int> NewMask(Mask);
2718 if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
2719 return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
2720
2721 // FIXME: Handling the remaining cases earlier can degrade performance
2722 // in some situations. Further analysis is required to enable more
2723 // effective optimizations.
2724 if (V2.isUndef()) {
2725 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2726 V1, V2, DAG)))
2727 return Result;
2728 }
2729
2730 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
2731 return NewShuffle;
2732 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
2733 return Result;
2734
2735 return SDValue();
2736}
2737
2738SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2739 SelectionDAG &DAG) const {
2740 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
2741 ArrayRef<int> OrigMask = SVOp->getMask();
2742 SDValue V1 = Op.getOperand(0);
2743 SDValue V2 = Op.getOperand(1);
2744 MVT VT = Op.getSimpleValueType();
2745 int NumElements = VT.getVectorNumElements();
2746 SDLoc DL(Op);
2747
2748 bool V1IsUndef = V1.isUndef();
2749 bool V2IsUndef = V2.isUndef();
2750 if (V1IsUndef && V2IsUndef)
2751 return DAG.getUNDEF(VT);
2752
2753 // When we create a shuffle node we put the UNDEF node to second operand,
2754 // but in some cases the first operand may be transformed to UNDEF.
2755 // In this case we should just commute the node.
2756 if (V1IsUndef)
2757 return DAG.getCommutedVectorShuffle(*SVOp);
2758
2759 // Check for non-undef masks pointing at an undef vector and make the masks
2760 // undef as well. This makes it easier to match the shuffle based solely on
2761 // the mask.
2762 if (V2IsUndef &&
2763 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
2764 SmallVector<int, 8> NewMask(OrigMask);
2765 for (int &M : NewMask)
2766 if (M >= NumElements)
2767 M = -1;
2768 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
2769 }
2770
2771 // Check for illegal shuffle mask element index values.
2772 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
2773 (void)MaskUpperLimit;
2774 assert(llvm::all_of(OrigMask,
2775 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
2776 "Out of bounds shuffle index");
2777
2778 // For each vector width, delegate to a specialized lowering routine.
2779 if (VT.is128BitVector())
2780 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2781
2782 if (VT.is256BitVector())
2783 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2784
2785 return SDValue();
2786}
2787
2788SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2789 SelectionDAG &DAG) const {
2790 // Custom lower to ensure the libcall return is passed in an FPR on hard
2791 // float ABIs.
2792 SDLoc DL(Op);
2793 MakeLibCallOptions CallOptions;
2794 SDValue Op0 = Op.getOperand(0);
2795 SDValue Chain = SDValue();
2796 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
2797 SDValue Res;
2798 std::tie(Res, Chain) =
2799 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
2800 if (Subtarget.is64Bit())
2801 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2802 return DAG.getBitcast(MVT::i32, Res);
2803}
2804
2805SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2806 SelectionDAG &DAG) const {
2807 // Custom lower to ensure the libcall argument is passed in an FPR on hard
2808 // float ABIs.
2809 SDLoc DL(Op);
2810 MakeLibCallOptions CallOptions;
2811 SDValue Op0 = Op.getOperand(0);
2812 SDValue Chain = SDValue();
2813 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2814 DL, MVT::f32, Op0)
2815 : DAG.getBitcast(MVT::f32, Op0);
2816 SDValue Res;
2817 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
2818 CallOptions, DL, Chain);
2819 return Res;
2820}
2821
2822SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2823 SelectionDAG &DAG) const {
2824 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2825 SDLoc DL(Op);
2826 MakeLibCallOptions CallOptions;
2827 RTLIB::Libcall LC =
2828 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
2829 SDValue Res =
2830 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
2831 if (Subtarget.is64Bit())
2832 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2833 return DAG.getBitcast(MVT::i32, Res);
2834}
2835
2836SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2837 SelectionDAG &DAG) const {
2838 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2839 MVT VT = Op.getSimpleValueType();
2840 SDLoc DL(Op);
2841 Op = DAG.getNode(
2842 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
2843 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
2844 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2845 DL, MVT::f32, Op)
2846 : DAG.getBitcast(MVT::f32, Op);
2847 if (VT != MVT::f32)
2848 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
2849 return Res;
2850}
2851
2852// Lower BUILD_VECTOR as broadcast load (if possible).
2853// For example:
2854// %a = load i8, ptr %ptr
2855// %b = build_vector %a, %a, %a, %a
2856// is lowered to:
2857// (VLDREPL_B $a0, 0)
2858static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
2859 const SDLoc &DL,
2860 SelectionDAG &DAG) {
2861 MVT VT = BVOp->getSimpleValueType(0);
2862 int NumOps = BVOp->getNumOperands();
2863
2864 assert((VT.is128BitVector() || VT.is256BitVector()) &&
2865 "Unsupported vector type for broadcast.");
2866
2867 SDValue IdentitySrc;
2868 bool IsIdeneity = true;
2869
2870 for (int i = 0; i != NumOps; i++) {
2871 SDValue Op = BVOp->getOperand(i);
2872 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
2873 IsIdeneity = false;
2874 break;
2875 }
2876 IdentitySrc = BVOp->getOperand(0);
2877 }
2878
2879 // make sure that this load is valid and only has one user.
2880 if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
2881 return SDValue();
2882
2883 auto *LN = cast<LoadSDNode>(IdentitySrc);
2884 auto ExtType = LN->getExtensionType();
2885
2886 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
2887 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
2888 SDVTList Tys =
2889 LN->isIndexed()
2890 ? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
2891 : DAG.getVTList(VT, MVT::Other);
2892 SDValue Ops[] = {LN->getChain(), LN->getBasePtr(), LN->getOffset()};
2893 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
2894 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
2895 return BCast;
2896 }
2897 return SDValue();
2898}
2899
2900// Sequentially insert elements from Ops into Vector, from low to high indices.
2901// Note: Ops can have fewer elements than Vector.
2902static void fillVector(ArrayRef<SDValue> Ops, SelectionDAG &DAG, SDLoc DL,
2903 const LoongArchSubtarget &Subtarget, SDValue &Vector,
2904 EVT ResTy) {
2905 assert(Ops.size() <= ResTy.getVectorNumElements());
2906
2907 SDValue Op0 = Ops[0];
2908 if (!Op0.isUndef())
2909 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
2910 for (unsigned i = 1; i < Ops.size(); ++i) {
2911 SDValue Opi = Ops[i];
2912 if (Opi.isUndef())
2913 continue;
2914 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
2915 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2916 }
2917}
2918
2919// Build a ResTy subvector from Node, taking NumElts elements starting at index
2920// 'first'.
2921static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node,
2922 SelectionDAG &DAG, SDLoc DL,
2923 const LoongArchSubtarget &Subtarget,
2924 EVT ResTy, unsigned first) {
2925 unsigned NumElts = ResTy.getVectorNumElements();
2926
2927 assert(first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
2928
2929 SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
2930 Node->op_begin() + first + NumElts);
2931 SDValue Vector = DAG.getUNDEF(ResTy);
2932 fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
2933 return Vector;
2934}
2935
2936SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
2937 SelectionDAG &DAG) const {
2938 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
2939 MVT VT = Node->getSimpleValueType(0);
2940 EVT ResTy = Op->getValueType(0);
2941 unsigned NumElts = ResTy.getVectorNumElements();
2942 SDLoc DL(Op);
2943 APInt SplatValue, SplatUndef;
2944 unsigned SplatBitSize;
2945 bool HasAnyUndefs;
2946 bool IsConstant = false;
2947 bool UseSameConstant = true;
2948 SDValue ConstantValue;
2949 bool Is128Vec = ResTy.is128BitVector();
2950 bool Is256Vec = ResTy.is256BitVector();
2951
2952 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
2953 (!Subtarget.hasExtLASX() || !Is256Vec))
2954 return SDValue();
2955
2956 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
2957 return Result;
2958
2959 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
2960 /*MinSplatBits=*/8) &&
2961 SplatBitSize <= 64) {
2962 // We can only cope with 8, 16, 32, or 64-bit elements.
2963 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
2964 SplatBitSize != 64)
2965 return SDValue();
2966
2967 if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
2968 // We can only handle 64-bit elements that are within
2969 // the signed 10-bit range or match vldi patterns on 32-bit targets.
2970 // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
2971 if (!SplatValue.isSignedIntN(10) &&
2972 !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
2973 return SDValue();
2974 if ((Is128Vec && ResTy == MVT::v4i32) ||
2975 (Is256Vec && ResTy == MVT::v8i32))
2976 return Op;
2977 }
2978
2979 EVT ViaVecTy;
2980
2981 switch (SplatBitSize) {
2982 default:
2983 return SDValue();
2984 case 8:
2985 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
2986 break;
2987 case 16:
2988 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
2989 break;
2990 case 32:
2991 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
2992 break;
2993 case 64:
2994 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
2995 break;
2996 }
2997
2998 // SelectionDAG::getConstant will promote SplatValue appropriately.
2999 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
3000
3001 // Bitcast to the type we originally wanted.
3002 if (ViaVecTy != ResTy)
3003 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
3004
3005 return Result;
3006 }
3007
3008 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
3009 return Op;
3010
3011 for (unsigned i = 0; i < NumElts; ++i) {
3012 SDValue Opi = Node->getOperand(i);
3013 if (isIntOrFPConstant(Opi)) {
3014 IsConstant = true;
3015 if (!ConstantValue.getNode())
3016 ConstantValue = Opi;
3017 else if (ConstantValue != Opi)
3018 UseSameConstant = false;
3019 }
3020 }
3021
3022 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
3023 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
3024 SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
3025 for (unsigned i = 0; i < NumElts; ++i) {
3026 SDValue Opi = Node->getOperand(i);
3027 if (!isIntOrFPConstant(Opi))
3028 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
3029 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3030 }
3031 return Result;
3032 }
3033
3034 if (!IsConstant) {
3035 // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
3036 // the sub-sequence of the vector and then broadcast the sub-sequence.
3037 //
3038 // TODO: If the BUILD_VECTOR contains undef elements, consider falling
3039 // back to use INSERT_VECTOR_ELT to materialize the vector, because it
3040 // generates worse code in some cases. This could be further optimized
3041 // with more consideration.
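// For instance, a v8i32 BUILD_VECTOR <a, b, a, b, a, b, a, b> has the
// repeated sequence {a, b}: the two elements are inserted into the low
// 128 bits, the vector is then viewed as v4i64 and its element 0 (packing
// a and b) is broadcast to all lanes with XVREPLVE0.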
3043 BitVector UndefElements;
3044 if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
3045 UndefElements.count() == 0) {
3046 // Use LSX instructions to fill the sub-sequence of a 256-bit vector,
3047 // because the high part can simply be treated as undef.
3048 SDValue Vector = DAG.getUNDEF(ResTy);
3049 EVT FillTy = Is256Vec
3050 ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext())
3051 : ResTy;
3052 SDValue FillVec =
3053 Is256Vec ? DAG.getExtractSubvector(DL, FillTy, Vector, 0) : Vector;
3054
3055 fillVector(Sequence, DAG, DL, Subtarget, FillVec, FillTy);
3056
3057 unsigned SeqLen = Sequence.size();
3058 unsigned SplatLen = NumElts / SeqLen;
3059 MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
3060 MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
3061
3062 // If the size of the sub-sequence is half of a 256-bit vector, bitcast the
3063 // vector to v4i64 in order to match the pattern of XVREPLVE0Q.
3064 if (SplatEltTy == MVT::i128)
3065 SplatTy = MVT::v4i64;
3066
3067 SDValue SplatVec;
3068 SDValue SrcVec = DAG.getBitcast(
3069 SplatTy,
3070 Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
3071 if (Is256Vec) {
3072 SplatVec =
3073 DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
3074 : LoongArchISD::XVREPLVE0,
3075 DL, SplatTy, SrcVec);
3076 } else {
3077 SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
3078 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
3079 }
3080
3081 return DAG.getBitcast(ResTy, SplatVec);
3082 }
3083
3084 // Use INSERT_VECTOR_ELT operations rather than expand to stores, because
3085 // using memory operations is much slower.
3086 //
3087 // For 256-bit vectors, normally split into two halves and concatenate.
3088 // Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
3089 // one non-undef element, skip splitting to avoid a worse result.
3090 if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
3091 ResTy == MVT::v4f64) {
3092 unsigned NonUndefCount = 0;
3093 for (unsigned i = NumElts / 2; i < NumElts; ++i) {
3094 if (!Node->getOperand(i).isUndef()) {
3095 ++NonUndefCount;
3096 if (NonUndefCount > 1)
3097 break;
3098 }
3099 }
3100 if (NonUndefCount == 1)
3101 return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, 0);
3102 }
3103
3104 EVT VecTy =
3105 Is256Vec ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext()) : ResTy;
3106 SDValue Vector =
3107 fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, VecTy, 0);
3108
3109 if (Is128Vec)
3110 return Vector;
3111
3112 SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
3113 VecTy, NumElts / 2);
3114
3115 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResTy, Vector, VectorHi);
3116 }
3117
3118 return SDValue();
3119}
3120
3121SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
3122 SelectionDAG &DAG) const {
3123 SDLoc DL(Op);
3124 MVT ResVT = Op.getSimpleValueType();
3125 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
3126
3127 unsigned NumOperands = Op.getNumOperands();
3128 unsigned NumFreezeUndef = 0;
3129 unsigned NumZero = 0;
3130 unsigned NumNonZero = 0;
3131 unsigned NonZeros = 0;
3132 SmallSet<SDValue, 4> Undefs;
3133 for (unsigned i = 0; i != NumOperands; ++i) {
3134 SDValue SubVec = Op.getOperand(i);
3135 if (SubVec.isUndef())
3136 continue;
3137 if (ISD::isFreezeUndef(SubVec.getNode())) {
3138 // If the freeze(undef) has multiple uses then we must fold to zero.
3139 if (SubVec.hasOneUse()) {
3140 ++NumFreezeUndef;
3141 } else {
3142 ++NumZero;
3143 Undefs.insert(SubVec);
3144 }
3145 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
3146 ++NumZero;
3147 else {
3148 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
3149 NonZeros |= 1 << i;
3150 ++NumNonZero;
3151 }
3152 }
3153
3154 // If we have more than 2 non-zeros, build each half separately.
3155 if (NumNonZero > 2) {
3156 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
3157 ArrayRef<SDUse> Ops = Op->ops();
3158 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3159 Ops.slice(0, NumOperands / 2));
3160 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3161 Ops.slice(NumOperands / 2));
3162 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
3163 }
3164
3165 // Otherwise, build it up through insert_subvectors.
3166 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
3167 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
3168 : DAG.getUNDEF(ResVT));
3169
3170 // Replace Undef operands with ZeroVector.
3171 for (SDValue U : Undefs)
3172 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
3173
3174 MVT SubVT = Op.getOperand(0).getSimpleValueType();
3175 unsigned NumSubElems = SubVT.getVectorNumElements();
3176 for (unsigned i = 0; i != NumOperands; ++i) {
3177 if ((NonZeros & (1 << i)) == 0)
3178 continue;
3179
3180 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
3181 DAG.getVectorIdxConstant(i * NumSubElems, DL));
3182 }
3183
3184 return Vec;
3185}
3186
3187SDValue
3188LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3189 SelectionDAG &DAG) const {
3190 MVT EltVT = Op.getSimpleValueType();
3191 SDValue Vec = Op->getOperand(0);
3192 EVT VecTy = Vec->getValueType(0);
3193 SDValue Idx = Op->getOperand(1);
3194 SDLoc DL(Op);
3195 MVT GRLenVT = Subtarget.getGRLenVT();
3196
3197 assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
3198
3199 if (isa<ConstantSDNode>(Idx))
3200 return Op;
3201
3202 switch (VecTy.getSimpleVT().SimpleTy) {
3203 default:
3204 llvm_unreachable("Unexpected type");
3205 case MVT::v32i8:
3206 case MVT::v16i16:
3207 case MVT::v4i64:
3208 case MVT::v4f64: {
3209 // Extract the high half subvector and place it in the low half of a new
3210 // vector. It doesn't matter what the high half of the new vector is.
3211 EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
3212 SDValue VecHi =
3213 DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
3214 SDValue TmpVec =
3215 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
3216 VecHi, DAG.getConstant(0, DL, GRLenVT));
3217
3218 // Shuffle the original Vec and TmpVec using MaskVec; the lowest element
3219 // of MaskVec is Idx and the rest do not matter. ResVec[0] will hold the
3220 // desired element.
3221 SDValue IdxCp =
3222 Subtarget.is64Bit()
3223 ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx)
3224 : DAG.getBitcast(MVT::f32, Idx);
3225 SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
3226 SDValue MaskVec =
3227 DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
3228 SDValue ResVec =
3229 DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
3230
3231 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
3232 DAG.getConstant(0, DL, GRLenVT));
3233 }
3234 case MVT::v8i32:
3235 case MVT::v8f32: {
3236 SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
3237 SDValue SplatValue =
3238 DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);
3239
3240 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
3241 DAG.getConstant(0, DL, GRLenVT));
3242 }
3243 }
3244}
3245
3246SDValue
3247LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3248 SelectionDAG &DAG) const {
3249 MVT VT = Op.getSimpleValueType();
3250 MVT EltVT = VT.getVectorElementType();
3251 unsigned NumElts = VT.getVectorNumElements();
3252 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
3253 SDLoc DL(Op);
3254 SDValue Op0 = Op.getOperand(0);
3255 SDValue Op1 = Op.getOperand(1);
3256 SDValue Op2 = Op.getOperand(2);
3257
3258 if (isa<ConstantSDNode>(Op2))
3259 return Op;
3260
3261 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
3262 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
3263
3264 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
3265 return SDValue();
3266
3267 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
3268 SmallVector<SDValue, 32> RawIndices;
3269 SDValue SplatIdx;
3270 SDValue Indices;
3271
3272 if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
3273 MVT PairVTy = MVT::getVectorVT(MVT::i32, NumElts * 2);
3274 for (unsigned i = 0; i < NumElts; ++i) {
3275 RawIndices.push_back(Op2);
3276 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3277 }
3278 SplatIdx = DAG.getBuildVector(PairVTy, DL, RawIndices);
3279 SplatIdx = DAG.getBitcast(IdxVTy, SplatIdx);
3280
3281 RawIndices.clear();
3282 for (unsigned i = 0; i < NumElts; ++i) {
3283 RawIndices.push_back(DAG.getConstant(i, DL, MVT::i32));
3284 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3285 }
3286 Indices = DAG.getBuildVector(PairVTy, DL, RawIndices);
3287 Indices = DAG.getBitcast(IdxVTy, Indices);
3288 } else {
3289 SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
3290
3291 for (unsigned i = 0; i < NumElts; ++i)
3292 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3293 Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
3294 }
3295
3296 // insert vec, elt, idx
3297 // =>
3298 // select (splatidx == {0,1,2...}) ? splatelt : vec
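// For instance, for v4i32 with a run-time index: SplatIdx = <idx, idx, idx,
// idx> and Indices = <0, 1, 2, 3>; if idx is 2 at run time, the SETEQ mask is
// true only in lane 2, so the VSELECT takes the splatted element there and
// keeps the original vector elsewhere.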
3299 SDValue SelectCC =
3300 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
3301 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
3302}
3303
3304SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3305 SelectionDAG &DAG) const {
3306 SDLoc DL(Op);
3307 SyncScope::ID FenceSSID =
3308 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
3309
3310 // singlethread fences only synchronize with signal handlers on the same
3311 // thread and thus only need to preserve instruction order, not actually
3312 // enforce memory ordering.
3313 if (FenceSSID == SyncScope::SingleThread)
3314 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3315 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3316
3317 return Op;
3318}
3319
3320SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
3321 SelectionDAG &DAG) const {
3322
3323 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
3324 DAG.getContext()->emitError(
3325 "On LA64, only 64-bit registers can be written.");
3326 return Op.getOperand(0);
3327 }
3328
3329 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
3330 DAG.getContext()->emitError(
3331 "On LA32, only 32-bit registers can be written.");
3332 return Op.getOperand(0);
3333 }
3334
3335 return Op;
3336}
3337
3338SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
3339 SelectionDAG &DAG) const {
3340 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
3341 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
3342 "be a constant integer");
3343 return SDValue();
3344 }
3345
3346 MachineFunction &MF = DAG.getMachineFunction();
3347 MF.getFrameInfo().setFrameAddressIsTaken(true);
3348 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
3349 EVT VT = Op.getValueType();
3350 SDLoc DL(Op);
3351 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
3352 unsigned Depth = Op.getConstantOperandVal(0);
3353 int GRLenInBytes = Subtarget.getGRLen() / 8;
3354
3355 while (Depth--) {
3356 int Offset = -(GRLenInBytes * 2);
3357 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
3358 DAG.getSignedConstant(Offset, DL, VT));
3359 FrameAddr =
3360 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
3361 }
3362 return FrameAddr;
3363}
3364
3365SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
3366 SelectionDAG &DAG) const {
3367 // Currently we only support lowering the return address for the current frame.
3368 if (Op.getConstantOperandVal(0) != 0) {
3369 DAG.getContext()->emitError(
3370 "return address can only be determined for the current frame");
3371 return SDValue();
3372 }
3373
3374 MachineFunction &MF = DAG.getMachineFunction();
3375 MF.getFrameInfo().setReturnAddressIsTaken(true);
3376 MVT GRLenVT = Subtarget.getGRLenVT();
3377
3378 // Return the value of the return address register, marking it an implicit
3379 // live-in.
3380 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
3381 getRegClassFor(GRLenVT));
3382 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
3383}
3384
3385SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
3386 SelectionDAG &DAG) const {
3387 MachineFunction &MF = DAG.getMachineFunction();
3388 auto Size = Subtarget.getGRLen() / 8;
3389 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
3390 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3391}
3392
3393SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
3394 SelectionDAG &DAG) const {
3395 MachineFunction &MF = DAG.getMachineFunction();
3396 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
3397
3398 SDLoc DL(Op);
3399 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3400 getPointerTy(MF.getDataLayout()));
3401
3402 // vastart just stores the address of the VarArgsFrameIndex slot into the
3403 // memory location argument.
3404 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3405 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3406 MachinePointerInfo(SV));
3407}
3408
3409SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
3410 SelectionDAG &DAG) const {
3411 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3412 !Subtarget.hasBasicD() && "unexpected target features");
3413
3414 SDLoc DL(Op);
3415 SDValue Op0 = Op.getOperand(0);
3416 if (Op0->getOpcode() == ISD::AND) {
3417 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
3418 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
3419 return Op;
3420 }
3421
3422 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
3423 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
3424 Op0.getConstantOperandVal(2) == UINT64_C(0))
3425 return Op;
3426
3427 if (Op0.getOpcode() == ISD::AssertZext &&
3428 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
3429 return Op;
3430
3431 EVT OpVT = Op0.getValueType();
3432 EVT RetVT = Op.getValueType();
3433 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
3434 MakeLibCallOptions CallOptions;
3435 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3436 SDValue Chain = SDValue();
3437 SDValue Result;
3438 std::tie(Result, Chain) =
3439 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3440 return Result;
3441}
3442
3443SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
3444 SelectionDAG &DAG) const {
3445 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3446 !Subtarget.hasBasicD() && "unexpected target features");
3447
3448 SDLoc DL(Op);
3449 SDValue Op0 = Op.getOperand(0);
3450
3451 if ((Op0.getOpcode() == ISD::AssertSext ||
3452 Op0.getOpcode() == ISD::AssertZext) &&
3453 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
3454 return Op;
3455
3456 EVT OpVT = Op0.getValueType();
3457 EVT RetVT = Op.getValueType();
3458 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
3459 MakeLibCallOptions CallOptions;
3460 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3461 SDValue Chain = SDValue();
3462 SDValue Result;
3463 std::tie(Result, Chain) =
3464 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3465 return Result;
3466}
3467
3468SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
3469 SelectionDAG &DAG) const {
3470
3471 SDLoc DL(Op);
3472 EVT VT = Op.getValueType();
3473 SDValue Op0 = Op.getOperand(0);
3474 EVT Op0VT = Op0.getValueType();
3475
3476 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
3477 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
3478 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3479 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
3480 }
3481 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
3482 SDValue Lo, Hi;
3483 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
3484 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
3485 }
3486 return Op;
3487}
3488
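// lowerFP_TO_SINT first extends an f16 source to f32. On LA64 with basic 'F'
// but without 'D', a result wider than 32 bits is produced by FTINT on f32
// followed by MOVFR2GR_S_LA64; otherwise the value is converted with FTINT in
// a same-sized FP type and bitcast to the integer result type.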
3489SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
3490 SelectionDAG &DAG) const {
3491
3492 SDLoc DL(Op);
3493 SDValue Op0 = Op.getOperand(0);
3494
3495 if (Op0.getValueType() == MVT::f16)
3496 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
3497
3498 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
3499 !Subtarget.hasBasicD()) {
3500 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
3501 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
3502 }
3503
3504 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
3505 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
3506 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
3507}
3508
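// The getTargetNode overloads below wrap the different address-carrying nodes
// (global address, block address, constant pool, jump table) into their
// target-specific forms; they are consumed by the getAddr() template further
// down.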
3509static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
3510 SelectionDAG &DAG, unsigned Flags) {
3511 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
3512}
3513
3514static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
3515 SelectionDAG &DAG, unsigned Flags) {
3516 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
3517 Flags);
3518}
3519
3520static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
3521 SelectionDAG &DAG, unsigned Flags) {
3522 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
3523 N->getOffset(), Flags);
3524}
3525
3526static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
3527 SelectionDAG &DAG, unsigned Flags) {
3528 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
3529}
3530
3531template <class NodeTy>
3532SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3533 CodeModel::Model M,
3534 bool IsLocal) const {
3535 SDLoc DL(N);
3536 EVT Ty = getPointerTy(DAG.getDataLayout());
3537 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3538 SDValue Load;
3539
3540 switch (M) {
3541 default:
3542 report_fatal_error("Unsupported code model");
3543
3544 case CodeModel::Large: {
3545 assert(Subtarget.is64Bit() && "Large code model requires LA64");
3546
3547 // This is not actually used, but is necessary for successfully matching
3548 // the PseudoLA_*_LARGE nodes.
3549 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3550 if (IsLocal) {
3551 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
3552 // eventually becomes the desired 5-insn code sequence.
3553 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
3554 Tmp, Addr),
3555 0);
3556 } else {
3557 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
3558 // eventually becomes the desired 5-insn code sequence.
3559 Load = SDValue(
3560 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
3561 0);
3562 }
3563 break;
3564 }
3565
3566 case CodeModel::Small:
3567 case CodeModel::Medium:
3568 if (IsLocal) {
3569 // This generates the pattern (PseudoLA_PCREL sym), which expands to
3570 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
3571 Load = SDValue(
3572 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
3573 } else {
3574 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
3575 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
3576 Load =
3577 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
3578 }
3579 }
3580
3581 if (!IsLocal) {
3582 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3583 MachineFunction &MF = DAG.getMachineFunction();
3584 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3585 MachinePointerInfo::getGOT(MF),
3586 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3587 MachineMemOperand::MOInvariant,
3588 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3589 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
3590 }
3591
3592 return Load;
3593}
3594
3595SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
3596 SelectionDAG &DAG) const {
3597 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
3598 DAG.getTarget().getCodeModel());
3599}
3600
3601SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
3602 SelectionDAG &DAG) const {
3603 return getAddr(cast<JumpTableSDNode>(Op), DAG,
3604 DAG.getTarget().getCodeModel());
3605}
3606
3607SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
3608 SelectionDAG &DAG) const {
3609 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
3610 DAG.getTarget().getCodeModel());
3611}
3612
3613SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
3614 SelectionDAG &DAG) const {
3615 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3616 assert(N->getOffset() == 0 && "unexpected offset in global node");
3617 auto CM = DAG.getTarget().getCodeModel();
3618 const GlobalValue *GV = N->getGlobal();
3619
3620 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
3621 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
3622 CM = *GCM;
3623 }
3624
3625 return getAddr(N, DAG, CM, GV->isDSOLocal());
3626}
3627
3628SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3629 SelectionDAG &DAG,
3630 unsigned Opc, bool UseGOT,
3631 bool Large) const {
3632 SDLoc DL(N);
3633 EVT Ty = getPointerTy(DAG.getDataLayout());
3634 MVT GRLenVT = Subtarget.getGRLenVT();
3635
3636 // This is not actually used, but is necessary for successfully matching the
3637 // PseudoLA_*_LARGE nodes.
3638 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3639 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3640
3641 // Only IE needs an extra argument for large code model.
3642 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
3643 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3644 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3645
3646 // If it is LE for normal/medium code model, the add tp operation will occur
3647 // during the pseudo-instruction expansion.
3648 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
3649 return Offset;
3650
3651 if (UseGOT) {
3652 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3653 MachineFunction &MF = DAG.getMachineFunction();
3654 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3655 MachinePointerInfo::getGOT(MF),
3656 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3657 MachineMemOperand::MOInvariant,
3658 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3659 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
3660 }
3661
3662 // Add the thread pointer.
3663 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
3664 DAG.getRegister(LoongArch::R2, GRLenVT));
3665}
3666
3667SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3668 SelectionDAG &DAG,
3669 unsigned Opc,
3670 bool Large) const {
3671 SDLoc DL(N);
3672 EVT Ty = getPointerTy(DAG.getDataLayout());
3673 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3674
3675 // This is not actually used, but is necessary for successfully matching the
3676 // PseudoLA_*_LARGE nodes.
3677 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3678
3679 // Use a PC-relative addressing mode to access the dynamic GOT address.
3680 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3681 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3682 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3683
3684 // Prepare argument list to generate call.
3685 TargetLowering::ArgListTy Args;
3686 Args.emplace_back(Load, CallTy);
3687
3688 // Setup call to __tls_get_addr.
3689 TargetLowering::CallLoweringInfo CLI(DAG);
3690 CLI.setDebugLoc(DL)
3691 .setChain(DAG.getEntryNode())
3692 .setLibCallee(CallingConv::C, CallTy,
3693 DAG.getExternalSymbol("__tls_get_addr", Ty),
3694 std::move(Args));
3695
3696 return LowerCallTo(CLI).first;
3697}
3698
3699SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3700 SelectionDAG &DAG, unsigned Opc,
3701 bool Large) const {
3702 SDLoc DL(N);
3703 EVT Ty = getPointerTy(DAG.getDataLayout());
3704 const GlobalValue *GV = N->getGlobal();
3705
3706 // This is not actually used, but is necessary for successfully matching the
3707 // PseudoLA_*_LARGE nodes.
3708 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3709
3710 // Use a PC-relative addressing mode to access the global dynamic GOT address.
3711 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
3712 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3713 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3714 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3715}
3716
3717SDValue
3718LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3719 SelectionDAG &DAG) const {
3720 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3721 CallingConv::GHC)
3722 report_fatal_error("In GHC calling convention TLS is not supported");
3723
3724 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
3725 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
3726
3727 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3728 assert(N->getOffset() == 0 && "unexpected offset in global node");
3729
3730 if (DAG.getTarget().useEmulatedTLS())
3731 reportFatalUsageError("the emulated TLS is prohibited");
3732
3733 bool IsDesc = DAG.getTarget().useTLSDESC();
3734
3735 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
3736 case TLSModel::GeneralDynamic:
3737 // In this model, application code calls the dynamic linker function
3738 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
3739 // runtime.
3740 if (!IsDesc)
3741 return getDynamicTLSAddr(N, DAG,
3742 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
3743 : LoongArch::PseudoLA_TLS_GD,
3744 Large);
3745 break;
3746 case TLSModel::LocalDynamic:
3747 // Same as GeneralDynamic, except for assembly modifiers and relocation
3748 // records.
3749 if (!IsDesc)
3750 return getDynamicTLSAddr(N, DAG,
3751 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
3752 : LoongArch::PseudoLA_TLS_LD,
3753 Large);
3754 break;
3755 case TLSModel::InitialExec:
3756 // This model uses the GOT to resolve TLS offsets.
3757 return getStaticTLSAddr(N, DAG,
3758 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
3759 : LoongArch::PseudoLA_TLS_IE,
3760 /*UseGOT=*/true, Large);
3761 case TLSModel::LocalExec:
3762 // This model is used when static linking as the TLS offsets are resolved
3763 // during program linking.
3764 //
3765 // This node doesn't need an extra argument for the large code model.
3766 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
3767 /*UseGOT=*/false, Large);
3768 }
3769
3770 return getTLSDescAddr(N, DAG,
3771 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
3772 : LoongArch::PseudoLA_TLS_DESC,
3773 Large);
3774}
3775
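// checkIntrinsicImmArg<N> verifies that operand ImmOp of the intrinsic is an
// immediate that fits in N bits (signed when IsSigned is set). On failure it
// emits a diagnostic and returns an UNDEF of the result type; on success it
// returns an empty SDValue so the caller falls back to the default lowering.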
3776template <unsigned N>
3777static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
3778 SelectionDAG &DAG, bool IsSigned = false) {
3779 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
3780 // Check the ImmArg.
3781 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3782 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3783 DAG.getContext()->emitError(Op->getOperationName(0) +
3784 ": argument out of range.");
3785 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
3786 }
3787 return SDValue();
3788}
3789
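// Custom lowering of chain-less intrinsics: llvm.thread.pointer is mapped to
// the thread-pointer register $tp (R2); the LSX/LASX cases below only
// range-check their immediate operands via checkIntrinsicImmArg and otherwise
// leave instruction selection to the usual patterns.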
3790SDValue
3791LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3792 SelectionDAG &DAG) const {
3793 switch (Op.getConstantOperandVal(0)) {
3794 default:
3795 return SDValue(); // Don't custom lower most intrinsics.
3796 case Intrinsic::thread_pointer: {
3797 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3798 return DAG.getRegister(LoongArch::R2, PtrVT);
3799 }
3800 case Intrinsic::loongarch_lsx_vpickve2gr_d:
3801 case Intrinsic::loongarch_lsx_vpickve2gr_du:
3802 case Intrinsic::loongarch_lsx_vreplvei_d:
3803 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
3804 return checkIntrinsicImmArg<1>(Op, 2, DAG);
3805 case Intrinsic::loongarch_lsx_vreplvei_w:
3806 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
3807 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
3808 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
3809 case Intrinsic::loongarch_lasx_xvpickve_d:
3810 case Intrinsic::loongarch_lasx_xvpickve_d_f:
3811 return checkIntrinsicImmArg<2>(Op, 2, DAG);
3812 case Intrinsic::loongarch_lasx_xvinsve0_d:
3813 return checkIntrinsicImmArg<2>(Op, 3, DAG);
3814 case Intrinsic::loongarch_lsx_vsat_b:
3815 case Intrinsic::loongarch_lsx_vsat_bu:
3816 case Intrinsic::loongarch_lsx_vrotri_b:
3817 case Intrinsic::loongarch_lsx_vsllwil_h_b:
3818 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
3819 case Intrinsic::loongarch_lsx_vsrlri_b:
3820 case Intrinsic::loongarch_lsx_vsrari_b:
3821 case Intrinsic::loongarch_lsx_vreplvei_h:
3822 case Intrinsic::loongarch_lasx_xvsat_b:
3823 case Intrinsic::loongarch_lasx_xvsat_bu:
3824 case Intrinsic::loongarch_lasx_xvrotri_b:
3825 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
3826 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
3827 case Intrinsic::loongarch_lasx_xvsrlri_b:
3828 case Intrinsic::loongarch_lasx_xvsrari_b:
3829 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
3830 case Intrinsic::loongarch_lasx_xvpickve_w:
3831 case Intrinsic::loongarch_lasx_xvpickve_w_f:
3832 return checkIntrinsicImmArg<3>(Op, 2, DAG);
3833 case Intrinsic::loongarch_lasx_xvinsve0_w:
3834 return checkIntrinsicImmArg<3>(Op, 3, DAG);
3835 case Intrinsic::loongarch_lsx_vsat_h:
3836 case Intrinsic::loongarch_lsx_vsat_hu:
3837 case Intrinsic::loongarch_lsx_vrotri_h:
3838 case Intrinsic::loongarch_lsx_vsllwil_w_h:
3839 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
3840 case Intrinsic::loongarch_lsx_vsrlri_h:
3841 case Intrinsic::loongarch_lsx_vsrari_h:
3842 case Intrinsic::loongarch_lsx_vreplvei_b:
3843 case Intrinsic::loongarch_lasx_xvsat_h:
3844 case Intrinsic::loongarch_lasx_xvsat_hu:
3845 case Intrinsic::loongarch_lasx_xvrotri_h:
3846 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
3847 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
3848 case Intrinsic::loongarch_lasx_xvsrlri_h:
3849 case Intrinsic::loongarch_lasx_xvsrari_h:
3850 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
3851 return checkIntrinsicImmArg<4>(Op, 2, DAG);
3852 case Intrinsic::loongarch_lsx_vsrlni_b_h:
3853 case Intrinsic::loongarch_lsx_vsrani_b_h:
3854 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
3855 case Intrinsic::loongarch_lsx_vsrarni_b_h:
3856 case Intrinsic::loongarch_lsx_vssrlni_b_h:
3857 case Intrinsic::loongarch_lsx_vssrani_b_h:
3858 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
3859 case Intrinsic::loongarch_lsx_vssrani_bu_h:
3860 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
3861 case Intrinsic::loongarch_lsx_vssrarni_b_h:
3862 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
3863 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
3864 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
3865 case Intrinsic::loongarch_lasx_xvsrani_b_h:
3866 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
3867 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
3868 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
3869 case Intrinsic::loongarch_lasx_xvssrani_b_h:
3870 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
3871 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
3872 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
3873 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
3874 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
3875 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
3876 return checkIntrinsicImmArg<4>(Op, 3, DAG);
3877 case Intrinsic::loongarch_lsx_vsat_w:
3878 case Intrinsic::loongarch_lsx_vsat_wu:
3879 case Intrinsic::loongarch_lsx_vrotri_w:
3880 case Intrinsic::loongarch_lsx_vsllwil_d_w:
3881 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
3882 case Intrinsic::loongarch_lsx_vsrlri_w:
3883 case Intrinsic::loongarch_lsx_vsrari_w:
3884 case Intrinsic::loongarch_lsx_vslei_bu:
3885 case Intrinsic::loongarch_lsx_vslei_hu:
3886 case Intrinsic::loongarch_lsx_vslei_wu:
3887 case Intrinsic::loongarch_lsx_vslei_du:
3888 case Intrinsic::loongarch_lsx_vslti_bu:
3889 case Intrinsic::loongarch_lsx_vslti_hu:
3890 case Intrinsic::loongarch_lsx_vslti_wu:
3891 case Intrinsic::loongarch_lsx_vslti_du:
3892 case Intrinsic::loongarch_lsx_vbsll_v:
3893 case Intrinsic::loongarch_lsx_vbsrl_v:
3894 case Intrinsic::loongarch_lasx_xvsat_w:
3895 case Intrinsic::loongarch_lasx_xvsat_wu:
3896 case Intrinsic::loongarch_lasx_xvrotri_w:
3897 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
3898 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
3899 case Intrinsic::loongarch_lasx_xvsrlri_w:
3900 case Intrinsic::loongarch_lasx_xvsrari_w:
3901 case Intrinsic::loongarch_lasx_xvslei_bu:
3902 case Intrinsic::loongarch_lasx_xvslei_hu:
3903 case Intrinsic::loongarch_lasx_xvslei_wu:
3904 case Intrinsic::loongarch_lasx_xvslei_du:
3905 case Intrinsic::loongarch_lasx_xvslti_bu:
3906 case Intrinsic::loongarch_lasx_xvslti_hu:
3907 case Intrinsic::loongarch_lasx_xvslti_wu:
3908 case Intrinsic::loongarch_lasx_xvslti_du:
3909 case Intrinsic::loongarch_lasx_xvbsll_v:
3910 case Intrinsic::loongarch_lasx_xvbsrl_v:
3911 return checkIntrinsicImmArg<5>(Op, 2, DAG);
3912 case Intrinsic::loongarch_lsx_vseqi_b:
3913 case Intrinsic::loongarch_lsx_vseqi_h:
3914 case Intrinsic::loongarch_lsx_vseqi_w:
3915 case Intrinsic::loongarch_lsx_vseqi_d:
3916 case Intrinsic::loongarch_lsx_vslei_b:
3917 case Intrinsic::loongarch_lsx_vslei_h:
3918 case Intrinsic::loongarch_lsx_vslei_w:
3919 case Intrinsic::loongarch_lsx_vslei_d:
3920 case Intrinsic::loongarch_lsx_vslti_b:
3921 case Intrinsic::loongarch_lsx_vslti_h:
3922 case Intrinsic::loongarch_lsx_vslti_w:
3923 case Intrinsic::loongarch_lsx_vslti_d:
3924 case Intrinsic::loongarch_lasx_xvseqi_b:
3925 case Intrinsic::loongarch_lasx_xvseqi_h:
3926 case Intrinsic::loongarch_lasx_xvseqi_w:
3927 case Intrinsic::loongarch_lasx_xvseqi_d:
3928 case Intrinsic::loongarch_lasx_xvslei_b:
3929 case Intrinsic::loongarch_lasx_xvslei_h:
3930 case Intrinsic::loongarch_lasx_xvslei_w:
3931 case Intrinsic::loongarch_lasx_xvslei_d:
3932 case Intrinsic::loongarch_lasx_xvslti_b:
3933 case Intrinsic::loongarch_lasx_xvslti_h:
3934 case Intrinsic::loongarch_lasx_xvslti_w:
3935 case Intrinsic::loongarch_lasx_xvslti_d:
3936 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
3937 case Intrinsic::loongarch_lsx_vsrlni_h_w:
3938 case Intrinsic::loongarch_lsx_vsrani_h_w:
3939 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
3940 case Intrinsic::loongarch_lsx_vsrarni_h_w:
3941 case Intrinsic::loongarch_lsx_vssrlni_h_w:
3942 case Intrinsic::loongarch_lsx_vssrani_h_w:
3943 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
3944 case Intrinsic::loongarch_lsx_vssrani_hu_w:
3945 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
3946 case Intrinsic::loongarch_lsx_vssrarni_h_w:
3947 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
3948 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
3949 case Intrinsic::loongarch_lsx_vfrstpi_b:
3950 case Intrinsic::loongarch_lsx_vfrstpi_h:
3951 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
3952 case Intrinsic::loongarch_lasx_xvsrani_h_w:
3953 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
3954 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
3955 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
3956 case Intrinsic::loongarch_lasx_xvssrani_h_w:
3957 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
3958 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
3959 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
3960 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
3961 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
3962 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
3963 case Intrinsic::loongarch_lasx_xvfrstpi_b:
3964 case Intrinsic::loongarch_lasx_xvfrstpi_h:
3965 return checkIntrinsicImmArg<5>(Op, 3, DAG);
3966 case Intrinsic::loongarch_lsx_vsat_d:
3967 case Intrinsic::loongarch_lsx_vsat_du:
3968 case Intrinsic::loongarch_lsx_vrotri_d:
3969 case Intrinsic::loongarch_lsx_vsrlri_d:
3970 case Intrinsic::loongarch_lsx_vsrari_d:
3971 case Intrinsic::loongarch_lasx_xvsat_d:
3972 case Intrinsic::loongarch_lasx_xvsat_du:
3973 case Intrinsic::loongarch_lasx_xvrotri_d:
3974 case Intrinsic::loongarch_lasx_xvsrlri_d:
3975 case Intrinsic::loongarch_lasx_xvsrari_d:
3976 return checkIntrinsicImmArg<6>(Op, 2, DAG);
3977 case Intrinsic::loongarch_lsx_vsrlni_w_d:
3978 case Intrinsic::loongarch_lsx_vsrani_w_d:
3979 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
3980 case Intrinsic::loongarch_lsx_vsrarni_w_d:
3981 case Intrinsic::loongarch_lsx_vssrlni_w_d:
3982 case Intrinsic::loongarch_lsx_vssrani_w_d:
3983 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
3984 case Intrinsic::loongarch_lsx_vssrani_wu_d:
3985 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
3986 case Intrinsic::loongarch_lsx_vssrarni_w_d:
3987 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
3988 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
3989 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
3990 case Intrinsic::loongarch_lasx_xvsrani_w_d:
3991 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
3992 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
3993 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
3994 case Intrinsic::loongarch_lasx_xvssrani_w_d:
3995 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
3996 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
3997 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
3998 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
3999 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
4000 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
4001 return checkIntrinsicImmArg<6>(Op, 3, DAG);
4002 case Intrinsic::loongarch_lsx_vsrlni_d_q:
4003 case Intrinsic::loongarch_lsx_vsrani_d_q:
4004 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
4005 case Intrinsic::loongarch_lsx_vsrarni_d_q:
4006 case Intrinsic::loongarch_lsx_vssrlni_d_q:
4007 case Intrinsic::loongarch_lsx_vssrani_d_q:
4008 case Intrinsic::loongarch_lsx_vssrlni_du_q:
4009 case Intrinsic::loongarch_lsx_vssrani_du_q:
4010 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
4011 case Intrinsic::loongarch_lsx_vssrarni_d_q:
4012 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
4013 case Intrinsic::loongarch_lsx_vssrarni_du_q:
4014 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
4015 case Intrinsic::loongarch_lasx_xvsrani_d_q:
4016 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
4017 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
4018 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
4019 case Intrinsic::loongarch_lasx_xvssrani_d_q:
4020 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
4021 case Intrinsic::loongarch_lasx_xvssrani_du_q:
4022 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
4023 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
4024 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
4025 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
4026 return checkIntrinsicImmArg<7>(Op, 3, DAG);
4027 case Intrinsic::loongarch_lsx_vnori_b:
4028 case Intrinsic::loongarch_lsx_vshuf4i_b:
4029 case Intrinsic::loongarch_lsx_vshuf4i_h:
4030 case Intrinsic::loongarch_lsx_vshuf4i_w:
4031 case Intrinsic::loongarch_lasx_xvnori_b:
4032 case Intrinsic::loongarch_lasx_xvshuf4i_b:
4033 case Intrinsic::loongarch_lasx_xvshuf4i_h:
4034 case Intrinsic::loongarch_lasx_xvshuf4i_w:
4035 case Intrinsic::loongarch_lasx_xvpermi_d:
4036 return checkIntrinsicImmArg<8>(Op, 2, DAG);
4037 case Intrinsic::loongarch_lsx_vshuf4i_d:
4038 case Intrinsic::loongarch_lsx_vpermi_w:
4039 case Intrinsic::loongarch_lsx_vbitseli_b:
4040 case Intrinsic::loongarch_lsx_vextrins_b:
4041 case Intrinsic::loongarch_lsx_vextrins_h:
4042 case Intrinsic::loongarch_lsx_vextrins_w:
4043 case Intrinsic::loongarch_lsx_vextrins_d:
4044 case Intrinsic::loongarch_lasx_xvshuf4i_d:
4045 case Intrinsic::loongarch_lasx_xvpermi_w:
4046 case Intrinsic::loongarch_lasx_xvpermi_q:
4047 case Intrinsic::loongarch_lasx_xvbitseli_b:
4048 case Intrinsic::loongarch_lasx_xvextrins_b:
4049 case Intrinsic::loongarch_lasx_xvextrins_h:
4050 case Intrinsic::loongarch_lasx_xvextrins_w:
4051 case Intrinsic::loongarch_lasx_xvextrins_d:
4052 return checkIntrinsicImmArg<8>(Op, 3, DAG);
4053 case Intrinsic::loongarch_lsx_vrepli_b:
4054 case Intrinsic::loongarch_lsx_vrepli_h:
4055 case Intrinsic::loongarch_lsx_vrepli_w:
4056 case Intrinsic::loongarch_lsx_vrepli_d:
4057 case Intrinsic::loongarch_lasx_xvrepli_b:
4058 case Intrinsic::loongarch_lasx_xvrepli_h:
4059 case Intrinsic::loongarch_lasx_xvrepli_w:
4060 case Intrinsic::loongarch_lasx_xvrepli_d:
4061 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
4062 case Intrinsic::loongarch_lsx_vldi:
4063 case Intrinsic::loongarch_lasx_xvldi:
4064 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
4065 }
4066}
4067
4068// Helper function that emits an error message for intrinsics with a chain and
4069// returns the merge values of an UNDEF and the chain.
4070static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
4071 StringRef ErrorMsg,
4072 SelectionDAG &DAG) {
4073 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4074 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
4075 SDLoc(Op));
4076}
4077
4078SDValue
4079LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4080 SelectionDAG &DAG) const {
4081 SDLoc DL(Op);
4082 MVT GRLenVT = Subtarget.getGRLenVT();
4083 EVT VT = Op.getValueType();
4084 SDValue Chain = Op.getOperand(0);
4085 const StringRef ErrorMsgOOR = "argument out of range";
4086 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4087 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4088
4089 switch (Op.getConstantOperandVal(1)) {
4090 default:
4091 return Op;
4092 case Intrinsic::loongarch_crc_w_b_w:
4093 case Intrinsic::loongarch_crc_w_h_w:
4094 case Intrinsic::loongarch_crc_w_w_w:
4095 case Intrinsic::loongarch_crc_w_d_w:
4096 case Intrinsic::loongarch_crcc_w_b_w:
4097 case Intrinsic::loongarch_crcc_w_h_w:
4098 case Intrinsic::loongarch_crcc_w_w_w:
4099 case Intrinsic::loongarch_crcc_w_d_w:
4100 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
4101 case Intrinsic::loongarch_csrrd_w:
4102 case Intrinsic::loongarch_csrrd_d: {
4103 unsigned Imm = Op.getConstantOperandVal(2);
4104 return !isUInt<14>(Imm)
4105 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4106 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4107 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4108 }
4109 case Intrinsic::loongarch_csrwr_w:
4110 case Intrinsic::loongarch_csrwr_d: {
4111 unsigned Imm = Op.getConstantOperandVal(3);
4112 return !isUInt<14>(Imm)
4113 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4114 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4115 {Chain, Op.getOperand(2),
4116 DAG.getConstant(Imm, DL, GRLenVT)});
4117 }
4118 case Intrinsic::loongarch_csrxchg_w:
4119 case Intrinsic::loongarch_csrxchg_d: {
4120 unsigned Imm = Op.getConstantOperandVal(4);
4121 return !isUInt<14>(Imm)
4122 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4123 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4124 {Chain, Op.getOperand(2), Op.getOperand(3),
4125 DAG.getConstant(Imm, DL, GRLenVT)});
4126 }
4127 case Intrinsic::loongarch_iocsrrd_d: {
4128 return DAG.getNode(
4129 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
4130 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
4131 }
4132#define IOCSRRD_CASE(NAME, NODE) \
4133 case Intrinsic::loongarch_##NAME: { \
4134 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
4135 {Chain, Op.getOperand(2)}); \
4136 }
4137 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4138 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4139 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4140#undef IOCSRRD_CASE
4141 case Intrinsic::loongarch_cpucfg: {
4142 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4143 {Chain, Op.getOperand(2)});
4144 }
4145 case Intrinsic::loongarch_lddir_d: {
4146 unsigned Imm = Op.getConstantOperandVal(3);
4147 return !isUInt<8>(Imm)
4148 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4149 : Op;
4150 }
4151 case Intrinsic::loongarch_movfcsr2gr: {
4152 if (!Subtarget.hasBasicF())
4153 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
4154 unsigned Imm = Op.getConstantOperandVal(2);
4155 return !isUInt<2>(Imm)
4156 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4157 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
4158 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4159 }
4160 case Intrinsic::loongarch_lsx_vld:
4161 case Intrinsic::loongarch_lsx_vldrepl_b:
4162 case Intrinsic::loongarch_lasx_xvld:
4163 case Intrinsic::loongarch_lasx_xvldrepl_b:
4164 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4165 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4166 : SDValue();
4167 case Intrinsic::loongarch_lsx_vldrepl_h:
4168 case Intrinsic::loongarch_lasx_xvldrepl_h:
4169 return !isShiftedInt<11, 1>(
4170 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4171 ? emitIntrinsicWithChainErrorMessage(
4172 Op, "argument out of range or not a multiple of 2", DAG)
4173 : SDValue();
4174 case Intrinsic::loongarch_lsx_vldrepl_w:
4175 case Intrinsic::loongarch_lasx_xvldrepl_w:
4176 return !isShiftedInt<10, 2>(
4177 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4178 ? emitIntrinsicWithChainErrorMessage(
4179 Op, "argument out of range or not a multiple of 4", DAG)
4180 : SDValue();
4181 case Intrinsic::loongarch_lsx_vldrepl_d:
4182 case Intrinsic::loongarch_lasx_xvldrepl_d:
4183 return !isShiftedInt<9, 3>(
4184 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4185 ? emitIntrinsicWithChainErrorMessage(
4186 Op, "argument out of range or not a multiple of 8", DAG)
4187 : SDValue();
4188 }
4189}
4190
4191// Helper function that emits an error message for intrinsics with a void
4192// return value and returns the chain.
4193static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
4194 SelectionDAG &DAG) {
4195
4196 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4197 return Op.getOperand(0);
4198}
4199
4200SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
4201 SelectionDAG &DAG) const {
4202 SDLoc DL(Op);
4203 MVT GRLenVT = Subtarget.getGRLenVT();
4204 SDValue Chain = Op.getOperand(0);
4205 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
4206 SDValue Op2 = Op.getOperand(2);
4207 const StringRef ErrorMsgOOR = "argument out of range";
4208 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4209 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
4210 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4211
4212 switch (IntrinsicEnum) {
4213 default:
4214 // TODO: Add more Intrinsics.
4215 return SDValue();
4216 case Intrinsic::loongarch_cacop_d:
4217 case Intrinsic::loongarch_cacop_w: {
4218 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
4219 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
4220 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
4221 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
4222 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
4223 unsigned Imm1 = Op2->getAsZExtVal();
4224 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
4225 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
4226 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
4227 return Op;
4228 }
4229 case Intrinsic::loongarch_dbar: {
4230 unsigned Imm = Op2->getAsZExtVal();
4231 return !isUInt<15>(Imm)
4232 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4233 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
4234 DAG.getConstant(Imm, DL, GRLenVT));
4235 }
4236 case Intrinsic::loongarch_ibar: {
4237 unsigned Imm = Op2->getAsZExtVal();
4238 return !isUInt<15>(Imm)
4239 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4240 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
4241 DAG.getConstant(Imm, DL, GRLenVT));
4242 }
4243 case Intrinsic::loongarch_break: {
4244 unsigned Imm = Op2->getAsZExtVal();
4245 return !isUInt<15>(Imm)
4246 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4247 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
4248 DAG.getConstant(Imm, DL, GRLenVT));
4249 }
4250 case Intrinsic::loongarch_movgr2fcsr: {
4251 if (!Subtarget.hasBasicF())
4252 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
4253 unsigned Imm = Op2->getAsZExtVal();
4254 return !isUInt<2>(Imm)
4255 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4256 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
4257 DAG.getConstant(Imm, DL, GRLenVT),
4258 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
4259 Op.getOperand(3)));
4260 }
4261 case Intrinsic::loongarch_syscall: {
4262 unsigned Imm = Op2->getAsZExtVal();
4263 return !isUInt<15>(Imm)
4264 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4265 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
4266 DAG.getConstant(Imm, DL, GRLenVT));
4267 }
4268#define IOCSRWR_CASE(NAME, NODE) \
4269 case Intrinsic::loongarch_##NAME: { \
4270 SDValue Op3 = Op.getOperand(3); \
4271 return Subtarget.is64Bit() \
4272 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
4273 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4274 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
4275 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
4276 Op3); \
4277 }
4278 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
4279 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
4280 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
4281#undef IOCSRWR_CASE
4282 case Intrinsic::loongarch_iocsrwr_d: {
4283 return !Subtarget.is64Bit()
4284 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4285 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
4286 Op2,
4287 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
4288 Op.getOperand(3)));
4289 }
4290#define ASRT_LE_GT_CASE(NAME) \
4291 case Intrinsic::loongarch_##NAME: { \
4292 return !Subtarget.is64Bit() \
4293 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
4294 : Op; \
4295 }
4296 ASRT_LE_GT_CASE(asrtle_d)
4297 ASRT_LE_GT_CASE(asrtgt_d)
4298#undef ASRT_LE_GT_CASE
4299 case Intrinsic::loongarch_ldpte_d: {
4300 unsigned Imm = Op.getConstantOperandVal(3);
4301 return !Subtarget.is64Bit()
4302 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4303 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4304 : Op;
4305 }
4306 case Intrinsic::loongarch_lsx_vst:
4307 case Intrinsic::loongarch_lasx_xvst:
4308 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
4309 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4310 : SDValue();
4311 case Intrinsic::loongarch_lasx_xvstelm_b:
4312 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4313 !isUInt<5>(Op.getConstantOperandVal(5)))
4314 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4315 : SDValue();
4316 case Intrinsic::loongarch_lsx_vstelm_b:
4317 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4318 !isUInt<4>(Op.getConstantOperandVal(5)))
4319 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4320 : SDValue();
4321 case Intrinsic::loongarch_lasx_xvstelm_h:
4322 return (!isShiftedInt<8, 1>(
4323 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4324 !isUInt<4>(Op.getConstantOperandVal(5)))
4325 ? emitIntrinsicErrorMessage(
4326 Op, "argument out of range or not a multiple of 2", DAG)
4327 : SDValue();
4328 case Intrinsic::loongarch_lsx_vstelm_h:
4329 return (!isShiftedInt<8, 1>(
4330 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4331 !isUInt<3>(Op.getConstantOperandVal(5)))
4332 ? emitIntrinsicErrorMessage(
4333 Op, "argument out of range or not a multiple of 2", DAG)
4334 : SDValue();
4335 case Intrinsic::loongarch_lasx_xvstelm_w:
4336 return (!isShiftedInt<8, 2>(
4337 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4338 !isUInt<3>(Op.getConstantOperandVal(5)))
4339 ? emitIntrinsicErrorMessage(
4340 Op, "argument out of range or not a multiple of 4", DAG)
4341 : SDValue();
4342 case Intrinsic::loongarch_lsx_vstelm_w:
4343 return (!isShiftedInt<8, 2>(
4344 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4345 !isUInt<2>(Op.getConstantOperandVal(5)))
4346 ? emitIntrinsicErrorMessage(
4347 Op, "argument out of range or not a multiple of 4", DAG)
4348 : SDValue();
4349 case Intrinsic::loongarch_lasx_xvstelm_d:
4350 return (!isShiftedInt<8, 3>(
4351 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4352 !isUInt<2>(Op.getConstantOperandVal(5)))
4353 ? emitIntrinsicErrorMessage(
4354 Op, "argument out of range or not a multiple of 8", DAG)
4355 : SDValue();
4356 case Intrinsic::loongarch_lsx_vstelm_d:
4357 return (!isShiftedInt<8, 3>(
4358 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4359 !isUInt<1>(Op.getConstantOperandVal(5)))
4360 ? emitIntrinsicErrorMessage(
4361 Op, "argument out of range or not a multiple of 8", DAG)
4362 : SDValue();
4363 }
4364}
4365
4366SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
4367 SelectionDAG &DAG) const {
4368 SDLoc DL(Op);
4369 SDValue Lo = Op.getOperand(0);
4370 SDValue Hi = Op.getOperand(1);
4371 SDValue Shamt = Op.getOperand(2);
4372 EVT VT = Lo.getValueType();
4373
4374 // if Shamt-GRLen < 0: // Shamt < GRLen
4375 // Lo = Lo << Shamt
4376 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
4377 // else:
4378 // Lo = 0
4379 // Hi = Lo << (Shamt-GRLen)
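  //
  // For example, with GRLen = 32: a shift by Shamt = 5 takes the first branch,
  //   Lo = Lo << 5
  //   Hi = (Hi << 5) | ((Lo >>u 1) >>u (31 ^ 5)) = (Hi << 5) | (Lo >>u 27)
  // while a shift by Shamt = 40 takes the second branch, giving Lo = 0 and
  //   Hi = Lo << (40 - 32) = Lo << 8.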
4380
4381 SDValue Zero = DAG.getConstant(0, DL, VT);
4382 SDValue One = DAG.getConstant(1, DL, VT);
4383 SDValue MinusGRLen =
4384 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4385 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4386 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4387 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4388
4389 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
4390 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
4391 SDValue ShiftRightLo =
4392 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
4393 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
4394 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
4395 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
4396
4397 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4398
4399 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
4400 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4401
4402 SDValue Parts[2] = {Lo, Hi};
4403 return DAG.getMergeValues(Parts, DL);
4404}
4405
4406SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
4407 SelectionDAG &DAG,
4408 bool IsSRA) const {
4409 SDLoc DL(Op);
4410 SDValue Lo = Op.getOperand(0);
4411 SDValue Hi = Op.getOperand(1);
4412 SDValue Shamt = Op.getOperand(2);
4413 EVT VT = Lo.getValueType();
4414
4415 // SRA expansion:
4416 // if Shamt-GRLen < 0: // Shamt < GRLen
4417 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4418 // Hi = Hi >>s Shamt
4419 // else:
4420 // Lo = Hi >>s (Shamt-GRLen);
4421 // Hi = Hi >>s (GRLen-1)
4422 //
4423 // SRL expansion:
4424 // if Shamt-GRLen < 0: // Shamt < GRLen
4425 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4426 // Hi = Hi >>u Shamt
4427 // else:
4428 // Lo = Hi >>u (Shamt-GRLen);
4429 // Hi = 0;
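  //
  // For example, with GRLen = 32 and Shamt = 5 the SRL expansion computes
  //   Lo = (Lo >>u 5) | ((Hi << 1) << (5 ^ 31)) = (Lo >>u 5) | (Hi << 27)
  //   Hi = Hi >>u 5
  // and for Shamt = 40 it computes Lo = Hi >>u 8 and Hi = 0.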
4430
4431 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4432
4433 SDValue Zero = DAG.getConstant(0, DL, VT);
4434 SDValue One = DAG.getConstant(1, DL, VT);
4435 SDValue MinusGRLen =
4436 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4437 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4438 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4439 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4440
4441 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
4442 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
4443 SDValue ShiftLeftHi =
4444 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
4445 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
4446 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
4447 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
4448 SDValue HiFalse =
4449 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
4450
4451 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4452
4453 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
4454 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4455
4456 SDValue Parts[2] = {Lo, Hi};
4457 return DAG.getMergeValues(Parts, DL);
4458}
4459
4460// Returns the opcode of the target-specific SDNode that implements the 32-bit
4461// form of the given Opcode.
4462static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
4463 switch (Opcode) {
4464 default:
4465 llvm_unreachable("Unexpected opcode");
4466 case ISD::SDIV:
4467 return LoongArchISD::DIV_W;
4468 case ISD::UDIV:
4469 return LoongArchISD::DIV_WU;
4470 case ISD::SREM:
4471 return LoongArchISD::MOD_W;
4472 case ISD::UREM:
4473 return LoongArchISD::MOD_WU;
4474 case ISD::SHL:
4475 return LoongArchISD::SLL_W;
4476 case ISD::SRA:
4477 return LoongArchISD::SRA_W;
4478 case ISD::SRL:
4479 return LoongArchISD::SRL_W;
4480 case ISD::ROTL:
4481 case ISD::ROTR:
4482 return LoongArchISD::ROTR_W;
4483 case ISD::CTTZ:
4484 return LoongArchISD::CTZ_W;
4485 case ISD::CTLZ:
4486 return LoongArchISD::CLZ_W;
4487 }
4488}
4489
4490// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
4491// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
4492// otherwise be promoted to i64, making it difficult to select the
4493// SLL_W/.../*W later on, because the fact that the operation was originally of
4494// type i8/i16/i32 is lost.
4495static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
4496 unsigned ExtOpc = ISD::ANY_EXTEND) {
4497 SDLoc DL(N);
4498 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
4499 SDValue NewOp0, NewRes;
4500
4501 switch (NumOp) {
4502 default:
4503 llvm_unreachable("Unexpected NumOp");
4504 case 1: {
4505 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4506 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
4507 break;
4508 }
4509 case 2: {
4510 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4511 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
4512 if (N->getOpcode() == ISD::ROTL) {
4513 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
4514 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
4515 }
4516 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
4517 break;
4518 }
4519 // TODO:Handle more NumOp.
4520 }
4521
4522 // ReplaceNodeResults requires we maintain the same type for the return
4523 // value.
4524 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
4525}
4526
4527// Converts the given 32-bit operation to an i64 operation with signed-extension
4528// semantics, in order to reduce the number of sign-extension instructions.
4529static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
4530 SDLoc DL(N);
4531 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4532 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4533 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
4534 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
4535 DAG.getValueType(MVT::i32));
4536 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
4537}
4538
4539// Helper function that emits an error message for intrinsics with or without a
4540// chain, and returns an UNDEF plus (when WithChain is set) the chain as the results.
4541static void emitErrorAndReplaceIntrinsicResults(
4542 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
4543 StringRef ErrorMsg, bool WithChain = true) {
4544 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
4545 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
4546 if (!WithChain)
4547 return;
4548 Results.push_back(N->getOperand(0));
4549}
4550
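// replaceVPICKVE2GRResults<N> legalizes an [x]vpickve2gr intrinsic result: it
// range-checks the lane index against N bits, builds the requested ResOp node
// (a VPICK_SEXT_ELT or VPICK_ZEXT_ELT for the callers below) on GRLenVT, and
// truncates the picked element back to the intrinsic's original result type.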
4551template <unsigned N>
4552static void
4553replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
4554 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
4555 unsigned ResOp) {
4556 const StringRef ErrorMsgOOR = "argument out of range";
4557 unsigned Imm = Node->getConstantOperandVal(2);
4558 if (!isUInt<N>(Imm)) {
4559 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
4560 /*WithChain=*/false);
4561 return;
4562 }
4563 SDLoc DL(Node);
4564 SDValue Vec = Node->getOperand(1);
4565
4566 SDValue PickElt =
4567 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
4568 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
4569 DAG.getValueType(Vec.getValueType().getVectorElementType()));
4570 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
4571 PickElt.getValue(0)));
4572}
4573
4574static void
4575replaceVecCondBranchResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
4576 SelectionDAG &DAG,
4577 const LoongArchSubtarget &Subtarget,
4578 unsigned ResOp) {
4579 SDLoc DL(N);
4580 SDValue Vec = N->getOperand(1);
4581
4582 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
4583 Results.push_back(
4584 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
4585}
4586
4587static void
4588replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
4589 SelectionDAG &DAG,
4590 const LoongArchSubtarget &Subtarget) {
4591 switch (N->getConstantOperandVal(0)) {
4592 default:
4593 llvm_unreachable("Unexpected Intrinsic.");
4594 case Intrinsic::loongarch_lsx_vpickve2gr_b:
4595 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4596 LoongArchISD::VPICK_SEXT_ELT);
4597 break;
4598 case Intrinsic::loongarch_lsx_vpickve2gr_h:
4599 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
4600 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4601 LoongArchISD::VPICK_SEXT_ELT);
4602 break;
4603 case Intrinsic::loongarch_lsx_vpickve2gr_w:
4604 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4605 LoongArchISD::VPICK_SEXT_ELT);
4606 break;
4607 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
4608 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4609 LoongArchISD::VPICK_ZEXT_ELT);
4610 break;
4611 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
4612 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
4613 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4614 LoongArchISD::VPICK_ZEXT_ELT);
4615 break;
4616 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
4617 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4618 LoongArchISD::VPICK_ZEXT_ELT);
4619 break;
4620 case Intrinsic::loongarch_lsx_bz_b:
4621 case Intrinsic::loongarch_lsx_bz_h:
4622 case Intrinsic::loongarch_lsx_bz_w:
4623 case Intrinsic::loongarch_lsx_bz_d:
4624 case Intrinsic::loongarch_lasx_xbz_b:
4625 case Intrinsic::loongarch_lasx_xbz_h:
4626 case Intrinsic::loongarch_lasx_xbz_w:
4627 case Intrinsic::loongarch_lasx_xbz_d:
4628 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4629 LoongArchISD::VALL_ZERO);
4630 break;
4631 case Intrinsic::loongarch_lsx_bz_v:
4632 case Intrinsic::loongarch_lasx_xbz_v:
4633 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4634 LoongArchISD::VANY_ZERO);
4635 break;
4636 case Intrinsic::loongarch_lsx_bnz_b:
4637 case Intrinsic::loongarch_lsx_bnz_h:
4638 case Intrinsic::loongarch_lsx_bnz_w:
4639 case Intrinsic::loongarch_lsx_bnz_d:
4640 case Intrinsic::loongarch_lasx_xbnz_b:
4641 case Intrinsic::loongarch_lasx_xbnz_h:
4642 case Intrinsic::loongarch_lasx_xbnz_w:
4643 case Intrinsic::loongarch_lasx_xbnz_d:
4644 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4645 LoongArchISD::VALL_NONZERO);
4646 break;
4647 case Intrinsic::loongarch_lsx_bnz_v:
4648 case Intrinsic::loongarch_lasx_xbnz_v:
4649 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4650 LoongArchISD::VANY_NONZERO);
4651 break;
4652 }
4653}
4654
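// replaceCMP_XCHG_128Results lowers an i128 ATOMIC_CMP_SWAP into a
// PseudoCmpXchg128[Acquire] machine node: the expected and new values are
// split into i64 halves, and the two result halves are reassembled with
// BUILD_PAIR alongside the output chain.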
4655static void replaceCMP_XCHG_128Results(SDNode *N,
4656 SmallVectorImpl<SDValue> &Results,
4657 SelectionDAG &DAG) {
4658 assert(N->getValueType(0) == MVT::i128 &&
4659 "AtomicCmpSwap on types less than 128 should be legal");
4660 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4661
4662 unsigned Opcode;
4663 switch (MemOp->getMergedOrdering()) {
4664 case AtomicOrdering::Acquire:
4665 case AtomicOrdering::AcquireRelease:
4666 case AtomicOrdering::SequentiallyConsistent:
4667 Opcode = LoongArch::PseudoCmpXchg128Acquire;
4668 break;
4669 case AtomicOrdering::Monotonic:
4670 case AtomicOrdering::Release:
4671 Opcode = LoongArch::PseudoCmpXchg128;
4672 break;
4673 default:
4674 llvm_unreachable("Unexpected ordering!");
4675 }
4676
4677 SDLoc DL(N);
4678 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
4679 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
4680 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
4681 NewVal.first, NewVal.second, N->getOperand(0)};
4682
4683 SDNode *CmpSwap = DAG.getMachineNode(
4684 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
4685 Ops);
4686 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4687 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
4688 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
4689 Results.push_back(SDValue(CmpSwap, 3));
4690}
4691
4692void LoongArchTargetLowering::ReplaceNodeResults(
4693 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4694 SDLoc DL(N);
4695 EVT VT = N->getValueType(0);
4696 switch (N->getOpcode()) {
4697 default:
4698 llvm_unreachable("Don't know how to legalize this operation");
4699 case ISD::ADD:
4700 case ISD::SUB:
4701 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4702 "Unexpected custom legalisation");
4703 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
4704 break;
4705 case ISD::SDIV:
4706 case ISD::UDIV:
4707 case ISD::SREM:
4708 case ISD::UREM:
4709 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4710 "Unexpected custom legalisation");
4711 Results.push_back(customLegalizeToWOp(N, DAG, 2,
4712 Subtarget.hasDiv32() && VT == MVT::i32
4713 ? ISD::ANY_EXTEND
4714 : ISD::SIGN_EXTEND));
4715 break;
4716 case ISD::SHL:
4717 case ISD::SRA:
4718 case ISD::SRL:
4719 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4720 "Unexpected custom legalisation");
4721 if (N->getOperand(1).getOpcode() != ISD::Constant) {
4722 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4723 break;
4724 }
4725 break;
4726 case ISD::ROTL:
4727 case ISD::ROTR:
4728 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4729 "Unexpected custom legalisation");
4730 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4731 break;
4732 case ISD::FP_TO_SINT: {
4733 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4734 "Unexpected custom legalisation");
4735 SDValue Src = N->getOperand(0);
4736 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
4737 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
4738 TargetLowering::TypeSoftenFloat) {
4739 if (!isTypeLegal(Src.getValueType()))
4740 return;
4741 if (Src.getValueType() == MVT::f16)
4742 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
4743 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
4744 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
4745 return;
4746 }
4747 // If the FP type needs to be softened, emit a library call using the 'si'
4748 // version. If we left it to default legalization we'd end up with 'di'.
4749 RTLIB::Libcall LC;
4750 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
4751 MakeLibCallOptions CallOptions;
4752 EVT OpVT = Src.getValueType();
4753 CallOptions.setTypeListBeforeSoften(OpVT, VT);
4754 SDValue Chain = SDValue();
4755 SDValue Result;
4756 std::tie(Result, Chain) =
4757 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
4758 Results.push_back(Result);
4759 break;
4760 }
4761 case ISD::BITCAST: {
4762 SDValue Src = N->getOperand(0);
4763 EVT SrcVT = Src.getValueType();
4764 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
4765 Subtarget.hasBasicF()) {
4766 SDValue Dst =
4767 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
4768 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
4769 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
4770 SDValue NewReg = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
4771 DAG.getVTList(MVT::i32, MVT::i32), Src);
4772 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
4773 NewReg.getValue(0), NewReg.getValue(1));
4774 Results.push_back(RetReg);
4775 }
4776 break;
4777 }
4778 case ISD::FP_TO_UINT: {
4779 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4780 "Unexpected custom legalisation");
4781 auto &TLI = DAG.getTargetLoweringInfo();
4782 SDValue Tmp1, Tmp2;
4783 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
4784 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
4785 break;
4786 }
4787 case ISD::BSWAP: {
4788 SDValue Src = N->getOperand(0);
4789 assert((VT == MVT::i16 || VT == MVT::i32) &&
4790 "Unexpected custom legalization");
4791 MVT GRLenVT = Subtarget.getGRLenVT();
4792 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4793 SDValue Tmp;
4794 switch (VT.getSizeInBits()) {
4795 default:
4796 llvm_unreachable("Unexpected operand width");
4797 case 16:
4798 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
4799 break;
4800 case 32:
4801 // Only LA64 will get here, due to the size mismatch between VT and
4802 // GRLenVT; the LA32 lowering is defined directly in LoongArchInstrInfo.
4803 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
4804 break;
4805 }
4806 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4807 break;
4808 }
4809 case ISD::BITREVERSE: {
4810 SDValue Src = N->getOperand(0);
4811 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
4812 "Unexpected custom legalization");
4813 MVT GRLenVT = Subtarget.getGRLenVT();
4814 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4815 SDValue Tmp;
4816 switch (VT.getSizeInBits()) {
4817 default:
4818 llvm_unreachable("Unexpected operand width");
4819 case 8:
4820 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
4821 break;
4822 case 32:
4823 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
4824 break;
4825 }
4826 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4827 break;
4828 }
4829 case ISD::CTLZ:
4830 case ISD::CTTZ: {
4831 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4832 "Unexpected custom legalisation");
4833 Results.push_back(customLegalizeToWOp(N, DAG, 1));
4834 break;
4835 }
4836 case ISD::INTRINSIC_W_CHAIN: {
4837 SDValue Chain = N->getOperand(0);
4838 SDValue Op2 = N->getOperand(2);
4839 MVT GRLenVT = Subtarget.getGRLenVT();
4840 const StringRef ErrorMsgOOR = "argument out of range";
4841 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4842 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4843
4844 switch (N->getConstantOperandVal(1)) {
4845 default:
4846 llvm_unreachable("Unexpected Intrinsic.");
4847 case Intrinsic::loongarch_movfcsr2gr: {
4848 if (!Subtarget.hasBasicF()) {
4849 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
4850 return;
4851 }
4852 unsigned Imm = Op2->getAsZExtVal();
4853 if (!isUInt<2>(Imm)) {
4854 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4855 return;
4856 }
4857 SDValue MOVFCSR2GRResults = DAG.getNode(
4858 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
4859 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4860 Results.push_back(
4861 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
4862 Results.push_back(MOVFCSR2GRResults.getValue(1));
4863 break;
4864 }
4865#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
4866 case Intrinsic::loongarch_##NAME: { \
4867 SDValue NODE = DAG.getNode( \
4868 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4869 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4870 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4871 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4872 Results.push_back(NODE.getValue(1)); \
4873 break; \
4874 }
4875 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
4876 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
4877 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
4878 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
4879 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
4880 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
4881#undef CRC_CASE_EXT_BINARYOP
4882
4883#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
4884 case Intrinsic::loongarch_##NAME: { \
4885 SDValue NODE = DAG.getNode( \
4886 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4887 {Chain, Op2, \
4888 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4889 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4890 Results.push_back(NODE.getValue(1)); \
4891 break; \
4892 }
4893 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
4894 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
4895#undef CRC_CASE_EXT_UNARYOP
4896#define CSR_CASE(ID) \
4897 case Intrinsic::loongarch_##ID: { \
4898 if (!Subtarget.is64Bit()) \
4899 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
4900 break; \
4901 }
4902 CSR_CASE(csrrd_d);
4903 CSR_CASE(csrwr_d);
4904 CSR_CASE(csrxchg_d);
4905 CSR_CASE(iocsrrd_d);
4906#undef CSR_CASE
4907 case Intrinsic::loongarch_csrrd_w: {
4908 unsigned Imm = Op2->getAsZExtVal();
4909 if (!isUInt<14>(Imm)) {
4910 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4911 return;
4912 }
4913 SDValue CSRRDResults =
4914 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4915 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4916 Results.push_back(
4917 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
4918 Results.push_back(CSRRDResults.getValue(1));
4919 break;
4920 }
4921 case Intrinsic::loongarch_csrwr_w: {
4922 unsigned Imm = N->getConstantOperandVal(3);
4923 if (!isUInt<14>(Imm)) {
4924 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4925 return;
4926 }
4927 SDValue CSRWRResults =
4928 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4929 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4930 DAG.getConstant(Imm, DL, GRLenVT)});
4931 Results.push_back(
4932 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
4933 Results.push_back(CSRWRResults.getValue(1));
4934 break;
4935 }
4936 case Intrinsic::loongarch_csrxchg_w: {
4937 unsigned Imm = N->getConstantOperandVal(4);
4938 if (!isUInt<14>(Imm)) {
4939 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4940 return;
4941 }
4942 SDValue CSRXCHGResults = DAG.getNode(
4943 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4944 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4945 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
4946 DAG.getConstant(Imm, DL, GRLenVT)});
4947 Results.push_back(
4948 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
4949 Results.push_back(CSRXCHGResults.getValue(1));
4950 break;
4951 }
4952#define IOCSRRD_CASE(NAME, NODE) \
4953 case Intrinsic::loongarch_##NAME: { \
4954 SDValue IOCSRRDResults = \
4955 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4956 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
4957 Results.push_back( \
4958 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
4959 Results.push_back(IOCSRRDResults.getValue(1)); \
4960 break; \
4961 }
4962 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4963 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4964 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4965#undef IOCSRRD_CASE
4966 case Intrinsic::loongarch_cpucfg: {
4967 SDValue CPUCFGResults =
4968 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4969 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
4970 Results.push_back(
4971 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
4972 Results.push_back(CPUCFGResults.getValue(1));
4973 break;
4974 }
4975 case Intrinsic::loongarch_lddir_d: {
4976 if (!Subtarget.is64Bit()) {
4977 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
4978 return;
4979 }
4980 break;
4981 }
4982 }
4983 break;
4984 }
4985 case ISD::READ_REGISTER: {
4986 if (Subtarget.is64Bit())
4987 DAG.getContext()->emitError(
4988 "On LA64, only 64-bit registers can be read.");
4989 else
4990 DAG.getContext()->emitError(
4991 "On LA32, only 32-bit registers can be read.");
4992 Results.push_back(DAG.getUNDEF(VT));
4993 Results.push_back(N->getOperand(0));
4994 break;
4995 }
 4996  case ISD::INTRINSIC_WO_CHAIN: {
 4997    replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
4998 break;
4999 }
5000 case ISD::LROUND: {
5001 SDValue Op0 = N->getOperand(0);
5002 EVT OpVT = Op0.getValueType();
5003 RTLIB::Libcall LC =
5004 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
5005 MakeLibCallOptions CallOptions;
5006 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
5007 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
5008 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5009 Results.push_back(Result);
5010 break;
5011 }
5012 case ISD::ATOMIC_CMP_SWAP: {
 5013    replaceCMP_XCHG_128Results(N, Results, DAG);
 5014    break;
5015 }
5016 case ISD::TRUNCATE: {
5017 MVT VT = N->getSimpleValueType(0);
5018 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
5019 return;
5020
5021 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
5022 SDValue In = N->getOperand(0);
5023 EVT InVT = In.getValueType();
5024 EVT InEltVT = InVT.getVectorElementType();
5025 EVT EltVT = VT.getVectorElementType();
5026 unsigned MinElts = VT.getVectorNumElements();
5027 unsigned WidenNumElts = WidenVT.getVectorNumElements();
5028 unsigned InBits = InVT.getSizeInBits();
5029
5030 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
5031 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
5032 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
5033 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
5034 for (unsigned I = 0; I < MinElts; ++I)
5035 TruncMask[I] = Scale * I;
5036
5037 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
5038 MVT SVT = In.getSimpleValueType().getScalarType();
5039 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
5040 SDValue WidenIn =
5041 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
5042 DAG.getVectorIdxConstant(0, DL));
5043 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
5044 "Illegal vector type in truncation");
5045 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
5046 Results.push_back(
5047 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
5048 return;
5049 }
5050 }
5051
5052 break;
5053 }
5054 }
5055}
5056
5057static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
5058 TargetLowering::DAGCombinerInfo &DCI,
5059 const LoongArchSubtarget &Subtarget) {
5060 if (DCI.isBeforeLegalizeOps())
5061 return SDValue();
5062
5063 SDValue FirstOperand = N->getOperand(0);
5064 SDValue SecondOperand = N->getOperand(1);
5065 unsigned FirstOperandOpc = FirstOperand.getOpcode();
5066 EVT ValTy = N->getValueType(0);
5067 SDLoc DL(N);
5068 uint64_t lsb, msb;
5069 unsigned SMIdx, SMLen;
5070 ConstantSDNode *CN;
5071 SDValue NewOperand;
5072 MVT GRLenVT = Subtarget.getGRLenVT();
5073
5074 // BSTRPICK requires the 32S feature.
5075 if (!Subtarget.has32S())
5076 return SDValue();
5077
5078 // Op's second operand must be a shifted mask.
5079 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
5080 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
5081 return SDValue();
5082
5083 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
5084 // Pattern match BSTRPICK.
5085 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
5086 // => BSTRPICK $dst, $src, msb, lsb
5087 // where msb = lsb + len - 1
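    //  For example, (and (srl $src, 8), 0xff) extracts bits [15:8] of $src:
    //  lsb = 8, len = 8, so it becomes (BSTRPICK $src, 15, 8).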
5088
5089 // The second operand of the shift must be an immediate.
5090 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
5091 return SDValue();
5092
5093 lsb = CN->getZExtValue();
5094
5095 // Return if the shifted mask does not start at bit 0 or the sum of its
5096 // length and lsb exceeds the word's size.
5097 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
5098 return SDValue();
5099
5100 NewOperand = FirstOperand.getOperand(0);
5101 } else {
5102 // Pattern match BSTRPICK.
 5103    //  $dst = and $src, (2**len - 1), if len > 12
5104 // => BSTRPICK $dst, $src, msb, lsb
5105 // where lsb = 0 and msb = len - 1
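    //  For example, (and $src, 0xffff) has len = 16 > 12 and becomes
    //  (BSTRPICK $src, 15, 0); masks of 0xfff or less are left for ANDI.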
5106
5107 // If the mask is <= 0xfff, andi can be used instead.
5108 if (CN->getZExtValue() <= 0xfff)
5109 return SDValue();
5110
 5111    // Return if the mask extends beyond the value's bit width.
5112 if (SMIdx + SMLen > ValTy.getSizeInBits())
5113 return SDValue();
5114
5115 if (SMIdx > 0) {
 5116      // Omit if the constant has more than 2 uses. This is a conservative
5117 // decision. Whether it is a win depends on the HW microarchitecture.
5118 // However it should always be better for 1 and 2 uses.
5119 if (CN->use_size() > 2)
5120 return SDValue();
5121 // Return if the constant can be composed by a single LU12I.W.
5122 if ((CN->getZExtValue() & 0xfff) == 0)
5123 return SDValue();
 5124      // Return if the constant can be composed by a single ADDI with
5125 // the zero register.
5126 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
5127 return SDValue();
5128 }
5129
5130 lsb = SMIdx;
5131 NewOperand = FirstOperand;
5132 }
5133
5134 msb = lsb + SMLen - 1;
5135 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
5136 DAG.getConstant(msb, DL, GRLenVT),
5137 DAG.getConstant(lsb, DL, GRLenVT));
5138 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
5139 return NR0;
5140 // Try to optimize to
5141 // bstrpick $Rd, $Rs, msb, lsb
5142 // slli $Rd, $Rd, lsb
5143 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
5144 DAG.getConstant(lsb, DL, GRLenVT));
5145}
5146
5147static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
5148 TargetLowering::DAGCombinerInfo &DCI,
5149 const LoongArchSubtarget &Subtarget) {
5150 // BSTRPICK requires the 32S feature.
5151 if (!Subtarget.has32S())
5152 return SDValue();
5153
5154 if (DCI.isBeforeLegalizeOps())
5155 return SDValue();
5156
5157 // $dst = srl (and $src, Mask), Shamt
5158 // =>
5159 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
5160 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
5161 //
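  //  For example, with Mask = 0xff00 (MaskIdx = 8, MaskLen = 8) and Shamt = 12,
  //  (srl (and $src, 0xff00), 12) keeps bits [15:12] of $src and becomes
  //  (BSTRPICK $src, 15, 12).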
5162
5163 SDValue FirstOperand = N->getOperand(0);
5164 ConstantSDNode *CN;
5165 EVT ValTy = N->getValueType(0);
5166 SDLoc DL(N);
5167 MVT GRLenVT = Subtarget.getGRLenVT();
5168 unsigned MaskIdx, MaskLen;
5169 uint64_t Shamt;
5170
5171 // The first operand must be an AND and the second operand of the AND must be
5172 // a shifted mask.
5173 if (FirstOperand.getOpcode() != ISD::AND ||
5174 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
5175 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
5176 return SDValue();
5177
5178 // The second operand (shift amount) must be an immediate.
5179 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
5180 return SDValue();
5181
5182 Shamt = CN->getZExtValue();
5183 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
5184 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
5185 FirstOperand->getOperand(0),
5186 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5187 DAG.getConstant(Shamt, DL, GRLenVT));
5188
5189 return SDValue();
5190}
5191
5192// Helper to peek through bitops/trunc/setcc to determine size of source vector.
5193// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
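// For example, a v8i1 produced by (setcc v8i32, v8i32, cc) reports a 256-bit
// source, while one produced by (setcc v8i16, v8i16, cc) reports 128 bits.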
5194static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
5195 unsigned Depth) {
5196 // Limit recursion.
 5197  if (Depth >= SelectionDAG::MaxRecursionDepth)
 5198    return false;
5199 switch (Src.getOpcode()) {
5200 case ISD::SETCC:
5201 case ISD::TRUNCATE:
5202 return Src.getOperand(0).getValueSizeInBits() == Size;
5203 case ISD::FREEZE:
5204 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
5205 case ISD::AND:
5206 case ISD::XOR:
5207 case ISD::OR:
5208 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
5209 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
5210 case ISD::SELECT:
5211 case ISD::VSELECT:
5212 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
5213 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
5214 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
5215 case ISD::BUILD_VECTOR:
5216 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
5217 ISD::isBuildVectorAllOnes(Src.getNode());
5218 }
5219 return false;
5220}
5221
5222// Helper to push sign extension of vXi1 SETCC result through bitops.
5223static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
5224 SDValue Src, const SDLoc &DL) {
5225 switch (Src.getOpcode()) {
5226 case ISD::SETCC:
5227 case ISD::FREEZE:
5228 case ISD::TRUNCATE:
5229 case ISD::BUILD_VECTOR:
5230 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5231 case ISD::AND:
5232 case ISD::XOR:
5233 case ISD::OR:
5234 return DAG.getNode(
5235 Src.getOpcode(), DL, SExtVT,
5236 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
5237 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
5238 case ISD::SELECT:
5239 case ISD::VSELECT:
5240 return DAG.getSelect(
5241 DL, SExtVT, Src.getOperand(0),
5242 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
5243 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
5244 }
5245 llvm_unreachable("Unexpected node type for vXi1 sign extension");
5246}
5247
5248static SDValue
5249performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
5250 TargetLowering::DAGCombinerInfo &DCI,
5251 const LoongArchSubtarget &Subtarget) {
5252 SDLoc DL(N);
5253 EVT VT = N->getValueType(0);
5254 SDValue Src = N->getOperand(0);
5255 EVT SrcVT = Src.getValueType();
5256
5257 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
5258 return SDValue();
5259
5260 bool UseLASX;
5261 unsigned Opc = ISD::DELETED_NODE;
5262 EVT CmpVT = Src.getOperand(0).getValueType();
5263 EVT EltVT = CmpVT.getVectorElementType();
5264
5265 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
5266 UseLASX = false;
5267 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
5268 CmpVT.getSizeInBits() == 256)
5269 UseLASX = true;
5270 else
5271 return SDValue();
5272
5273 SDValue SrcN1 = Src.getOperand(1);
5274 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
5275 default:
5276 break;
5277 case ISD::SETEQ:
5278 // x == 0 => not (vmsknez.b x)
5279 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
 5280      Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
 5281    break;
5282 case ISD::SETGT:
5283 // x > -1 => vmskgez.b x
5284 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
 5285      Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
 5286    break;
5287 case ISD::SETGE:
5288 // x >= 0 => vmskgez.b x
5289 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
 5290      Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
 5291    break;
5292 case ISD::SETLT:
5293 // x < 0 => vmskltz.{b,h,w,d} x
5294 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
5295 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5296 EltVT == MVT::i64))
 5297      Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
 5298    break;
5299 case ISD::SETLE:
5300 // x <= -1 => vmskltz.{b,h,w,d} x
5301 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
5302 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5303 EltVT == MVT::i64))
 5304      Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
 5305    break;
5306 case ISD::SETNE:
5307 // x != 0 => vmsknez.b x
5308 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
 5309      Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
 5310    break;
5311 }
5312
5313 if (Opc == ISD::DELETED_NODE)
5314 return SDValue();
5315
5316 SDValue V = DAG.getNode(Opc, DL, Subtarget.getGRLenVT(), Src.getOperand(0));
 5317  EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
 5318  V = DAG.getZExtOrTrunc(V, DL, T);
5319 return DAG.getBitcast(VT, V);
5320}
5321
5322static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
5323 TargetLowering::DAGCombinerInfo &DCI,
5324 const LoongArchSubtarget &Subtarget) {
5325 SDLoc DL(N);
5326 EVT VT = N->getValueType(0);
5327 SDValue Src = N->getOperand(0);
5328 EVT SrcVT = Src.getValueType();
5329 MVT GRLenVT = Subtarget.getGRLenVT();
5330
5331 if (!DCI.isBeforeLegalizeOps())
5332 return SDValue();
5333
5334 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
5335 return SDValue();
5336
5337 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
5338 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
5339 if (Res)
5340 return Res;
5341
5342 // Generate vXi1 using [X]VMSKLTZ
5343 MVT SExtVT;
5344 unsigned Opc;
5345 bool UseLASX = false;
5346 bool PropagateSExt = false;
5347
5348 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
5349 EVT CmpVT = Src.getOperand(0).getValueType();
5350 if (CmpVT.getSizeInBits() > 256)
5351 return SDValue();
5352 }
5353
5354 switch (SrcVT.getSimpleVT().SimpleTy) {
5355 default:
5356 return SDValue();
5357 case MVT::v2i1:
5358 SExtVT = MVT::v2i64;
5359 break;
5360 case MVT::v4i1:
5361 SExtVT = MVT::v4i32;
5362 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5363 SExtVT = MVT::v4i64;
5364 UseLASX = true;
5365 PropagateSExt = true;
5366 }
5367 break;
5368 case MVT::v8i1:
5369 SExtVT = MVT::v8i16;
5370 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5371 SExtVT = MVT::v8i32;
5372 UseLASX = true;
5373 PropagateSExt = true;
5374 }
5375 break;
5376 case MVT::v16i1:
5377 SExtVT = MVT::v16i8;
5378 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5379 SExtVT = MVT::v16i16;
5380 UseLASX = true;
5381 PropagateSExt = true;
5382 }
5383 break;
5384 case MVT::v32i1:
5385 SExtVT = MVT::v32i8;
5386 UseLASX = true;
5387 break;
5388 };
5389 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
5390 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5391
5392 SDValue V;
5393 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
5394 if (Src.getSimpleValueType() == MVT::v32i8) {
5395 SDValue Lo, Hi;
5396 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
5397 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Lo);
5398 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Hi);
5399 Hi = DAG.getNode(ISD::SHL, DL, GRLenVT, Hi,
5400 DAG.getConstant(16, DL, MVT::i8));
5401 V = DAG.getNode(ISD::OR, DL, GRLenVT, Lo, Hi);
5402 } else if (UseLASX) {
5403 return SDValue();
5404 }
5405 }
5406
5407 if (!V) {
 5408    Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
 5409    V = DAG.getNode(Opc, DL, GRLenVT, Src);
5410 }
5411
 5412  EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
 5413  V = DAG.getZExtOrTrunc(V, DL, T);
5414 return DAG.getBitcast(VT, V);
5415}
5416
5417static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
5418 TargetLowering::DAGCombinerInfo &DCI,
5419 const LoongArchSubtarget &Subtarget) {
5420 MVT GRLenVT = Subtarget.getGRLenVT();
5421 EVT ValTy = N->getValueType(0);
5422 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5423 ConstantSDNode *CN0, *CN1;
5424 SDLoc DL(N);
5425 unsigned ValBits = ValTy.getSizeInBits();
5426 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
5427 unsigned Shamt;
5428 bool SwapAndRetried = false;
5429
5430 // BSTRPICK requires the 32S feature.
5431 if (!Subtarget.has32S())
5432 return SDValue();
5433
5434 if (DCI.isBeforeLegalizeOps())
5435 return SDValue();
5436
5437 if (ValBits != 32 && ValBits != 64)
5438 return SDValue();
5439
5440Retry:
5441 // 1st pattern to match BSTRINS:
5442 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
5443 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
5444 // =>
5445 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
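  //  For example, with size = 8 and lsb = 8 (mask1 = 0xff00, mask0 = ~0xff00),
  //  (or (and X, ~0xff00), (and (shl Y, 8), 0xff00)) inserts the low 8 bits of
  //  Y into bits [15:8] of X, i.e. (BSTRINS X, Y, 15, 8).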
5446 if (N0.getOpcode() == ISD::AND &&
5447 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5448 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5449 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
5450 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5451 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5452 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
5453 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5454 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5455 (MaskIdx0 + MaskLen0 <= ValBits)) {
5456 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
5457 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5458 N1.getOperand(0).getOperand(0),
5459 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5460 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5461 }
5462
5463 // 2nd pattern to match BSTRINS:
5464 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
5465 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
5466 // =>
5467 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
5468 if (N0.getOpcode() == ISD::AND &&
5469 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5470 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5471 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5472 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5473 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5474 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5475 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5476 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
5477 (MaskIdx0 + MaskLen0 <= ValBits)) {
5478 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
5479 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5480 N1.getOperand(0).getOperand(0),
5481 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5482 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5483 }
5484
5485 // 3rd pattern to match BSTRINS:
5486 // R = or (and X, mask0), (and Y, mask1)
5487 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
5488 // =>
5489 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
5490 // where msb = lsb + size - 1
5491 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5492 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5493 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5494 (MaskIdx0 + MaskLen0 <= 64) &&
5495 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
5496 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5497 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
5498 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5499 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
5500 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
5501 DAG.getConstant(ValBits == 32
5502 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5503 : (MaskIdx0 + MaskLen0 - 1),
5504 DL, GRLenVT),
5505 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5506 }
5507
5508 // 4th pattern to match BSTRINS:
5509 // R = or (and X, mask), (shl Y, shamt)
5510 // where mask = (2**shamt - 1)
5511 // =>
5512 // R = BSTRINS X, Y, ValBits - 1, shamt
5513 // where ValBits = 32 or 64
5514 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
5515 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5516 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
5517 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5518 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
5519 (MaskIdx0 + MaskLen0 <= ValBits)) {
5520 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
5521 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5522 N1.getOperand(0),
5523 DAG.getConstant((ValBits - 1), DL, GRLenVT),
5524 DAG.getConstant(Shamt, DL, GRLenVT));
5525 }
5526
5527 // 5th pattern to match BSTRINS:
5528 // R = or (and X, mask), const
5529 // where ~mask = (2**size - 1) << lsb, mask & const = 0
5530 // =>
5531 // R = BSTRINS X, (const >> lsb), msb, lsb
5532 // where msb = lsb + size - 1
5533 if (N0.getOpcode() == ISD::AND &&
5534 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5535 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5536 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
5537 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5538 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
5539 return DAG.getNode(
5540 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5541 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
5542 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5543 : (MaskIdx0 + MaskLen0 - 1),
5544 DL, GRLenVT),
5545 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5546 }
5547
5548 // 6th pattern.
5549 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
5550 // by the incoming bits are known to be zero.
5551 // =>
5552 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
5553 //
5554 // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
5555 // pattern is more common than the 1st. So we put the 1st before the 6th in
5556 // order to match as many nodes as possible.
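  //  For example, with mask = 0xff (MaskLen = 8) and shamt = 16, if bits
  //  [23:16] of b are known to be zero, b | ((c & 0xff) << 16) becomes
  //  (BSTRINS b, c, 23, 16).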
5557 ConstantSDNode *CNMask, *CNShamt;
5558 unsigned MaskIdx, MaskLen;
5559 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5560 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5561 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5562 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5563 CNShamt->getZExtValue() + MaskLen <= ValBits) {
5564 Shamt = CNShamt->getZExtValue();
5565 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
5566 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5567 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
5568 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5569 N1.getOperand(0).getOperand(0),
5570 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
5571 DAG.getConstant(Shamt, DL, GRLenVT));
5572 }
5573 }
5574
5575 // 7th pattern.
5576 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
5577 // overwritten by the incoming bits are known to be zero.
5578 // =>
5579 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
5580 //
5581 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
5582 // before the 7th in order to match as many nodes as possible.
5583 if (N1.getOpcode() == ISD::AND &&
5584 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5585 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5586 N1.getOperand(0).getOpcode() == ISD::SHL &&
5587 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5588 CNShamt->getZExtValue() == MaskIdx) {
5589 APInt ShMask(ValBits, CNMask->getZExtValue());
5590 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5591 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
5592 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5593 N1.getOperand(0).getOperand(0),
5594 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5595 DAG.getConstant(MaskIdx, DL, GRLenVT));
5596 }
5597 }
5598
5599 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
5600 if (!SwapAndRetried) {
5601 std::swap(N0, N1);
5602 SwapAndRetried = true;
5603 goto Retry;
5604 }
5605
5606 SwapAndRetried = false;
5607Retry2:
5608 // 8th pattern.
5609 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
5610 // the incoming bits are known to be zero.
5611 // =>
5612 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
5613 //
5614 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
 5615  // we put it here in order to match as many nodes as possible or generate
 5616  // fewer instructions.
5617 if (N1.getOpcode() == ISD::AND &&
5618 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5619 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
5620 APInt ShMask(ValBits, CNMask->getZExtValue());
5621 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5622 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
5623 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5624 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
5625 N1->getOperand(0),
5626 DAG.getConstant(MaskIdx, DL, GRLenVT)),
5627 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5628 DAG.getConstant(MaskIdx, DL, GRLenVT));
5629 }
5630 }
5631 // Swap N0/N1 and retry.
5632 if (!SwapAndRetried) {
5633 std::swap(N0, N1);
5634 SwapAndRetried = true;
5635 goto Retry2;
5636 }
5637
5638 return SDValue();
5639}
5640
5641static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
5642 ExtType = ISD::NON_EXTLOAD;
5643
5644 switch (V.getNode()->getOpcode()) {
5645 case ISD::LOAD: {
5646 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
5647 if ((LoadNode->getMemoryVT() == MVT::i8) ||
5648 (LoadNode->getMemoryVT() == MVT::i16)) {
5649 ExtType = LoadNode->getExtensionType();
5650 return true;
5651 }
5652 return false;
5653 }
5654 case ISD::AssertSext: {
5655 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5656 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5657 ExtType = ISD::SEXTLOAD;
5658 return true;
5659 }
5660 return false;
5661 }
5662 case ISD::AssertZext: {
5663 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5664 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5665 ExtType = ISD::ZEXTLOAD;
5666 return true;
5667 }
5668 return false;
5669 }
5670 default:
5671 return false;
5672 }
5673
5674 return false;
5675}
5676
5677// Eliminate redundant truncation and zero-extension nodes.
5678// * Case 1:
5679// +------------+ +------------+ +------------+
5680// | Input1 | | Input2 | | CC |
5681// +------------+ +------------+ +------------+
5682// | | |
5683// V V +----+
5684// +------------+ +------------+ |
5685// | TRUNCATE | | TRUNCATE | |
5686// +------------+ +------------+ |
5687// | | |
5688// V V |
5689// +------------+ +------------+ |
5690// | ZERO_EXT | | ZERO_EXT | |
5691// +------------+ +------------+ |
5692// | | |
5693// | +-------------+ |
5694// V V | |
5695// +----------------+ | |
5696// | AND | | |
5697// +----------------+ | |
5698// | | |
5699// +---------------+ | |
5700// | | |
5701// V V V
5702// +-------------+
5703// | CMP |
5704// +-------------+
5705// * Case 2:
5706// +------------+ +------------+ +-------------+ +------------+ +------------+
5707// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
5708// +------------+ +------------+ +-------------+ +------------+ +------------+
5709// | | | | |
5710// V | | | |
5711// +------------+ | | | |
5712// | XOR |<---------------------+ | |
5713// +------------+ | | |
5714// | | | |
5715// V V +---------------+ |
5716// +------------+ +------------+ | |
5717// | TRUNCATE | | TRUNCATE | | +-------------------------+
5718// +------------+ +------------+ | |
5719// | | | |
5720// V V | |
5721// +------------+ +------------+ | |
5722// | ZERO_EXT | | ZERO_EXT | | |
5723// +------------+ +------------+ | |
5724// | | | |
5725// V V | |
5726// +----------------+ | |
5727// | AND | | |
5728// +----------------+ | |
5729// | | |
5730// +---------------+ | |
5731// | | |
5732// V V V
5733// +-------------+
5734// | CMP |
5735// +-------------+
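// In both cases the values reaching the AND are i8/i16 (possibly extending)
// loads, so the TRUNCATE/ZERO_EXT pairs are redundant and the AND and the
// comparison can be rebuilt directly on the wider inputs.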
5736static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
5737 TargetLowering::DAGCombinerInfo &DCI,
5738 const LoongArchSubtarget &Subtarget) {
5739 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
5740
5741 SDNode *AndNode = N->getOperand(0).getNode();
5742 if (AndNode->getOpcode() != ISD::AND)
5743 return SDValue();
5744
5745 SDValue AndInputValue2 = AndNode->getOperand(1);
5746 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
5747 return SDValue();
5748
5749 SDValue CmpInputValue = N->getOperand(1);
5750 SDValue AndInputValue1 = AndNode->getOperand(0);
5751 if (AndInputValue1.getOpcode() == ISD::XOR) {
5752 if (CC != ISD::SETEQ && CC != ISD::SETNE)
5753 return SDValue();
5754 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
5755 if (!CN || CN->getSExtValue() != -1)
5756 return SDValue();
5757 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
5758 if (!CN || CN->getSExtValue() != 0)
5759 return SDValue();
5760 AndInputValue1 = AndInputValue1.getOperand(0);
5761 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
5762 return SDValue();
5763 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
5764 if (AndInputValue2 != CmpInputValue)
5765 return SDValue();
5766 } else {
5767 return SDValue();
5768 }
5769
5770 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
5771 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
5772 return SDValue();
5773
5774 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
5775 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
5776 return SDValue();
5777
5778 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
5779 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
5780 ISD::LoadExtType ExtType1;
5781 ISD::LoadExtType ExtType2;
5782
5783 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
5784 !checkValueWidth(TruncInputValue2, ExtType2))
5785 return SDValue();
5786
5787 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
5788 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
5789 return SDValue();
5790
5791 if ((ExtType2 != ISD::ZEXTLOAD) &&
5792 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
5793 return SDValue();
5794
5795 // These truncation and zero-extension nodes are not necessary, remove them.
5796 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
5797 TruncInputValue1, TruncInputValue2);
5798 SDValue NewSetCC =
5799 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
5800 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
5801 return SDValue(N, 0);
5802}
5803
5804// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
5805static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
5806 TargetLowering::DAGCombinerInfo &DCI,
5807 const LoongArchSubtarget &Subtarget) {
5808 if (DCI.isBeforeLegalizeOps())
5809 return SDValue();
5810
5811 SDValue Src = N->getOperand(0);
5812 if (Src.getOpcode() != LoongArchISD::REVB_2W)
5813 return SDValue();
5814
5815 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
5816 Src.getOperand(0));
5817}
5818
5819// Perform common combines for BR_CC and SELECT_CC conditions.
5820static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
5821 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
5822 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5823
 5824  // Since an arithmetic right shift always preserves the sign bit, the
 5825  // shift can be omitted.
5826 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
5827 // setge (sra X, N), 0 -> setge X, 0
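  // For example, (setlt (sra X, 3), 0) only tests the sign bit, which the
  // arithmetic shift preserves, so it is equivalent to (setlt X, 0).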
5828 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
5829 LHS.getOpcode() == ISD::SRA) {
5830 LHS = LHS.getOperand(0);
5831 return true;
5832 }
5833
5834 if (!ISD::isIntEqualitySetCC(CCVal))
5835 return false;
5836
5837 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
5838 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
5839 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
5840 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
5841 // If we're looking for eq 0 instead of ne 0, we need to invert the
5842 // condition.
5843 bool Invert = CCVal == ISD::SETEQ;
5844 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
5845 if (Invert)
5846 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5847
5848 RHS = LHS.getOperand(1);
5849 LHS = LHS.getOperand(0);
5850 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
5851
5852 CC = DAG.getCondCode(CCVal);
5853 return true;
5854 }
5855
5856 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
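  // Shifting bit C into the sign position lets the comparison test the sign
  // instead of materializing the mask; e.g. on LA64 with C = 5,
  // ((srl (and X, 0x20), 5), 0, eq) becomes ((shl X, 58), 0, ge).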
5857 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
5858 LHS.getOperand(1).getOpcode() == ISD::Constant) {
5859 SDValue LHS0 = LHS.getOperand(0);
5860 if (LHS0.getOpcode() == ISD::AND &&
5861 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
5862 uint64_t Mask = LHS0.getConstantOperandVal(1);
5863 uint64_t ShAmt = LHS.getConstantOperandVal(1);
5864 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
5865 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
5866 CC = DAG.getCondCode(CCVal);
5867
5868 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
5869 LHS = LHS0.getOperand(0);
5870 if (ShAmt != 0)
5871 LHS =
5872 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
5873 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
5874 return true;
5875 }
5876 }
5877 }
5878
5879 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
5880 // This can occur when legalizing some floating point comparisons.
5881 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
5882 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
5883 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5884 CC = DAG.getCondCode(CCVal);
5885 RHS = DAG.getConstant(0, DL, LHS.getValueType());
5886 return true;
5887 }
5888
5889 return false;
5890}
5891
5892static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG,
5893 TargetLowering::DAGCombinerInfo &DCI,
5894 const LoongArchSubtarget &Subtarget) {
5895 SDValue LHS = N->getOperand(1);
5896 SDValue RHS = N->getOperand(2);
5897 SDValue CC = N->getOperand(3);
5898 SDLoc DL(N);
5899
5900 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5901 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
5902 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
5903
5904 return SDValue();
5905}
5906
5907static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
5908 TargetLowering::DAGCombinerInfo &DCI,
5909 const LoongArchSubtarget &Subtarget) {
5910 // Transform
5911 SDValue LHS = N->getOperand(0);
5912 SDValue RHS = N->getOperand(1);
5913 SDValue CC = N->getOperand(2);
5914 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5915 SDValue TrueV = N->getOperand(3);
5916 SDValue FalseV = N->getOperand(4);
5917 SDLoc DL(N);
5918 EVT VT = N->getValueType(0);
5919
5920 // If the True and False values are the same, we don't need a select_cc.
5921 if (TrueV == FalseV)
5922 return TrueV;
5923
5924 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
5925 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
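  // When x < 0, x >> (GRLEN - 1) is all ones, so the AND keeps (y - z) and the
  // ADD yields y; when x >= 0 the shift gives zero and the result is z, so no
  // branch is needed.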
5926 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
 5927      isNullConstant(RHS) &&
 5928      (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
5929 if (CCVal == ISD::CondCode::SETGE)
5930 std::swap(TrueV, FalseV);
5931
5932 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
5933 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
 5934    // Only handle simm12; constants outside this range are better kept in a
 5935    // register.
5936 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
5937 isInt<12>(TrueSImm - FalseSImm)) {
5938 SDValue SRA =
5939 DAG.getNode(ISD::SRA, DL, VT, LHS,
5940 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
5941 SDValue AND =
5942 DAG.getNode(ISD::AND, DL, VT, SRA,
5943 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
5944 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
5945 }
5946
5947 if (CCVal == ISD::CondCode::SETGE)
5948 std::swap(TrueV, FalseV);
5949 }
5950
5951 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5952 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
5953 {LHS, RHS, CC, TrueV, FalseV});
5954
5955 return SDValue();
5956}
5957
5958template <unsigned N>
5959static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
5960 SelectionDAG &DAG,
5961 const LoongArchSubtarget &Subtarget,
5962 bool IsSigned = false) {
5963 SDLoc DL(Node);
5964 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5965 // Check the ImmArg.
5966 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5967 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5968 DAG.getContext()->emitError(Node->getOperationName(0) +
5969 ": argument out of range.");
5970 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
5971 }
5972 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
5973}
5974
5975template <unsigned N>
5976static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
5977 SelectionDAG &DAG, bool IsSigned = false) {
5978 SDLoc DL(Node);
5979 EVT ResTy = Node->getValueType(0);
5980 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5981
5982 // Check the ImmArg.
5983 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5984 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5985 DAG.getContext()->emitError(Node->getOperationName(0) +
5986 ": argument out of range.");
5987 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5988 }
5989 return DAG.getConstant(
 5990      APInt(ResTy.getScalarType().getSizeInBits(),
 5991            IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
5992 DL, ResTy);
5993}
5994
5995static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
5996 SDLoc DL(Node);
5997 EVT ResTy = Node->getValueType(0);
5998 SDValue Vec = Node->getOperand(2);
5999 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
6000 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
6001}
6002
6003static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
6004 SDLoc DL(Node);
6005 EVT ResTy = Node->getValueType(0);
6006 SDValue One = DAG.getConstant(1, DL, ResTy);
6007 SDValue Bit =
6008 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
6009
6010 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
6011 DAG.getNOT(DL, Bit, ResTy));
6012}
6013
6014template <unsigned N>
6015static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
6016 SDLoc DL(Node);
6017 EVT ResTy = Node->getValueType(0);
6018 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6019 // Check the unsigned ImmArg.
6020 if (!isUInt<N>(CImm->getZExtValue())) {
6021 DAG.getContext()->emitError(Node->getOperationName(0) +
6022 ": argument out of range.");
6023 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6024 }
6025
6026 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6027 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
6028
6029 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
6030}
6031
6032template <unsigned N>
6033static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
6034 SDLoc DL(Node);
6035 EVT ResTy = Node->getValueType(0);
6036 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6037 // Check the unsigned ImmArg.
6038 if (!isUInt<N>(CImm->getZExtValue())) {
6039 DAG.getContext()->emitError(Node->getOperationName(0) +
6040 ": argument out of range.");
6041 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6042 }
6043
6044 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6045 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
6046 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
6047}
6048
6049template <unsigned N>
6050static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
6051 SDLoc DL(Node);
6052 EVT ResTy = Node->getValueType(0);
6053 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6054 // Check the unsigned ImmArg.
6055 if (!isUInt<N>(CImm->getZExtValue())) {
6056 DAG.getContext()->emitError(Node->getOperationName(0) +
6057 ": argument out of range.");
6058 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6059 }
6060
6061 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6062 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
6063 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
6064}
6065
6066template <unsigned W>
6068 unsigned ResOp) {
6069 unsigned Imm = N->getConstantOperandVal(2);
6070 if (!isUInt<W>(Imm)) {
6071 const StringRef ErrorMsg = "argument out of range";
6072 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
6073 return DAG.getUNDEF(N->getValueType(0));
6074 }
6075 SDLoc DL(N);
6076 SDValue Vec = N->getOperand(1);
6077 SDValue Idx = DAG.getConstant(Imm, DL, MVT::i32);
6079 return DAG.getNode(ResOp, DL, N->getValueType(0), Vec, Idx, EltVT);
6080}
6081
6082static SDValue
6083performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
6084 TargetLowering::DAGCombinerInfo &DCI,
6085 const LoongArchSubtarget &Subtarget) {
6086 SDLoc DL(N);
6087 switch (N->getConstantOperandVal(0)) {
6088 default:
6089 break;
6090 case Intrinsic::loongarch_lsx_vadd_b:
6091 case Intrinsic::loongarch_lsx_vadd_h:
6092 case Intrinsic::loongarch_lsx_vadd_w:
6093 case Intrinsic::loongarch_lsx_vadd_d:
6094 case Intrinsic::loongarch_lasx_xvadd_b:
6095 case Intrinsic::loongarch_lasx_xvadd_h:
6096 case Intrinsic::loongarch_lasx_xvadd_w:
6097 case Intrinsic::loongarch_lasx_xvadd_d:
6098 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6099 N->getOperand(2));
6100 case Intrinsic::loongarch_lsx_vaddi_bu:
6101 case Intrinsic::loongarch_lsx_vaddi_hu:
6102 case Intrinsic::loongarch_lsx_vaddi_wu:
6103 case Intrinsic::loongarch_lsx_vaddi_du:
6104 case Intrinsic::loongarch_lasx_xvaddi_bu:
6105 case Intrinsic::loongarch_lasx_xvaddi_hu:
6106 case Intrinsic::loongarch_lasx_xvaddi_wu:
6107 case Intrinsic::loongarch_lasx_xvaddi_du:
6108 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6109 lowerVectorSplatImm<5>(N, 2, DAG));
6110 case Intrinsic::loongarch_lsx_vsub_b:
6111 case Intrinsic::loongarch_lsx_vsub_h:
6112 case Intrinsic::loongarch_lsx_vsub_w:
6113 case Intrinsic::loongarch_lsx_vsub_d:
6114 case Intrinsic::loongarch_lasx_xvsub_b:
6115 case Intrinsic::loongarch_lasx_xvsub_h:
6116 case Intrinsic::loongarch_lasx_xvsub_w:
6117 case Intrinsic::loongarch_lasx_xvsub_d:
6118 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6119 N->getOperand(2));
6120 case Intrinsic::loongarch_lsx_vsubi_bu:
6121 case Intrinsic::loongarch_lsx_vsubi_hu:
6122 case Intrinsic::loongarch_lsx_vsubi_wu:
6123 case Intrinsic::loongarch_lsx_vsubi_du:
6124 case Intrinsic::loongarch_lasx_xvsubi_bu:
6125 case Intrinsic::loongarch_lasx_xvsubi_hu:
6126 case Intrinsic::loongarch_lasx_xvsubi_wu:
6127 case Intrinsic::loongarch_lasx_xvsubi_du:
6128 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6129 lowerVectorSplatImm<5>(N, 2, DAG));
6130 case Intrinsic::loongarch_lsx_vneg_b:
6131 case Intrinsic::loongarch_lsx_vneg_h:
6132 case Intrinsic::loongarch_lsx_vneg_w:
6133 case Intrinsic::loongarch_lsx_vneg_d:
6134 case Intrinsic::loongarch_lasx_xvneg_b:
6135 case Intrinsic::loongarch_lasx_xvneg_h:
6136 case Intrinsic::loongarch_lasx_xvneg_w:
6137 case Intrinsic::loongarch_lasx_xvneg_d:
6138 return DAG.getNode(
6139 ISD::SUB, DL, N->getValueType(0),
6140 DAG.getConstant(
6141 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
6142 /*isSigned=*/true),
6143 SDLoc(N), N->getValueType(0)),
6144 N->getOperand(1));
6145 case Intrinsic::loongarch_lsx_vmax_b:
6146 case Intrinsic::loongarch_lsx_vmax_h:
6147 case Intrinsic::loongarch_lsx_vmax_w:
6148 case Intrinsic::loongarch_lsx_vmax_d:
6149 case Intrinsic::loongarch_lasx_xvmax_b:
6150 case Intrinsic::loongarch_lasx_xvmax_h:
6151 case Intrinsic::loongarch_lasx_xvmax_w:
6152 case Intrinsic::loongarch_lasx_xvmax_d:
6153 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6154 N->getOperand(2));
6155 case Intrinsic::loongarch_lsx_vmax_bu:
6156 case Intrinsic::loongarch_lsx_vmax_hu:
6157 case Intrinsic::loongarch_lsx_vmax_wu:
6158 case Intrinsic::loongarch_lsx_vmax_du:
6159 case Intrinsic::loongarch_lasx_xvmax_bu:
6160 case Intrinsic::loongarch_lasx_xvmax_hu:
6161 case Intrinsic::loongarch_lasx_xvmax_wu:
6162 case Intrinsic::loongarch_lasx_xvmax_du:
6163 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6164 N->getOperand(2));
6165 case Intrinsic::loongarch_lsx_vmaxi_b:
6166 case Intrinsic::loongarch_lsx_vmaxi_h:
6167 case Intrinsic::loongarch_lsx_vmaxi_w:
6168 case Intrinsic::loongarch_lsx_vmaxi_d:
6169 case Intrinsic::loongarch_lasx_xvmaxi_b:
6170 case Intrinsic::loongarch_lasx_xvmaxi_h:
6171 case Intrinsic::loongarch_lasx_xvmaxi_w:
6172 case Intrinsic::loongarch_lasx_xvmaxi_d:
6173 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6174 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6175 case Intrinsic::loongarch_lsx_vmaxi_bu:
6176 case Intrinsic::loongarch_lsx_vmaxi_hu:
6177 case Intrinsic::loongarch_lsx_vmaxi_wu:
6178 case Intrinsic::loongarch_lsx_vmaxi_du:
6179 case Intrinsic::loongarch_lasx_xvmaxi_bu:
6180 case Intrinsic::loongarch_lasx_xvmaxi_hu:
6181 case Intrinsic::loongarch_lasx_xvmaxi_wu:
6182 case Intrinsic::loongarch_lasx_xvmaxi_du:
6183 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6184 lowerVectorSplatImm<5>(N, 2, DAG));
6185 case Intrinsic::loongarch_lsx_vmin_b:
6186 case Intrinsic::loongarch_lsx_vmin_h:
6187 case Intrinsic::loongarch_lsx_vmin_w:
6188 case Intrinsic::loongarch_lsx_vmin_d:
6189 case Intrinsic::loongarch_lasx_xvmin_b:
6190 case Intrinsic::loongarch_lasx_xvmin_h:
6191 case Intrinsic::loongarch_lasx_xvmin_w:
6192 case Intrinsic::loongarch_lasx_xvmin_d:
6193 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6194 N->getOperand(2));
6195 case Intrinsic::loongarch_lsx_vmin_bu:
6196 case Intrinsic::loongarch_lsx_vmin_hu:
6197 case Intrinsic::loongarch_lsx_vmin_wu:
6198 case Intrinsic::loongarch_lsx_vmin_du:
6199 case Intrinsic::loongarch_lasx_xvmin_bu:
6200 case Intrinsic::loongarch_lasx_xvmin_hu:
6201 case Intrinsic::loongarch_lasx_xvmin_wu:
6202 case Intrinsic::loongarch_lasx_xvmin_du:
6203 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6204 N->getOperand(2));
6205 case Intrinsic::loongarch_lsx_vmini_b:
6206 case Intrinsic::loongarch_lsx_vmini_h:
6207 case Intrinsic::loongarch_lsx_vmini_w:
6208 case Intrinsic::loongarch_lsx_vmini_d:
6209 case Intrinsic::loongarch_lasx_xvmini_b:
6210 case Intrinsic::loongarch_lasx_xvmini_h:
6211 case Intrinsic::loongarch_lasx_xvmini_w:
6212 case Intrinsic::loongarch_lasx_xvmini_d:
6213 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6214 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6215 case Intrinsic::loongarch_lsx_vmini_bu:
6216 case Intrinsic::loongarch_lsx_vmini_hu:
6217 case Intrinsic::loongarch_lsx_vmini_wu:
6218 case Intrinsic::loongarch_lsx_vmini_du:
6219 case Intrinsic::loongarch_lasx_xvmini_bu:
6220 case Intrinsic::loongarch_lasx_xvmini_hu:
6221 case Intrinsic::loongarch_lasx_xvmini_wu:
6222 case Intrinsic::loongarch_lasx_xvmini_du:
6223 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6224 lowerVectorSplatImm<5>(N, 2, DAG));
6225 case Intrinsic::loongarch_lsx_vmul_b:
6226 case Intrinsic::loongarch_lsx_vmul_h:
6227 case Intrinsic::loongarch_lsx_vmul_w:
6228 case Intrinsic::loongarch_lsx_vmul_d:
6229 case Intrinsic::loongarch_lasx_xvmul_b:
6230 case Intrinsic::loongarch_lasx_xvmul_h:
6231 case Intrinsic::loongarch_lasx_xvmul_w:
6232 case Intrinsic::loongarch_lasx_xvmul_d:
6233 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
6234 N->getOperand(2));
6235 case Intrinsic::loongarch_lsx_vmadd_b:
6236 case Intrinsic::loongarch_lsx_vmadd_h:
6237 case Intrinsic::loongarch_lsx_vmadd_w:
6238 case Intrinsic::loongarch_lsx_vmadd_d:
6239 case Intrinsic::loongarch_lasx_xvmadd_b:
6240 case Intrinsic::loongarch_lasx_xvmadd_h:
6241 case Intrinsic::loongarch_lasx_xvmadd_w:
6242 case Intrinsic::loongarch_lasx_xvmadd_d: {
6243 EVT ResTy = N->getValueType(0);
6244 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
6245 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6246 N->getOperand(3)));
6247 }
6248 case Intrinsic::loongarch_lsx_vmsub_b:
6249 case Intrinsic::loongarch_lsx_vmsub_h:
6250 case Intrinsic::loongarch_lsx_vmsub_w:
6251 case Intrinsic::loongarch_lsx_vmsub_d:
6252 case Intrinsic::loongarch_lasx_xvmsub_b:
6253 case Intrinsic::loongarch_lasx_xvmsub_h:
6254 case Intrinsic::loongarch_lasx_xvmsub_w:
6255 case Intrinsic::loongarch_lasx_xvmsub_d: {
6256 EVT ResTy = N->getValueType(0);
6257 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
6258 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6259 N->getOperand(3)));
6260 }
6261 case Intrinsic::loongarch_lsx_vdiv_b:
6262 case Intrinsic::loongarch_lsx_vdiv_h:
6263 case Intrinsic::loongarch_lsx_vdiv_w:
6264 case Intrinsic::loongarch_lsx_vdiv_d:
6265 case Intrinsic::loongarch_lasx_xvdiv_b:
6266 case Intrinsic::loongarch_lasx_xvdiv_h:
6267 case Intrinsic::loongarch_lasx_xvdiv_w:
6268 case Intrinsic::loongarch_lasx_xvdiv_d:
6269 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
6270 N->getOperand(2));
6271 case Intrinsic::loongarch_lsx_vdiv_bu:
6272 case Intrinsic::loongarch_lsx_vdiv_hu:
6273 case Intrinsic::loongarch_lsx_vdiv_wu:
6274 case Intrinsic::loongarch_lsx_vdiv_du:
6275 case Intrinsic::loongarch_lasx_xvdiv_bu:
6276 case Intrinsic::loongarch_lasx_xvdiv_hu:
6277 case Intrinsic::loongarch_lasx_xvdiv_wu:
6278 case Intrinsic::loongarch_lasx_xvdiv_du:
6279 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
6280 N->getOperand(2));
6281 case Intrinsic::loongarch_lsx_vmod_b:
6282 case Intrinsic::loongarch_lsx_vmod_h:
6283 case Intrinsic::loongarch_lsx_vmod_w:
6284 case Intrinsic::loongarch_lsx_vmod_d:
6285 case Intrinsic::loongarch_lasx_xvmod_b:
6286 case Intrinsic::loongarch_lasx_xvmod_h:
6287 case Intrinsic::loongarch_lasx_xvmod_w:
6288 case Intrinsic::loongarch_lasx_xvmod_d:
6289 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
6290 N->getOperand(2));
6291 case Intrinsic::loongarch_lsx_vmod_bu:
6292 case Intrinsic::loongarch_lsx_vmod_hu:
6293 case Intrinsic::loongarch_lsx_vmod_wu:
6294 case Intrinsic::loongarch_lsx_vmod_du:
6295 case Intrinsic::loongarch_lasx_xvmod_bu:
6296 case Intrinsic::loongarch_lasx_xvmod_hu:
6297 case Intrinsic::loongarch_lasx_xvmod_wu:
6298 case Intrinsic::loongarch_lasx_xvmod_du:
6299 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
6300 N->getOperand(2));
6301 case Intrinsic::loongarch_lsx_vand_v:
6302 case Intrinsic::loongarch_lasx_xvand_v:
6303 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6304 N->getOperand(2));
6305 case Intrinsic::loongarch_lsx_vor_v:
6306 case Intrinsic::loongarch_lasx_xvor_v:
6307 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6308 N->getOperand(2));
6309 case Intrinsic::loongarch_lsx_vxor_v:
6310 case Intrinsic::loongarch_lasx_xvxor_v:
6311 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6312 N->getOperand(2));
6313 case Intrinsic::loongarch_lsx_vnor_v:
6314 case Intrinsic::loongarch_lasx_xvnor_v: {
6315 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6316 N->getOperand(2));
6317 return DAG.getNOT(DL, Res, Res->getValueType(0));
6318 }
6319 case Intrinsic::loongarch_lsx_vandi_b:
6320 case Intrinsic::loongarch_lasx_xvandi_b:
6321 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6322 lowerVectorSplatImm<8>(N, 2, DAG));
6323 case Intrinsic::loongarch_lsx_vori_b:
6324 case Intrinsic::loongarch_lasx_xvori_b:
6325 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6326 lowerVectorSplatImm<8>(N, 2, DAG));
6327 case Intrinsic::loongarch_lsx_vxori_b:
6328 case Intrinsic::loongarch_lasx_xvxori_b:
6329 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6330 lowerVectorSplatImm<8>(N, 2, DAG));
6331 case Intrinsic::loongarch_lsx_vsll_b:
6332 case Intrinsic::loongarch_lsx_vsll_h:
6333 case Intrinsic::loongarch_lsx_vsll_w:
6334 case Intrinsic::loongarch_lsx_vsll_d:
6335 case Intrinsic::loongarch_lasx_xvsll_b:
6336 case Intrinsic::loongarch_lasx_xvsll_h:
6337 case Intrinsic::loongarch_lasx_xvsll_w:
6338 case Intrinsic::loongarch_lasx_xvsll_d:
6339 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6340 truncateVecElts(N, DAG));
6341 case Intrinsic::loongarch_lsx_vslli_b:
6342 case Intrinsic::loongarch_lasx_xvslli_b:
6343 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6344 lowerVectorSplatImm<3>(N, 2, DAG));
6345 case Intrinsic::loongarch_lsx_vslli_h:
6346 case Intrinsic::loongarch_lasx_xvslli_h:
6347 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6348 lowerVectorSplatImm<4>(N, 2, DAG));
6349 case Intrinsic::loongarch_lsx_vslli_w:
6350 case Intrinsic::loongarch_lasx_xvslli_w:
6351 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6352 lowerVectorSplatImm<5>(N, 2, DAG));
6353 case Intrinsic::loongarch_lsx_vslli_d:
6354 case Intrinsic::loongarch_lasx_xvslli_d:
6355 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6356 lowerVectorSplatImm<6>(N, 2, DAG));
6357 case Intrinsic::loongarch_lsx_vsrl_b:
6358 case Intrinsic::loongarch_lsx_vsrl_h:
6359 case Intrinsic::loongarch_lsx_vsrl_w:
6360 case Intrinsic::loongarch_lsx_vsrl_d:
6361 case Intrinsic::loongarch_lasx_xvsrl_b:
6362 case Intrinsic::loongarch_lasx_xvsrl_h:
6363 case Intrinsic::loongarch_lasx_xvsrl_w:
6364 case Intrinsic::loongarch_lasx_xvsrl_d:
6365 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6366 truncateVecElts(N, DAG));
6367 case Intrinsic::loongarch_lsx_vsrli_b:
6368 case Intrinsic::loongarch_lasx_xvsrli_b:
6369 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6370 lowerVectorSplatImm<3>(N, 2, DAG));
6371 case Intrinsic::loongarch_lsx_vsrli_h:
6372 case Intrinsic::loongarch_lasx_xvsrli_h:
6373 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6374 lowerVectorSplatImm<4>(N, 2, DAG));
6375 case Intrinsic::loongarch_lsx_vsrli_w:
6376 case Intrinsic::loongarch_lasx_xvsrli_w:
6377 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6378 lowerVectorSplatImm<5>(N, 2, DAG));
6379 case Intrinsic::loongarch_lsx_vsrli_d:
6380 case Intrinsic::loongarch_lasx_xvsrli_d:
6381 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6382 lowerVectorSplatImm<6>(N, 2, DAG));
6383 case Intrinsic::loongarch_lsx_vsra_b:
6384 case Intrinsic::loongarch_lsx_vsra_h:
6385 case Intrinsic::loongarch_lsx_vsra_w:
6386 case Intrinsic::loongarch_lsx_vsra_d:
6387 case Intrinsic::loongarch_lasx_xvsra_b:
6388 case Intrinsic::loongarch_lasx_xvsra_h:
6389 case Intrinsic::loongarch_lasx_xvsra_w:
6390 case Intrinsic::loongarch_lasx_xvsra_d:
6391 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6392 truncateVecElts(N, DAG));
6393 case Intrinsic::loongarch_lsx_vsrai_b:
6394 case Intrinsic::loongarch_lasx_xvsrai_b:
6395 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6396 lowerVectorSplatImm<3>(N, 2, DAG));
6397 case Intrinsic::loongarch_lsx_vsrai_h:
6398 case Intrinsic::loongarch_lasx_xvsrai_h:
6399 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6400 lowerVectorSplatImm<4>(N, 2, DAG));
6401 case Intrinsic::loongarch_lsx_vsrai_w:
6402 case Intrinsic::loongarch_lasx_xvsrai_w:
6403 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6404 lowerVectorSplatImm<5>(N, 2, DAG));
6405 case Intrinsic::loongarch_lsx_vsrai_d:
6406 case Intrinsic::loongarch_lasx_xvsrai_d:
6407 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6408 lowerVectorSplatImm<6>(N, 2, DAG));
6409 case Intrinsic::loongarch_lsx_vclz_b:
6410 case Intrinsic::loongarch_lsx_vclz_h:
6411 case Intrinsic::loongarch_lsx_vclz_w:
6412 case Intrinsic::loongarch_lsx_vclz_d:
6413 case Intrinsic::loongarch_lasx_xvclz_b:
6414 case Intrinsic::loongarch_lasx_xvclz_h:
6415 case Intrinsic::loongarch_lasx_xvclz_w:
6416 case Intrinsic::loongarch_lasx_xvclz_d:
6417 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
6418 case Intrinsic::loongarch_lsx_vpcnt_b:
6419 case Intrinsic::loongarch_lsx_vpcnt_h:
6420 case Intrinsic::loongarch_lsx_vpcnt_w:
6421 case Intrinsic::loongarch_lsx_vpcnt_d:
6422 case Intrinsic::loongarch_lasx_xvpcnt_b:
6423 case Intrinsic::loongarch_lasx_xvpcnt_h:
6424 case Intrinsic::loongarch_lasx_xvpcnt_w:
6425 case Intrinsic::loongarch_lasx_xvpcnt_d:
6426 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
6427 case Intrinsic::loongarch_lsx_vbitclr_b:
6428 case Intrinsic::loongarch_lsx_vbitclr_h:
6429 case Intrinsic::loongarch_lsx_vbitclr_w:
6430 case Intrinsic::loongarch_lsx_vbitclr_d:
6431 case Intrinsic::loongarch_lasx_xvbitclr_b:
6432 case Intrinsic::loongarch_lasx_xvbitclr_h:
6433 case Intrinsic::loongarch_lasx_xvbitclr_w:
6434 case Intrinsic::loongarch_lasx_xvbitclr_d:
6435 return lowerVectorBitClear(N, DAG);
6436 case Intrinsic::loongarch_lsx_vbitclri_b:
6437 case Intrinsic::loongarch_lasx_xvbitclri_b:
6438 return lowerVectorBitClearImm<3>(N, DAG);
6439 case Intrinsic::loongarch_lsx_vbitclri_h:
6440 case Intrinsic::loongarch_lasx_xvbitclri_h:
6441 return lowerVectorBitClearImm<4>(N, DAG);
6442 case Intrinsic::loongarch_lsx_vbitclri_w:
6443 case Intrinsic::loongarch_lasx_xvbitclri_w:
6444 return lowerVectorBitClearImm<5>(N, DAG);
6445 case Intrinsic::loongarch_lsx_vbitclri_d:
6446 case Intrinsic::loongarch_lasx_xvbitclri_d:
6447 return lowerVectorBitClearImm<6>(N, DAG);
6448 case Intrinsic::loongarch_lsx_vbitset_b:
6449 case Intrinsic::loongarch_lsx_vbitset_h:
6450 case Intrinsic::loongarch_lsx_vbitset_w:
6451 case Intrinsic::loongarch_lsx_vbitset_d:
6452 case Intrinsic::loongarch_lasx_xvbitset_b:
6453 case Intrinsic::loongarch_lasx_xvbitset_h:
6454 case Intrinsic::loongarch_lasx_xvbitset_w:
6455 case Intrinsic::loongarch_lasx_xvbitset_d: {
6456 EVT VecTy = N->getValueType(0);
6457 SDValue One = DAG.getConstant(1, DL, VecTy);
6458 return DAG.getNode(
6459 ISD::OR, DL, VecTy, N->getOperand(1),
6460 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6461 }
6462 case Intrinsic::loongarch_lsx_vbitseti_b:
6463 case Intrinsic::loongarch_lasx_xvbitseti_b:
6464 return lowerVectorBitSetImm<3>(N, DAG);
6465 case Intrinsic::loongarch_lsx_vbitseti_h:
6466 case Intrinsic::loongarch_lasx_xvbitseti_h:
6467 return lowerVectorBitSetImm<4>(N, DAG);
6468 case Intrinsic::loongarch_lsx_vbitseti_w:
6469 case Intrinsic::loongarch_lasx_xvbitseti_w:
6470 return lowerVectorBitSetImm<5>(N, DAG);
6471 case Intrinsic::loongarch_lsx_vbitseti_d:
6472 case Intrinsic::loongarch_lasx_xvbitseti_d:
6473 return lowerVectorBitSetImm<6>(N, DAG);
6474 case Intrinsic::loongarch_lsx_vbitrev_b:
6475 case Intrinsic::loongarch_lsx_vbitrev_h:
6476 case Intrinsic::loongarch_lsx_vbitrev_w:
6477 case Intrinsic::loongarch_lsx_vbitrev_d:
6478 case Intrinsic::loongarch_lasx_xvbitrev_b:
6479 case Intrinsic::loongarch_lasx_xvbitrev_h:
6480 case Intrinsic::loongarch_lasx_xvbitrev_w:
6481 case Intrinsic::loongarch_lasx_xvbitrev_d: {
6482 EVT VecTy = N->getValueType(0);
6483 SDValue One = DAG.getConstant(1, DL, VecTy);
6484 return DAG.getNode(
6485 ISD::XOR, DL, VecTy, N->getOperand(1),
6486 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6487 }
6488 case Intrinsic::loongarch_lsx_vbitrevi_b:
6489 case Intrinsic::loongarch_lasx_xvbitrevi_b:
6490 return lowerVectorBitRevImm<3>(N, DAG);
6491 case Intrinsic::loongarch_lsx_vbitrevi_h:
6492 case Intrinsic::loongarch_lasx_xvbitrevi_h:
6493 return lowerVectorBitRevImm<4>(N, DAG);
6494 case Intrinsic::loongarch_lsx_vbitrevi_w:
6495 case Intrinsic::loongarch_lasx_xvbitrevi_w:
6496 return lowerVectorBitRevImm<5>(N, DAG);
6497 case Intrinsic::loongarch_lsx_vbitrevi_d:
6498 case Intrinsic::loongarch_lasx_xvbitrevi_d:
6499 return lowerVectorBitRevImm<6>(N, DAG);
6500 case Intrinsic::loongarch_lsx_vfadd_s:
6501 case Intrinsic::loongarch_lsx_vfadd_d:
6502 case Intrinsic::loongarch_lasx_xvfadd_s:
6503 case Intrinsic::loongarch_lasx_xvfadd_d:
6504 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
6505 N->getOperand(2));
6506 case Intrinsic::loongarch_lsx_vfsub_s:
6507 case Intrinsic::loongarch_lsx_vfsub_d:
6508 case Intrinsic::loongarch_lasx_xvfsub_s:
6509 case Intrinsic::loongarch_lasx_xvfsub_d:
6510 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
6511 N->getOperand(2));
6512 case Intrinsic::loongarch_lsx_vfmul_s:
6513 case Intrinsic::loongarch_lsx_vfmul_d:
6514 case Intrinsic::loongarch_lasx_xvfmul_s:
6515 case Intrinsic::loongarch_lasx_xvfmul_d:
6516 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
6517 N->getOperand(2));
6518 case Intrinsic::loongarch_lsx_vfdiv_s:
6519 case Intrinsic::loongarch_lsx_vfdiv_d:
6520 case Intrinsic::loongarch_lasx_xvfdiv_s:
6521 case Intrinsic::loongarch_lasx_xvfdiv_d:
6522 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
6523 N->getOperand(2));
6524 case Intrinsic::loongarch_lsx_vfmadd_s:
6525 case Intrinsic::loongarch_lsx_vfmadd_d:
6526 case Intrinsic::loongarch_lasx_xvfmadd_s:
6527 case Intrinsic::loongarch_lasx_xvfmadd_d:
6528 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
6529 N->getOperand(2), N->getOperand(3));
6530 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
6531 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6532 N->getOperand(1), N->getOperand(2),
6533 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
6534 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
6535 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
6536 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6537 N->getOperand(1), N->getOperand(2),
6538 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
6539 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
6540 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
6541 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6542 N->getOperand(1), N->getOperand(2),
6543 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
6544 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
6545 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6546 N->getOperand(1), N->getOperand(2),
6547 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
6548 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
6549 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
6550 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
6551 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
6552 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
6553 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
6554 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
6555 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
6556 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
6557 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6558 N->getOperand(1)));
6559 case Intrinsic::loongarch_lsx_vreplve_b:
6560 case Intrinsic::loongarch_lsx_vreplve_h:
6561 case Intrinsic::loongarch_lsx_vreplve_w:
6562 case Intrinsic::loongarch_lsx_vreplve_d:
6563 case Intrinsic::loongarch_lasx_xvreplve_b:
6564 case Intrinsic::loongarch_lasx_xvreplve_h:
6565 case Intrinsic::loongarch_lasx_xvreplve_w:
6566 case Intrinsic::loongarch_lasx_xvreplve_d:
6567 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
6568 N->getOperand(1),
6569 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6570 N->getOperand(2)));
6571 case Intrinsic::loongarch_lsx_vpickve2gr_b:
6572 if (!Subtarget.is64Bit())
6574 break;
6575 case Intrinsic::loongarch_lsx_vpickve2gr_h:
6576 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
6577 if (!Subtarget.is64Bit())
6579 break;
6580 case Intrinsic::loongarch_lsx_vpickve2gr_w:
6581 if (!Subtarget.is64Bit())
6583 break;
6584 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
6585 if (!Subtarget.is64Bit())
6587 break;
6588 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
6589 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
6590 if (!Subtarget.is64Bit())
6592 break;
6593 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
6594 if (!Subtarget.is64Bit())
6596 break;
6597 case Intrinsic::loongarch_lsx_bz_b:
6598 case Intrinsic::loongarch_lsx_bz_h:
6599 case Intrinsic::loongarch_lsx_bz_w:
6600 case Intrinsic::loongarch_lsx_bz_d:
6601 case Intrinsic::loongarch_lasx_xbz_b:
6602 case Intrinsic::loongarch_lasx_xbz_h:
6603 case Intrinsic::loongarch_lasx_xbz_w:
6604 case Intrinsic::loongarch_lasx_xbz_d:
6605 if (!Subtarget.is64Bit())
6606 return DAG.getNode(LoongArchISD::VALL_ZERO, DL, N->getValueType(0),
6607 N->getOperand(1));
6608 break;
6609 case Intrinsic::loongarch_lsx_bz_v:
6610 case Intrinsic::loongarch_lasx_xbz_v:
6611 if (!Subtarget.is64Bit())
6612 return DAG.getNode(LoongArchISD::VANY_ZERO, DL, N->getValueType(0),
6613 N->getOperand(1));
6614 break;
6615 case Intrinsic::loongarch_lsx_bnz_b:
6616 case Intrinsic::loongarch_lsx_bnz_h:
6617 case Intrinsic::loongarch_lsx_bnz_w:
6618 case Intrinsic::loongarch_lsx_bnz_d:
6619 case Intrinsic::loongarch_lasx_xbnz_b:
6620 case Intrinsic::loongarch_lasx_xbnz_h:
6621 case Intrinsic::loongarch_lasx_xbnz_w:
6622 case Intrinsic::loongarch_lasx_xbnz_d:
6623 if (!Subtarget.is64Bit())
6624 return DAG.getNode(LoongArchISD::VALL_NONZERO, DL, N->getValueType(0),
6625 N->getOperand(1));
6626 break;
6627 case Intrinsic::loongarch_lsx_bnz_v:
6628 case Intrinsic::loongarch_lasx_xbnz_v:
6629 if (!Subtarget.is64Bit())
6630 return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
6631 N->getOperand(1));
6632 break;
6633 case Intrinsic::loongarch_lasx_concat_128_s:
6634 case Intrinsic::loongarch_lasx_concat_128_d:
6635 case Intrinsic::loongarch_lasx_concat_128:
6636 return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0),
6637 N->getOperand(1), N->getOperand(2));
6638 }
6639 return SDValue();
6640}
6641
6644 const LoongArchSubtarget &Subtarget) {
6645   // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
6646   // conversion is unnecessary and can be replaced with the
6647   // MOVFR2GR_S_LA64 operand.
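  // A rough illustration in DAG terms (not taken from a test case, just the
  // pattern this combine matches):
  //   (MOVGR2FR_W_LA64 (MOVFR2GR_S_LA64 x)) --> x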
6648 SDValue Op0 = N->getOperand(0);
6650 return Op0.getOperand(0);
6651 return SDValue();
6652}
6653
6656 const LoongArchSubtarget &Subtarget) {
6657 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
6658 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
6659 // operand.
6660 SDValue Op0 = N->getOperand(0);
6662 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
6663 "Unexpected value type!");
6664 return Op0.getOperand(0);
6665 }
6666 return SDValue();
6667}
6668
6671 const LoongArchSubtarget &Subtarget) {
6672 MVT VT = N->getSimpleValueType(0);
6673 unsigned NumBits = VT.getScalarSizeInBits();
6674
6675 // Simplify the inputs.
6676 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6677 APInt DemandedMask(APInt::getAllOnes(NumBits));
6678 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
6679 return SDValue(N, 0);
6680
6681 return SDValue();
6682}
6683
6684static SDValue
6687 const LoongArchSubtarget &Subtarget) {
6688 SDValue Op0 = N->getOperand(0);
6689 SDLoc DL(N);
6690
6691 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
6692 // redundant. Instead, use BuildPairF64's operands directly.
6694 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
6695
6696 if (Op0->isUndef()) {
6697 SDValue Lo = DAG.getUNDEF(MVT::i32);
6698 SDValue Hi = DAG.getUNDEF(MVT::i32);
6699 return DCI.CombineTo(N, Lo, Hi);
6700 }
6701
6702 // It's cheaper to materialise two 32-bit integers than to load a double
6703 // from the constant pool and transfer it to integer registers through the
6704 // stack.
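  // For instance, the f64 constant 1.0 has the bit pattern 0x3FF0000000000000,
  // so this path would emit Lo = 0x00000000 and Hi = 0x3FF00000 as two plain
  // 32-bit immediates instead of a constant-pool load.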
6706 APInt V = C->getValueAPF().bitcastToAPInt();
6707 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
6708 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
6709 return DCI.CombineTo(N, Lo, Hi);
6710 }
6711
6712 return SDValue();
6713}
6714
6715static SDValue
6718 const LoongArchSubtarget &Subtarget) {
6719 if (!DCI.isBeforeLegalize())
6720 return SDValue();
6721
6722 MVT EltVT = N->getSimpleValueType(0);
6723 SDValue Vec = N->getOperand(0);
6724 EVT VecTy = Vec->getValueType(0);
6725 SDValue Idx = N->getOperand(1);
6726 unsigned IdxOp = Idx.getOpcode();
6727 SDLoc DL(N);
6728
6729 if (!VecTy.is256BitVector() || isa<ConstantSDNode>(Idx))
6730 return SDValue();
6731
6732 // Combine:
6733 // t2 = truncate t1
6734 // t3 = {zero/sign/any}_extend t2
6735 // t4 = extract_vector_elt t0, t3
6736 // to:
6737 // t4 = extract_vector_elt t0, t1
6738 if (IdxOp == ISD::ZERO_EXTEND || IdxOp == ISD::SIGN_EXTEND ||
6739 IdxOp == ISD::ANY_EXTEND) {
6740 SDValue IdxOrig = Idx.getOperand(0);
6741     if (IdxOrig.getOpcode() != ISD::TRUNCATE)
6742 return SDValue();
6743
6744 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
6745 IdxOrig.getOperand(0));
6746 }
6747
6748 return SDValue();
6749}
6750
6752 DAGCombinerInfo &DCI) const {
6753 SelectionDAG &DAG = DCI.DAG;
6754 switch (N->getOpcode()) {
6755 default:
6756 break;
6757 case ISD::AND:
6758 return performANDCombine(N, DAG, DCI, Subtarget);
6759 case ISD::OR:
6760 return performORCombine(N, DAG, DCI, Subtarget);
6761 case ISD::SETCC:
6762 return performSETCCCombine(N, DAG, DCI, Subtarget);
6763 case ISD::SRL:
6764 return performSRLCombine(N, DAG, DCI, Subtarget);
6765 case ISD::BITCAST:
6766 return performBITCASTCombine(N, DAG, DCI, Subtarget);
6768 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
6770 return performBR_CCCombine(N, DAG, DCI, Subtarget);
6772 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
6774 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
6776 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
6778 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
6781 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
6783 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
6785 return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
6786 }
6787 return SDValue();
6788}
6789
6792 if (!ZeroDivCheck)
6793 return MBB;
6794
6795 // Build instructions:
6796 // MBB:
6797 // div(or mod) $dst, $dividend, $divisor
6798 // bne $divisor, $zero, SinkMBB
6799 // BreakMBB:
6800 // break 7 // BRK_DIVZERO
6801 // SinkMBB:
6802 // fallthrough
6803 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
6804 MachineFunction::iterator It = ++MBB->getIterator();
6805 MachineFunction *MF = MBB->getParent();
6806 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6807 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6808 MF->insert(It, BreakMBB);
6809 MF->insert(It, SinkMBB);
6810
6811 // Transfer the remainder of MBB and its successor edges to SinkMBB.
6812 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
6813 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
6814
6815 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
6816 DebugLoc DL = MI.getDebugLoc();
6817 MachineOperand &Divisor = MI.getOperand(2);
6818 Register DivisorReg = Divisor.getReg();
6819
6820 // MBB:
6821 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
6822 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
6823 .addReg(LoongArch::R0)
6824 .addMBB(SinkMBB);
6825 MBB->addSuccessor(BreakMBB);
6826 MBB->addSuccessor(SinkMBB);
6827
6828 // BreakMBB:
6829   // See the Linux header file arch/loongarch/include/uapi/asm/break.h for the
6830 // definition of BRK_DIVZERO.
6831 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
6832 BreakMBB->addSuccessor(SinkMBB);
6833
6834 // Clear Divisor's kill flag.
6835 Divisor.setIsKill(false);
6836
6837 return SinkMBB;
6838}
6839
6840static MachineBasicBlock *
6842 const LoongArchSubtarget &Subtarget) {
6843 unsigned CondOpc;
6844 switch (MI.getOpcode()) {
6845 default:
6846 llvm_unreachable("Unexpected opcode");
6847 case LoongArch::PseudoVBZ:
6848 CondOpc = LoongArch::VSETEQZ_V;
6849 break;
6850 case LoongArch::PseudoVBZ_B:
6851 CondOpc = LoongArch::VSETANYEQZ_B;
6852 break;
6853 case LoongArch::PseudoVBZ_H:
6854 CondOpc = LoongArch::VSETANYEQZ_H;
6855 break;
6856 case LoongArch::PseudoVBZ_W:
6857 CondOpc = LoongArch::VSETANYEQZ_W;
6858 break;
6859 case LoongArch::PseudoVBZ_D:
6860 CondOpc = LoongArch::VSETANYEQZ_D;
6861 break;
6862 case LoongArch::PseudoVBNZ:
6863 CondOpc = LoongArch::VSETNEZ_V;
6864 break;
6865 case LoongArch::PseudoVBNZ_B:
6866 CondOpc = LoongArch::VSETALLNEZ_B;
6867 break;
6868 case LoongArch::PseudoVBNZ_H:
6869 CondOpc = LoongArch::VSETALLNEZ_H;
6870 break;
6871 case LoongArch::PseudoVBNZ_W:
6872 CondOpc = LoongArch::VSETALLNEZ_W;
6873 break;
6874 case LoongArch::PseudoVBNZ_D:
6875 CondOpc = LoongArch::VSETALLNEZ_D;
6876 break;
6877 case LoongArch::PseudoXVBZ:
6878 CondOpc = LoongArch::XVSETEQZ_V;
6879 break;
6880 case LoongArch::PseudoXVBZ_B:
6881 CondOpc = LoongArch::XVSETANYEQZ_B;
6882 break;
6883 case LoongArch::PseudoXVBZ_H:
6884 CondOpc = LoongArch::XVSETANYEQZ_H;
6885 break;
6886 case LoongArch::PseudoXVBZ_W:
6887 CondOpc = LoongArch::XVSETANYEQZ_W;
6888 break;
6889 case LoongArch::PseudoXVBZ_D:
6890 CondOpc = LoongArch::XVSETANYEQZ_D;
6891 break;
6892 case LoongArch::PseudoXVBNZ:
6893 CondOpc = LoongArch::XVSETNEZ_V;
6894 break;
6895 case LoongArch::PseudoXVBNZ_B:
6896 CondOpc = LoongArch::XVSETALLNEZ_B;
6897 break;
6898 case LoongArch::PseudoXVBNZ_H:
6899 CondOpc = LoongArch::XVSETALLNEZ_H;
6900 break;
6901 case LoongArch::PseudoXVBNZ_W:
6902 CondOpc = LoongArch::XVSETALLNEZ_W;
6903 break;
6904 case LoongArch::PseudoXVBNZ_D:
6905 CondOpc = LoongArch::XVSETALLNEZ_D;
6906 break;
6907 }
6908
6909 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6910 const BasicBlock *LLVM_BB = BB->getBasicBlock();
6911 DebugLoc DL = MI.getDebugLoc();
6914
6915 MachineFunction *F = BB->getParent();
6916 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
6917 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
6918 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
6919
6920 F->insert(It, FalseBB);
6921 F->insert(It, TrueBB);
6922 F->insert(It, SinkBB);
6923
6924 // Transfer the remainder of MBB and its successor edges to Sink.
6925 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
6927
6928 // Insert the real instruction to BB.
6929 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
6930 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
6931
6932 // Insert branch.
6933 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
6934 BB->addSuccessor(FalseBB);
6935 BB->addSuccessor(TrueBB);
6936
6937 // FalseBB.
6938 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6939 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
6940 .addReg(LoongArch::R0)
6941 .addImm(0);
6942 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
6943 FalseBB->addSuccessor(SinkBB);
6944
6945 // TrueBB.
6946 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6947 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
6948 .addReg(LoongArch::R0)
6949 .addImm(1);
6950 TrueBB->addSuccessor(SinkBB);
6951
6952 // SinkBB: merge the results.
6953 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
6954 MI.getOperand(0).getReg())
6955 .addReg(RD1)
6956 .addMBB(FalseBB)
6957 .addReg(RD2)
6958 .addMBB(TrueBB);
6959
6960 // The pseudo instruction is gone now.
6961 MI.eraseFromParent();
6962 return SinkBB;
6963}
6964
6965static MachineBasicBlock *
6967 const LoongArchSubtarget &Subtarget) {
6968 unsigned InsOp;
6969 unsigned BroadcastOp;
6970 unsigned HalfSize;
6971 switch (MI.getOpcode()) {
6972 default:
6973 llvm_unreachable("Unexpected opcode");
6974 case LoongArch::PseudoXVINSGR2VR_B:
6975 HalfSize = 16;
6976 BroadcastOp = LoongArch::XVREPLGR2VR_B;
6977 InsOp = LoongArch::XVEXTRINS_B;
6978 break;
6979 case LoongArch::PseudoXVINSGR2VR_H:
6980 HalfSize = 8;
6981 BroadcastOp = LoongArch::XVREPLGR2VR_H;
6982 InsOp = LoongArch::XVEXTRINS_H;
6983 break;
6984 }
6985 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6986 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
6987 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
6988 DebugLoc DL = MI.getDebugLoc();
6990 // XDst = vector_insert XSrc, Elt, Idx
6991 Register XDst = MI.getOperand(0).getReg();
6992 Register XSrc = MI.getOperand(1).getReg();
6993 Register Elt = MI.getOperand(2).getReg();
6994 unsigned Idx = MI.getOperand(3).getImm();
6995
6996 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
6997 Idx < HalfSize) {
6998 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
6999 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
7000
7001 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
7002 .addReg(XSrc, 0, LoongArch::sub_128);
7003 BuildMI(*BB, MI, DL,
7004 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
7005 : LoongArch::VINSGR2VR_B),
7006 ScratchSubReg2)
7007 .addReg(ScratchSubReg1)
7008 .addReg(Elt)
7009 .addImm(Idx);
7010
7011 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
7012 .addImm(0)
7013 .addReg(ScratchSubReg2)
7014 .addImm(LoongArch::sub_128);
7015 } else {
7016 Register ScratchReg1 = MRI.createVirtualRegister(RC);
7017 Register ScratchReg2 = MRI.createVirtualRegister(RC);
7018
7019 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
7020
7021 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
7022 .addReg(ScratchReg1)
7023 .addReg(XSrc)
7024 .addImm(Idx >= HalfSize ? 48 : 18);
7025
7026 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
7027 .addReg(XSrc)
7028 .addReg(ScratchReg2)
7029 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
7030 }
7031
7032 MI.eraseFromParent();
7033 return BB;
7034}
7035
7038 const LoongArchSubtarget &Subtarget) {
7039 assert(Subtarget.hasExtLSX());
7040 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7041 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
7042 DebugLoc DL = MI.getDebugLoc();
7044 Register Dst = MI.getOperand(0).getReg();
7045 Register Src = MI.getOperand(1).getReg();
7046 Register ScratchReg1 = MRI.createVirtualRegister(RC);
7047 Register ScratchReg2 = MRI.createVirtualRegister(RC);
7048 Register ScratchReg3 = MRI.createVirtualRegister(RC);
7049
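  // Sketch of the expansion performed below: zero an LSX register with VLDI,
  // insert the GPR operand into element 0, run VPCNT over the vector, and move
  // element 0 of the result back into the destination GPR.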
7050 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
7051 BuildMI(*BB, MI, DL,
7052 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
7053 : LoongArch::VINSGR2VR_W),
7054 ScratchReg2)
7055 .addReg(ScratchReg1)
7056 .addReg(Src)
7057 .addImm(0);
7058 BuildMI(
7059 *BB, MI, DL,
7060 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
7061 ScratchReg3)
7062 .addReg(ScratchReg2);
7063 BuildMI(*BB, MI, DL,
7064 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
7065 : LoongArch::VPICKVE2GR_W),
7066 Dst)
7067 .addReg(ScratchReg3)
7068 .addImm(0);
7069
7070 MI.eraseFromParent();
7071 return BB;
7072}
7073
7074static MachineBasicBlock *
7076 const LoongArchSubtarget &Subtarget) {
7077 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7078 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
7079 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
7081 Register Dst = MI.getOperand(0).getReg();
7082 Register Src = MI.getOperand(1).getReg();
7083 DebugLoc DL = MI.getDebugLoc();
7084 unsigned EleBits = 8;
7085 unsigned NotOpc = 0;
7086 unsigned MskOpc;
7087
7088 switch (MI.getOpcode()) {
7089 default:
7090 llvm_unreachable("Unexpected opcode");
7091 case LoongArch::PseudoVMSKLTZ_B:
7092 MskOpc = LoongArch::VMSKLTZ_B;
7093 break;
7094 case LoongArch::PseudoVMSKLTZ_H:
7095 MskOpc = LoongArch::VMSKLTZ_H;
7096 EleBits = 16;
7097 break;
7098 case LoongArch::PseudoVMSKLTZ_W:
7099 MskOpc = LoongArch::VMSKLTZ_W;
7100 EleBits = 32;
7101 break;
7102 case LoongArch::PseudoVMSKLTZ_D:
7103 MskOpc = LoongArch::VMSKLTZ_D;
7104 EleBits = 64;
7105 break;
7106 case LoongArch::PseudoVMSKGEZ_B:
7107 MskOpc = LoongArch::VMSKGEZ_B;
7108 break;
7109 case LoongArch::PseudoVMSKEQZ_B:
7110 MskOpc = LoongArch::VMSKNZ_B;
7111 NotOpc = LoongArch::VNOR_V;
7112 break;
7113 case LoongArch::PseudoVMSKNEZ_B:
7114 MskOpc = LoongArch::VMSKNZ_B;
7115 break;
7116 case LoongArch::PseudoXVMSKLTZ_B:
7117 MskOpc = LoongArch::XVMSKLTZ_B;
7118 RC = &LoongArch::LASX256RegClass;
7119 break;
7120 case LoongArch::PseudoXVMSKLTZ_H:
7121 MskOpc = LoongArch::XVMSKLTZ_H;
7122 RC = &LoongArch::LASX256RegClass;
7123 EleBits = 16;
7124 break;
7125 case LoongArch::PseudoXVMSKLTZ_W:
7126 MskOpc = LoongArch::XVMSKLTZ_W;
7127 RC = &LoongArch::LASX256RegClass;
7128 EleBits = 32;
7129 break;
7130 case LoongArch::PseudoXVMSKLTZ_D:
7131 MskOpc = LoongArch::XVMSKLTZ_D;
7132 RC = &LoongArch::LASX256RegClass;
7133 EleBits = 64;
7134 break;
7135 case LoongArch::PseudoXVMSKGEZ_B:
7136 MskOpc = LoongArch::XVMSKGEZ_B;
7137 RC = &LoongArch::LASX256RegClass;
7138 break;
7139 case LoongArch::PseudoXVMSKEQZ_B:
7140 MskOpc = LoongArch::XVMSKNZ_B;
7141 NotOpc = LoongArch::XVNOR_V;
7142 RC = &LoongArch::LASX256RegClass;
7143 break;
7144 case LoongArch::PseudoXVMSKNEZ_B:
7145 MskOpc = LoongArch::XVMSKNZ_B;
7146 RC = &LoongArch::LASX256RegClass;
7147 break;
7148 }
7149
7150 Register Msk = MRI.createVirtualRegister(RC);
7151 if (NotOpc) {
7152 Register Tmp = MRI.createVirtualRegister(RC);
7153 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
7154 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
7155 .addReg(Tmp, RegState::Kill)
7156 .addReg(Tmp, RegState::Kill);
7157 } else {
7158 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
7159 }
7160
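  // For 256-bit (LASX) sources the mask is produced per 128-bit half, so the
  // code below reads both 32-bit halves with XVPICKVE2GR_WU and merges the
  // high half into bits [256/EleBits-1 : 128/EleBits] of the result using
  // BSTRINS; 128-bit (LSX) sources only need a single VPICKVE2GR_HU.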
7161 if (TRI->getRegSizeInBits(*RC) > 128) {
7162 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7163 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7164 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
7165 .addReg(Msk)
7166 .addImm(0);
7167 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
7168 .addReg(Msk, RegState::Kill)
7169 .addImm(4);
7170 BuildMI(*BB, MI, DL,
7171 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
7172 : LoongArch::BSTRINS_W),
7173 Dst)
7176 .addImm(256 / EleBits - 1)
7177 .addImm(128 / EleBits);
7178 } else {
7179 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
7180 .addReg(Msk, RegState::Kill)
7181 .addImm(0);
7182 }
7183
7184 MI.eraseFromParent();
7185 return BB;
7186}
7187
7188static MachineBasicBlock *
7190 const LoongArchSubtarget &Subtarget) {
7191 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
7192 "Unexpected instruction");
7193
7194 MachineFunction &MF = *BB->getParent();
7195 DebugLoc DL = MI.getDebugLoc();
7197 Register LoReg = MI.getOperand(0).getReg();
7198 Register HiReg = MI.getOperand(1).getReg();
7199 Register SrcReg = MI.getOperand(2).getReg();
7200
7201 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
7202 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
7203 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
7204 MI.eraseFromParent(); // The pseudo instruction is gone now.
7205 return BB;
7206}
7207
7208static MachineBasicBlock *
7210 const LoongArchSubtarget &Subtarget) {
7211 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
7212 "Unexpected instruction");
7213
7214 MachineFunction &MF = *BB->getParent();
7215 DebugLoc DL = MI.getDebugLoc();
7218 Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
7219 Register DstReg = MI.getOperand(0).getReg();
7220 Register LoReg = MI.getOperand(1).getReg();
7221 Register HiReg = MI.getOperand(2).getReg();
7222
7223 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
7224 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
7225 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
7226 .addReg(TmpReg, RegState::Kill)
7227 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
7228 MI.eraseFromParent(); // The pseudo instruction is gone now.
7229 return BB;
7230}
7231
7233 switch (MI.getOpcode()) {
7234 default:
7235 return false;
7236 case LoongArch::Select_GPR_Using_CC_GPR:
7237 return true;
7238 }
7239}
7240
7241static MachineBasicBlock *
7243 const LoongArchSubtarget &Subtarget) {
7244 // To "insert" Select_* instructions, we actually have to insert the triangle
7245 // control-flow pattern. The incoming instructions know the destination vreg
7246 // to set, the condition code register to branch on, the true/false values to
7247 // select between, and the condcode to use to select the appropriate branch.
7248 //
7249 // We produce the following control flow:
7250 // HeadMBB
7251 // | \
7252 // | IfFalseMBB
7253 // | /
7254 // TailMBB
7255 //
7256 // When we find a sequence of selects we attempt to optimize their emission
7257 // by sharing the control flow. Currently we only handle cases where we have
7258 // multiple selects with the exact same condition (same LHS, RHS and CC).
7259 // The selects may be interleaved with other instructions if the other
7260 // instructions meet some requirements we deem safe:
7261   // - They are not pseudo instructions.
7262   // - They are debug instructions, or otherwise
7263   // - they do not have side-effects, do not access memory, and their inputs do
7264   //   not depend on the results of the select pseudo-instructions.
7265 // The TrueV/FalseV operands of the selects cannot depend on the result of
7266 // previous selects in the sequence.
7267 // These conditions could be further relaxed. See the X86 target for a
7268 // related approach and more information.
7269
7270 Register LHS = MI.getOperand(1).getReg();
7271 Register RHS;
7272 if (MI.getOperand(2).isReg())
7273 RHS = MI.getOperand(2).getReg();
7274 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
7275
7276 SmallVector<MachineInstr *, 4> SelectDebugValues;
7277 SmallSet<Register, 4> SelectDests;
7278 SelectDests.insert(MI.getOperand(0).getReg());
7279
7280 MachineInstr *LastSelectPseudo = &MI;
7281 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
7282 SequenceMBBI != E; ++SequenceMBBI) {
7283 if (SequenceMBBI->isDebugInstr())
7284 continue;
7285 if (isSelectPseudo(*SequenceMBBI)) {
7286 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
7287 !SequenceMBBI->getOperand(2).isReg() ||
7288 SequenceMBBI->getOperand(2).getReg() != RHS ||
7289 SequenceMBBI->getOperand(3).getImm() != CC ||
7290 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
7291 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
7292 break;
7293 LastSelectPseudo = &*SequenceMBBI;
7294 SequenceMBBI->collectDebugValues(SelectDebugValues);
7295 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
7296 continue;
7297 }
7298 if (SequenceMBBI->hasUnmodeledSideEffects() ||
7299 SequenceMBBI->mayLoadOrStore() ||
7300 SequenceMBBI->usesCustomInsertionHook())
7301 break;
7302 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
7303 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
7304 }))
7305 break;
7306 }
7307
7308 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
7309 const BasicBlock *LLVM_BB = BB->getBasicBlock();
7310 DebugLoc DL = MI.getDebugLoc();
7312
7313 MachineBasicBlock *HeadMBB = BB;
7314 MachineFunction *F = BB->getParent();
7315 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
7316 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
7317
7318 F->insert(I, IfFalseMBB);
7319 F->insert(I, TailMBB);
7320
7321 // Set the call frame size on entry to the new basic blocks.
7322 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
7323 IfFalseMBB->setCallFrameSize(CallFrameSize);
7324 TailMBB->setCallFrameSize(CallFrameSize);
7325
7326 // Transfer debug instructions associated with the selects to TailMBB.
7327 for (MachineInstr *DebugInstr : SelectDebugValues) {
7328 TailMBB->push_back(DebugInstr->removeFromParent());
7329 }
7330
7331 // Move all instructions after the sequence to TailMBB.
7332 TailMBB->splice(TailMBB->end(), HeadMBB,
7333 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
7334 // Update machine-CFG edges by transferring all successors of the current
7335 // block to the new block which will contain the Phi nodes for the selects.
7336 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
7337 // Set the successors for HeadMBB.
7338 HeadMBB->addSuccessor(IfFalseMBB);
7339 HeadMBB->addSuccessor(TailMBB);
7340
7341 // Insert appropriate branch.
7342 if (MI.getOperand(2).isImm())
7343 BuildMI(HeadMBB, DL, TII.get(CC))
7344 .addReg(LHS)
7345 .addImm(MI.getOperand(2).getImm())
7346 .addMBB(TailMBB);
7347 else
7348 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
7349
7350 // IfFalseMBB just falls through to TailMBB.
7351 IfFalseMBB->addSuccessor(TailMBB);
7352
7353 // Create PHIs for all of the select pseudo-instructions.
7354 auto SelectMBBI = MI.getIterator();
7355 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
7356 auto InsertionPoint = TailMBB->begin();
7357 while (SelectMBBI != SelectEnd) {
7358 auto Next = std::next(SelectMBBI);
7359 if (isSelectPseudo(*SelectMBBI)) {
7360 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
7361 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
7362 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
7363 .addReg(SelectMBBI->getOperand(4).getReg())
7364 .addMBB(HeadMBB)
7365 .addReg(SelectMBBI->getOperand(5).getReg())
7366 .addMBB(IfFalseMBB);
7367 SelectMBBI->eraseFromParent();
7368 }
7369 SelectMBBI = Next;
7370 }
7371
7372 F->getProperties().resetNoPHIs();
7373 return TailMBB;
7374}
7375
7376MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
7377 MachineInstr &MI, MachineBasicBlock *BB) const {
7378 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7379 DebugLoc DL = MI.getDebugLoc();
7380
7381 switch (MI.getOpcode()) {
7382 default:
7383 llvm_unreachable("Unexpected instr type to insert");
7384 case LoongArch::DIV_W:
7385 case LoongArch::DIV_WU:
7386 case LoongArch::MOD_W:
7387 case LoongArch::MOD_WU:
7388 case LoongArch::DIV_D:
7389 case LoongArch::DIV_DU:
7390 case LoongArch::MOD_D:
7391 case LoongArch::MOD_DU:
7392 return insertDivByZeroTrap(MI, BB);
7393 break;
7394 case LoongArch::WRFCSR: {
7395 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
7396 LoongArch::FCSR0 + MI.getOperand(0).getImm())
7397 .addReg(MI.getOperand(1).getReg());
7398 MI.eraseFromParent();
7399 return BB;
7400 }
7401 case LoongArch::RDFCSR: {
7402 MachineInstr *ReadFCSR =
7403 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
7404 MI.getOperand(0).getReg())
7405 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
7406 ReadFCSR->getOperand(1).setIsUndef();
7407 MI.eraseFromParent();
7408 return BB;
7409 }
7410 case LoongArch::Select_GPR_Using_CC_GPR:
7411 return emitSelectPseudo(MI, BB, Subtarget);
7412 case LoongArch::BuildPairF64Pseudo:
7413 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
7414 case LoongArch::SplitPairF64Pseudo:
7415 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
7416 case LoongArch::PseudoVBZ:
7417 case LoongArch::PseudoVBZ_B:
7418 case LoongArch::PseudoVBZ_H:
7419 case LoongArch::PseudoVBZ_W:
7420 case LoongArch::PseudoVBZ_D:
7421 case LoongArch::PseudoVBNZ:
7422 case LoongArch::PseudoVBNZ_B:
7423 case LoongArch::PseudoVBNZ_H:
7424 case LoongArch::PseudoVBNZ_W:
7425 case LoongArch::PseudoVBNZ_D:
7426 case LoongArch::PseudoXVBZ:
7427 case LoongArch::PseudoXVBZ_B:
7428 case LoongArch::PseudoXVBZ_H:
7429 case LoongArch::PseudoXVBZ_W:
7430 case LoongArch::PseudoXVBZ_D:
7431 case LoongArch::PseudoXVBNZ:
7432 case LoongArch::PseudoXVBNZ_B:
7433 case LoongArch::PseudoXVBNZ_H:
7434 case LoongArch::PseudoXVBNZ_W:
7435 case LoongArch::PseudoXVBNZ_D:
7436 return emitVecCondBranchPseudo(MI, BB, Subtarget);
7437 case LoongArch::PseudoXVINSGR2VR_B:
7438 case LoongArch::PseudoXVINSGR2VR_H:
7439 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
7440 case LoongArch::PseudoCTPOP:
7441 return emitPseudoCTPOP(MI, BB, Subtarget);
7442 case LoongArch::PseudoVMSKLTZ_B:
7443 case LoongArch::PseudoVMSKLTZ_H:
7444 case LoongArch::PseudoVMSKLTZ_W:
7445 case LoongArch::PseudoVMSKLTZ_D:
7446 case LoongArch::PseudoVMSKGEZ_B:
7447 case LoongArch::PseudoVMSKEQZ_B:
7448 case LoongArch::PseudoVMSKNEZ_B:
7449 case LoongArch::PseudoXVMSKLTZ_B:
7450 case LoongArch::PseudoXVMSKLTZ_H:
7451 case LoongArch::PseudoXVMSKLTZ_W:
7452 case LoongArch::PseudoXVMSKLTZ_D:
7453 case LoongArch::PseudoXVMSKGEZ_B:
7454 case LoongArch::PseudoXVMSKEQZ_B:
7455 case LoongArch::PseudoXVMSKNEZ_B:
7456 return emitPseudoVMSKCOND(MI, BB, Subtarget);
7457 case TargetOpcode::STATEPOINT:
7458     // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
7459     // while the bl call instruction (to which the statepoint is lowered in
7460     // the end) has an implicit def. This def is early-clobber as it is set
7461     // at the moment of the call, before any use is read.
7462     // Add this implicit dead def here as a workaround.
7463 MI.addOperand(*MI.getMF(),
7465 LoongArch::R1, /*isDef*/ true,
7466 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
7467 /*isUndef*/ false, /*isEarlyClobber*/ true));
7468 if (!Subtarget.is64Bit())
7469 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
7470 return emitPatchPoint(MI, BB);
7471 }
7472}
7473
7475 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
7476 unsigned *Fast) const {
7477 if (!Subtarget.hasUAL())
7478 return false;
7479
7480 // TODO: set reasonable speed number.
7481 if (Fast)
7482 *Fast = 1;
7483 return true;
7484}
7485
7486const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
7487 switch ((LoongArchISD::NodeType)Opcode) {
7489 break;
7490
7491#define NODE_NAME_CASE(node) \
7492 case LoongArchISD::node: \
7493 return "LoongArchISD::" #node;
7494
7495 // TODO: Add more target-dependent nodes later.
7496 NODE_NAME_CASE(CALL)
7497 NODE_NAME_CASE(CALL_MEDIUM)
7498 NODE_NAME_CASE(CALL_LARGE)
7499 NODE_NAME_CASE(RET)
7500 NODE_NAME_CASE(TAIL)
7501 NODE_NAME_CASE(TAIL_MEDIUM)
7502 NODE_NAME_CASE(TAIL_LARGE)
7503 NODE_NAME_CASE(SELECT_CC)
7504 NODE_NAME_CASE(BR_CC)
7505 NODE_NAME_CASE(BRCOND)
7506 NODE_NAME_CASE(SLL_W)
7507 NODE_NAME_CASE(SRA_W)
7508 NODE_NAME_CASE(SRL_W)
7509 NODE_NAME_CASE(BSTRINS)
7510 NODE_NAME_CASE(BSTRPICK)
7511 NODE_NAME_CASE(MOVGR2FR_W)
7512 NODE_NAME_CASE(MOVGR2FR_W_LA64)
7513 NODE_NAME_CASE(MOVGR2FR_D)
7514 NODE_NAME_CASE(MOVGR2FR_D_LO_HI)
7515 NODE_NAME_CASE(MOVFR2GR_S_LA64)
7516 NODE_NAME_CASE(FTINT)
7517 NODE_NAME_CASE(BUILD_PAIR_F64)
7518 NODE_NAME_CASE(SPLIT_PAIR_F64)
7519 NODE_NAME_CASE(REVB_2H)
7520 NODE_NAME_CASE(REVB_2W)
7521 NODE_NAME_CASE(BITREV_4B)
7522 NODE_NAME_CASE(BITREV_8B)
7523 NODE_NAME_CASE(BITREV_W)
7524 NODE_NAME_CASE(ROTR_W)
7525 NODE_NAME_CASE(ROTL_W)
7526 NODE_NAME_CASE(DIV_W)
7527 NODE_NAME_CASE(DIV_WU)
7528 NODE_NAME_CASE(MOD_W)
7529 NODE_NAME_CASE(MOD_WU)
7530 NODE_NAME_CASE(CLZ_W)
7531 NODE_NAME_CASE(CTZ_W)
7532 NODE_NAME_CASE(DBAR)
7533 NODE_NAME_CASE(IBAR)
7534 NODE_NAME_CASE(BREAK)
7535 NODE_NAME_CASE(SYSCALL)
7536 NODE_NAME_CASE(CRC_W_B_W)
7537 NODE_NAME_CASE(CRC_W_H_W)
7538 NODE_NAME_CASE(CRC_W_W_W)
7539 NODE_NAME_CASE(CRC_W_D_W)
7540 NODE_NAME_CASE(CRCC_W_B_W)
7541 NODE_NAME_CASE(CRCC_W_H_W)
7542 NODE_NAME_CASE(CRCC_W_W_W)
7543 NODE_NAME_CASE(CRCC_W_D_W)
7544 NODE_NAME_CASE(CSRRD)
7545 NODE_NAME_CASE(CSRWR)
7546 NODE_NAME_CASE(CSRXCHG)
7547 NODE_NAME_CASE(IOCSRRD_B)
7548 NODE_NAME_CASE(IOCSRRD_H)
7549 NODE_NAME_CASE(IOCSRRD_W)
7550 NODE_NAME_CASE(IOCSRRD_D)
7551 NODE_NAME_CASE(IOCSRWR_B)
7552 NODE_NAME_CASE(IOCSRWR_H)
7553 NODE_NAME_CASE(IOCSRWR_W)
7554 NODE_NAME_CASE(IOCSRWR_D)
7555 NODE_NAME_CASE(CPUCFG)
7556 NODE_NAME_CASE(MOVGR2FCSR)
7557 NODE_NAME_CASE(MOVFCSR2GR)
7558 NODE_NAME_CASE(CACOP_D)
7559 NODE_NAME_CASE(CACOP_W)
7560 NODE_NAME_CASE(VSHUF)
7561 NODE_NAME_CASE(VPICKEV)
7562 NODE_NAME_CASE(VPICKOD)
7563 NODE_NAME_CASE(VPACKEV)
7564 NODE_NAME_CASE(VPACKOD)
7565 NODE_NAME_CASE(VILVL)
7566 NODE_NAME_CASE(VILVH)
7567 NODE_NAME_CASE(VSHUF4I)
7568 NODE_NAME_CASE(VREPLVEI)
7569 NODE_NAME_CASE(VREPLGR2VR)
7570 NODE_NAME_CASE(XVPERMI)
7571 NODE_NAME_CASE(XVPERM)
7572 NODE_NAME_CASE(XVREPLVE0)
7573 NODE_NAME_CASE(XVREPLVE0Q)
7574 NODE_NAME_CASE(XVINSVE0)
7575 NODE_NAME_CASE(VPICK_SEXT_ELT)
7576 NODE_NAME_CASE(VPICK_ZEXT_ELT)
7577 NODE_NAME_CASE(VREPLVE)
7578 NODE_NAME_CASE(VALL_ZERO)
7579 NODE_NAME_CASE(VANY_ZERO)
7580 NODE_NAME_CASE(VALL_NONZERO)
7581 NODE_NAME_CASE(VANY_NONZERO)
7582 NODE_NAME_CASE(FRECIPE)
7583 NODE_NAME_CASE(FRSQRTE)
7584 NODE_NAME_CASE(VSLLI)
7585 NODE_NAME_CASE(VSRLI)
7586 NODE_NAME_CASE(VBSLL)
7587 NODE_NAME_CASE(VBSRL)
7588 NODE_NAME_CASE(VLDREPL)
7589 NODE_NAME_CASE(VMSKLTZ)
7590 NODE_NAME_CASE(VMSKGEZ)
7591 NODE_NAME_CASE(VMSKEQZ)
7592 NODE_NAME_CASE(VMSKNEZ)
7593 NODE_NAME_CASE(XVMSKLTZ)
7594 NODE_NAME_CASE(XVMSKGEZ)
7595 NODE_NAME_CASE(XVMSKEQZ)
7596 NODE_NAME_CASE(XVMSKNEZ)
7597 NODE_NAME_CASE(VHADDW)
7598 }
7599#undef NODE_NAME_CASE
7600 return nullptr;
7601}
7602
7603//===----------------------------------------------------------------------===//
7604// Calling Convention Implementation
7605//===----------------------------------------------------------------------===//
7606
7607 // Eight general-purpose registers a0-a7 are used for passing integer
7608 // arguments, with a0-a1 reused to return values. Generally, the GPRs are used
7609 // to pass fixed-point arguments, and floating-point arguments when no FPR is
7610 // available or when the soft-float ABI is in use.
7611const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
7612 LoongArch::R7, LoongArch::R8, LoongArch::R9,
7613 LoongArch::R10, LoongArch::R11};
7614 // Eight floating-point registers fa0-fa7 are used for passing floating-point
7615 // arguments, and fa0-fa1 are also used to return values.
7616const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
7617 LoongArch::F3, LoongArch::F4, LoongArch::F5,
7618 LoongArch::F6, LoongArch::F7};
7619// FPR32 and FPR64 alias each other.
7621 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
7622 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
7623
7624const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
7625 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
7626 LoongArch::VR6, LoongArch::VR7};
7627
7628const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
7629 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
7630 LoongArch::XR6, LoongArch::XR7};
7631
7632// Pass a 2*GRLen argument that has been split into two GRLen values through
7633// registers or the stack as necessary.
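// For example, on LA32 (GRLen == 32) an i64 argument is legalised into two i32
// halves; depending on how many of a0-a7 are still free, both halves go in
// registers, the first goes in a register and the second on the stack, or both
// go on the stack, exactly as handled below.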
7634static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
7635 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
7636 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
7637 ISD::ArgFlagsTy ArgFlags2) {
7638 unsigned GRLenInBytes = GRLen / 8;
7639 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7640 // At least one half can be passed via register.
7641 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
7642 VA1.getLocVT(), CCValAssign::Full));
7643 } else {
7644 // Both halves must be passed on the stack, with proper alignment.
7645 Align StackAlign =
7646 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
7647 State.addLoc(
7649 State.AllocateStack(GRLenInBytes, StackAlign),
7650 VA1.getLocVT(), CCValAssign::Full));
7651 State.addLoc(CCValAssign::getMem(
7652 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7653 LocVT2, CCValAssign::Full));
7654 return false;
7655 }
7656 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7657 // The second half can also be passed via register.
7658 State.addLoc(
7659 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
7660 } else {
7661 // The second half is passed via the stack, without additional alignment.
7662 State.addLoc(CCValAssign::getMem(
7663 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7664 LocVT2, CCValAssign::Full));
7665 }
7666 return false;
7667}
7668
7669// Implements the LoongArch calling convention. Returns true upon failure.
7671 unsigned ValNo, MVT ValVT,
7672 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
7673 CCState &State, bool IsRet, Type *OrigTy) {
7674 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
7675   assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
7676 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
7677 MVT LocVT = ValVT;
7678
7679 // Any return value split into more than two values can't be returned
7680 // directly.
7681 if (IsRet && ValNo > 1)
7682 return true;
7683
7684   // Use GPRs for floating-point values with a soft-float ABI, for variadic
       // arguments, or when no FPR is available.
7685 bool UseGPRForFloat = true;
7686
7687 switch (ABI) {
7688 default:
7689 llvm_unreachable("Unexpected ABI");
7690 break;
7695 UseGPRForFloat = ArgFlags.isVarArg();
7696 break;
7699 break;
7700 }
7701
7702 // If this is a variadic argument, the LoongArch calling convention requires
7703 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
7704 // byte alignment. An aligned register should be used regardless of whether
7705 // the original argument was split during legalisation or not. The argument
7706 // will not be passed by registers if the original type is larger than
7707 // 2*GRLen, so the register alignment rule does not apply.
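  // As a concrete (illustrative) example on LA64: a variadic __int128 has
  // 16-byte size and 16-byte alignment, so if the next free register were a5
  // (an odd index into a0-a7), a5 is skipped and the two halves are passed in
  // a6/a7 instead.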
7708 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
7709 if (ArgFlags.isVarArg() &&
7710 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
7711 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
7712 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
7713 // Skip 'odd' register if necessary.
7714 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
7715 State.AllocateReg(ArgGPRs);
7716 }
7717
7718 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
7719 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
7720 State.getPendingArgFlags();
7721
7722 assert(PendingLocs.size() == PendingArgFlags.size() &&
7723 "PendingLocs and PendingArgFlags out of sync");
7724
7725 // FPR32 and FPR64 alias each other.
7726 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
7727 UseGPRForFloat = true;
7728
7729 if (UseGPRForFloat && ValVT == MVT::f32) {
7730 LocVT = GRLenVT;
7731 LocInfo = CCValAssign::BCvt;
7732 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
7733 LocVT = MVT::i64;
7734 LocInfo = CCValAssign::BCvt;
7735 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
7736 // Handle passing f64 on LA32D with a soft float ABI or when floating point
7737 // registers are exhausted.
7738 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
7739     // Depending on the available argument GPRs, f64 may be passed in a pair of
7740 // GPRs, split between a GPR and the stack, or passed completely on the
7741 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
7742 // cases.
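    // Roughly, the three possibilities handled below are: two free GPRs hold
    // both 32-bit halves; one free GPR holds the low half and a 4-byte stack
    // slot holds the high half; or no GPR is free and the whole f64 goes into
    // an 8-byte stack slot.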
7743 MCRegister Reg = State.AllocateReg(ArgGPRs);
7744 if (!Reg) {
7745 int64_t StackOffset = State.AllocateStack(8, Align(8));
7746 State.addLoc(
7747 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7748 return false;
7749 }
7750 LocVT = MVT::i32;
7751 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7752 MCRegister HiReg = State.AllocateReg(ArgGPRs);
7753 if (HiReg) {
7754 State.addLoc(
7755 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
7756 } else {
7757 int64_t StackOffset = State.AllocateStack(4, Align(4));
7758 State.addLoc(
7759 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7760 }
7761 return false;
7762 }
7763
7764 // Split arguments might be passed indirectly, so keep track of the pending
7765 // values.
7766 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
7767 LocVT = GRLenVT;
7768 LocInfo = CCValAssign::Indirect;
7769 PendingLocs.push_back(
7770 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
7771 PendingArgFlags.push_back(ArgFlags);
7772 if (!ArgFlags.isSplitEnd()) {
7773 return false;
7774 }
7775 }
7776
7777 // If the split argument only had two elements, it should be passed directly
7778 // in registers or on the stack.
7779 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
7780 PendingLocs.size() <= 2) {
7781 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
7782 // Apply the normal calling convention rules to the first half of the
7783 // split argument.
7784 CCValAssign VA = PendingLocs[0];
7785 ISD::ArgFlagsTy AF = PendingArgFlags[0];
7786 PendingLocs.clear();
7787 PendingArgFlags.clear();
7788 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
7789 ArgFlags);
7790 }
7791
7792 // Allocate to a register if possible, or else a stack slot.
7793 Register Reg;
7794 unsigned StoreSizeBytes = GRLen / 8;
7795 Align StackAlign = Align(GRLen / 8);
7796
7797 if (ValVT == MVT::f32 && !UseGPRForFloat) {
7798 Reg = State.AllocateReg(ArgFPR32s);
7799 } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
7800 Reg = State.AllocateReg(ArgFPR64s);
7801 } else if (ValVT.is128BitVector()) {
7802 Reg = State.AllocateReg(ArgVRs);
7803 UseGPRForFloat = false;
7804 StoreSizeBytes = 16;
7805 StackAlign = Align(16);
7806 } else if (ValVT.is256BitVector()) {
7807 Reg = State.AllocateReg(ArgXRs);
7808 UseGPRForFloat = false;
7809 StoreSizeBytes = 32;
7810 StackAlign = Align(32);
7811 } else {
7812 Reg = State.AllocateReg(ArgGPRs);
7813 }
7814
7815 unsigned StackOffset =
7816 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
7817
7818 // If we reach this point and PendingLocs is non-empty, we must be at the
7819 // end of a split argument that must be passed indirectly.
7820 if (!PendingLocs.empty()) {
7821 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
7822 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
7823 for (auto &It : PendingLocs) {
7824 if (Reg)
7825 It.convertToReg(Reg);
7826 else
7827 It.convertToMem(StackOffset);
7828 State.addLoc(It);
7829 }
7830 PendingLocs.clear();
7831 PendingArgFlags.clear();
7832 return false;
7833 }
7834 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
7835          "Expected a GRLenVT at this stage");
7836
7837 if (Reg) {
7838 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7839 return false;
7840 }
7841
7842 // When a floating-point value is passed on the stack, no bit-cast is needed.
7843 if (ValVT.isFloatingPoint()) {
7844 LocVT = ValVT;
7845 LocInfo = CCValAssign::Full;
7846 }
7847
7848 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7849 return false;
7850}
7851
7852void LoongArchTargetLowering::analyzeInputArgs(
7853 MachineFunction &MF, CCState &CCInfo,
7854 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
7855 LoongArchCCAssignFn Fn) const {
7856 FunctionType *FType = MF.getFunction().getFunctionType();
7857 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
7858 MVT ArgVT = Ins[i].VT;
7859 Type *ArgTy = nullptr;
7860 if (IsRet)
7861 ArgTy = FType->getReturnType();
7862 else if (Ins[i].isOrigArg())
7863 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
7865 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7866 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
7867 CCInfo, IsRet, ArgTy)) {
7868 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
7869 << '\n');
7870 llvm_unreachable("");
7871 }
7872 }
7873}
7874
7875void LoongArchTargetLowering::analyzeOutputArgs(
7876 MachineFunction &MF, CCState &CCInfo,
7877 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
7878 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
7879 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7880 MVT ArgVT = Outs[i].VT;
7881 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
7883 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7884 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
7885 CCInfo, IsRet, OrigTy)) {
7886 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
7887 << "\n");
7888 llvm_unreachable("");
7889 }
7890 }
7891}
7892
7893// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
7894// values.
7896 const CCValAssign &VA, const SDLoc &DL) {
7897 switch (VA.getLocInfo()) {
7898 default:
7899 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7900 case CCValAssign::Full:
7902 break;
7903 case CCValAssign::BCvt:
7904 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7905 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
7906 else
7907 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
7908 break;
7909 }
7910 return Val;
7911}
7912
7914 const CCValAssign &VA, const SDLoc &DL,
7915 const ISD::InputArg &In,
7916 const LoongArchTargetLowering &TLI) {
7919 EVT LocVT = VA.getLocVT();
7920 SDValue Val;
7921 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
7922 Register VReg = RegInfo.createVirtualRegister(RC);
7923 RegInfo.addLiveIn(VA.getLocReg(), VReg);
7924 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
7925
7926 // If input is sign extended from 32 bits, note it for the OptW pass.
7927 if (In.isOrigArg()) {
7928 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
7929 if (OrigArg->getType()->isIntegerTy()) {
7930 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
7931 // An input zero extended from i31 can also be considered sign extended.
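      // (If the value was zero-extended from fewer than 32 bits, bit 31 is
      // known to be zero, so it is also a correctly sign-extended 32-bit
      // value.)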
7932 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
7933 (BitWidth < 32 && In.Flags.isZExt())) {
7936 LAFI->addSExt32Register(VReg);
7937 }
7938 }
7939 }
7940
7941 return convertLocVTToValVT(DAG, Val, VA, DL);
7942}
7943
7944// The caller is responsible for loading the full value if the argument is
7945// passed with CCValAssign::Indirect.
7947 const CCValAssign &VA, const SDLoc &DL) {
7949 MachineFrameInfo &MFI = MF.getFrameInfo();
7950 EVT ValVT = VA.getValVT();
7951 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
7952 /*IsImmutable=*/true);
7953 SDValue FIN = DAG.getFrameIndex(
7955
7956 ISD::LoadExtType ExtType;
7957 switch (VA.getLocInfo()) {
7958 default:
7959 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7960 case CCValAssign::Full:
7962 case CCValAssign::BCvt:
7963 ExtType = ISD::NON_EXTLOAD;
7964 break;
7965 }
7966 return DAG.getExtLoad(
7967 ExtType, DL, VA.getLocVT(), Chain, FIN,
7969}
7970
7972 const CCValAssign &VA,
7973 const CCValAssign &HiVA,
7974 const SDLoc &DL) {
7975 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
7976 "Unexpected VA");
7978 MachineFrameInfo &MFI = MF.getFrameInfo();
7980
7981 assert(VA.isRegLoc() && "Expected register VA assignment");
7982
7983 Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7984 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
7985 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
7986 SDValue Hi;
7987 if (HiVA.isMemLoc()) {
7988 // Second half of f64 is passed on the stack.
7989 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
7990 /*IsImmutable=*/true);
7991 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
7992 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
7994 } else {
7995 // Second half of f64 is passed in another GPR.
7996 Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7997 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
7998 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
7999 }
8000 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
8001}
8002
8004 const CCValAssign &VA, const SDLoc &DL) {
8005 EVT LocVT = VA.getLocVT();
8006
8007 switch (VA.getLocInfo()) {
8008 default:
8009 llvm_unreachable("Unexpected CCValAssign::LocInfo");
8010 case CCValAssign::Full:
8011 break;
8012 case CCValAssign::BCvt:
8013 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
8014 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
8015 else
8016 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
8017 break;
8018 }
8019 return Val;
8020}
8021
8022static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
8023 CCValAssign::LocInfo LocInfo,
8024 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
8025 CCState &State) {
8026 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
8027 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
8028 // s0 s1 s2 s3 s4 s5 s6 s7 s8
8029 static const MCPhysReg GPRList[] = {
8030 LoongArch::R23, LoongArch::R24, LoongArch::R25,
8031 LoongArch::R26, LoongArch::R27, LoongArch::R28,
8032 LoongArch::R29, LoongArch::R30, LoongArch::R31};
8033 if (MCRegister Reg = State.AllocateReg(GPRList)) {
8034 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8035 return false;
8036 }
8037 }
8038
8039 if (LocVT == MVT::f32) {
8040 // Pass in STG registers: F1, F2, F3, F4
8041 // fs0,fs1,fs2,fs3
8042 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
8043 LoongArch::F26, LoongArch::F27};
8044 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
8045 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8046 return false;
8047 }
8048 }
8049
8050 if (LocVT == MVT::f64) {
8051 // Pass in STG registers: D1, D2, D3, D4
8052 // fs4,fs5,fs6,fs7
8053 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
8054 LoongArch::F30_64, LoongArch::F31_64};
8055 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
8056 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8057 return false;
8058 }
8059 }
8060
8061 report_fatal_error("No registers left in GHC calling convention");
8062 return true;
8063}
8064
8065// Transform physical registers into virtual registers.
8067 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8068 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
8069 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
8070
8072
8073 switch (CallConv) {
8074 default:
8075 llvm_unreachable("Unsupported calling convention");
8076 case CallingConv::C:
8077 case CallingConv::Fast:
8079 break;
8080 case CallingConv::GHC:
8081 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
8082 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
8084 "GHC calling convention requires the F and D extensions");
8085 }
8086
8087 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8088 MVT GRLenVT = Subtarget.getGRLenVT();
8089 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
8090   // Used with varargs to accumulate store chains.
8091 std::vector<SDValue> OutChains;
8092
8093 // Assign locations to all of the incoming arguments.
8095 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8096
8097 if (CallConv == CallingConv::GHC)
8099 else
8100 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
8101
8102 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
8103 CCValAssign &VA = ArgLocs[i];
8104 SDValue ArgValue;
8105 // Passing f64 on LA32D with a soft float ABI must be handled as a special
8106 // case.
8107 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8108 assert(VA.needsCustom());
8109 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
8110 } else if (VA.isRegLoc())
8111 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
8112 else
8113 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
8114 if (VA.getLocInfo() == CCValAssign::Indirect) {
8115 // If the original argument was split and passed by reference, we need to
8116 // load all parts of it here (using the same address).
8117 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
8119 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
8120 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
8121 assert(ArgPartOffset == 0);
8122 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
8123 CCValAssign &PartVA = ArgLocs[i + 1];
8124 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
8125 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8126 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
8127 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
8129 ++i;
8130 ++InsIdx;
8131 }
8132 continue;
8133 }
8134 InVals.push_back(ArgValue);
8135 }
8136
8137 if (IsVarArg) {
8139 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
8140 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
8141 MachineFrameInfo &MFI = MF.getFrameInfo();
8142 MachineRegisterInfo &RegInfo = MF.getRegInfo();
8143 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
8144
8145 // Offset of the first variable argument from stack pointer, and size of
8146 // the vararg save area. For now, the varargs save area is either zero or
8147 // large enough to hold a0-a7.
8148 int VaArgOffset, VarArgsSaveSize;
8149
8150 // If all registers are allocated, then all varargs must be passed on the
8151 // stack and we don't need to save any argregs.
8152 if (ArgRegs.size() == Idx) {
8153 VaArgOffset = CCInfo.getStackSize();
8154 VarArgsSaveSize = 0;
8155 } else {
8156 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
8157 VaArgOffset = -VarArgsSaveSize;
8158 }
8159
8160 // Record the frame index of the first variable argument,
8161 // which is needed by VASTART.
8162 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
8163 LoongArchFI->setVarArgsFrameIndex(FI);
8164
8165 // If saving an odd number of registers, create an extra stack slot to
8166 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
8167 // offsets to even-numbered registers remain 2*GRLen-aligned.
8168 if (Idx % 2) {
8169 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
8170 true);
8171 VarArgsSaveSize += GRLenInBytes;
8172 }
8173
8174 // Copy the integer registers that may have been used for passing varargs
8175 // to the vararg save area.
8176 for (unsigned I = Idx; I < ArgRegs.size();
8177 ++I, VaArgOffset += GRLenInBytes) {
8178 const Register Reg = RegInfo.createVirtualRegister(RC);
8179 RegInfo.addLiveIn(ArgRegs[I], Reg);
8180 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
8181 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
8182 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
8183 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
8185 cast<StoreSDNode>(Store.getNode())
8186 ->getMemOperand()
8187 ->setValue((Value *)nullptr);
8188 OutChains.push_back(Store);
8189 }
8190 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
8191 }
8192
8193 // All stores are grouped in one node to allow the matching between
8194 // the size of Ins and InVals. This only happens for vararg functions.
8195 if (!OutChains.empty()) {
8196 OutChains.push_back(Chain);
8197 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
8198 }
8199
8200 return Chain;
8201}
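To make the vararg save-area arithmetic above concrete, here is a standalone sketch of the same sizing computation (the helper name and signature are made up for illustration and are not part of this file). With three named GPR arguments on LA64, a3-a7 are spilled (40 bytes) plus one 8-byte padding slot so the area stays 2*GRLen aligned.

// Illustrative sketch only: mirrors the sizing logic in the IsVarArg block above,
// assuming the eight GPR argument registers a0-a7 and FirstUnusedArgGPR < 8.
static void exampleVarArgSaveArea(unsigned FirstUnusedArgGPR, unsigned GRLenInBytes,
                                  int &VaArgOffset, int &VarArgsSaveSize) {
  const unsigned NumArgGPRs = 8;
  VarArgsSaveSize = GRLenInBytes * (NumArgGPRs - FirstUnusedArgGPR);
  VaArgOffset = -VarArgsSaveSize;    // e.g. 3 named args on LA64: 40 bytes at offset -40
  if (FirstUnusedArgGPR % 2)         // odd index: add a pad slot for 2*GRLen alignment
    VarArgsSaveSize += GRLenInBytes; // ... 48 bytes total in that example
}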
8202
8204 return CI->isTailCall();
8205}
8206
8207 // Check that the return value is used only as a return value, as otherwise
8208 // we can't perform a tail call.
8210 SDValue &Chain) const {
8211 if (N->getNumValues() != 1)
8212 return false;
8213 if (!N->hasNUsesOfValue(1, 0))
8214 return false;
8215
8216 SDNode *Copy = *N->user_begin();
8217 if (Copy->getOpcode() != ISD::CopyToReg)
8218 return false;
8219
8220 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
8221 // isn't safe to perform a tail call.
8222 if (Copy->getGluedNode())
8223 return false;
8224
8225 // The copy must be used by a LoongArchISD::RET, and nothing else.
8226 bool HasRet = false;
8227 for (SDNode *Node : Copy->users()) {
8228 if (Node->getOpcode() != LoongArchISD::RET)
8229 return false;
8230 HasRet = true;
8231 }
8232
8233 if (!HasRet)
8234 return false;
8235
8236 Chain = Copy->getOperand(0);
8237 return true;
8238}
8239
8240// Check whether the call is eligible for tail call optimization.
8241bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
8242 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
8243 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
8244
8245 auto CalleeCC = CLI.CallConv;
8246 auto &Outs = CLI.Outs;
8247 auto &Caller = MF.getFunction();
8248 auto CallerCC = Caller.getCallingConv();
8249
8250 // Do not tail call opt if the stack is used to pass parameters.
8251 if (CCInfo.getStackSize() != 0)
8252 return false;
8253
8254 // Do not tail call opt if any parameters need to be passed indirectly.
8255 for (auto &VA : ArgLocs)
8256 if (VA.getLocInfo() == CCValAssign::Indirect)
8257 return false;
8258
8259 // Do not tail call opt if either caller or callee uses struct return
8260 // semantics.
8261 auto IsCallerStructRet = Caller.hasStructRetAttr();
8262 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
8263 if (IsCallerStructRet || IsCalleeStructRet)
8264 return false;
8265
8266 // Do not tail call opt if either the callee or caller has a byval argument.
8267 for (auto &Arg : Outs)
8268 if (Arg.Flags.isByVal())
8269 return false;
8270
8271 // The callee has to preserve all registers the caller needs to preserve.
8272 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
8273 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
8274 if (CalleeCC != CallerCC) {
8275 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
8276 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
8277 return false;
8278 }
8279 return true;
8280}
8281
8283 return DAG.getDataLayout().getPrefTypeAlign(
8284 VT.getTypeForEVT(*DAG.getContext()));
8285}
8286
8287// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
8288// and output parameter nodes.
8289SDValue
8291 SmallVectorImpl<SDValue> &InVals) const {
8292 SelectionDAG &DAG = CLI.DAG;
8293 SDLoc &DL = CLI.DL;
8295 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
8297 SDValue Chain = CLI.Chain;
8298 SDValue Callee = CLI.Callee;
8299 CallingConv::ID CallConv = CLI.CallConv;
8300 bool IsVarArg = CLI.IsVarArg;
8301 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8302 MVT GRLenVT = Subtarget.getGRLenVT();
8303 bool &IsTailCall = CLI.IsTailCall;
8304
8306
8307 // Analyze the operands of the call, assigning locations to each operand.
8309 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8310
8311 if (CallConv == CallingConv::GHC)
8312 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
8313 else
8314 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
8315
8316 // Check if it's really possible to do a tail call.
8317 if (IsTailCall)
8318 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
8319
8320 if (IsTailCall)
8321 ++NumTailCalls;
8322 else if (CLI.CB && CLI.CB->isMustTailCall())
8323 report_fatal_error("failed to perform tail call elimination on a call "
8324 "site marked musttail");
8325
8326 // Get a count of how many bytes are to be pushed on the stack.
8327 unsigned NumBytes = ArgCCInfo.getStackSize();
8328
8329 // Create local copies for byval args.
8330 SmallVector<SDValue> ByValArgs;
8331 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8332 ISD::ArgFlagsTy Flags = Outs[i].Flags;
8333 if (!Flags.isByVal())
8334 continue;
8335
8336 SDValue Arg = OutVals[i];
8337 unsigned Size = Flags.getByValSize();
8338 Align Alignment = Flags.getNonZeroByValAlign();
8339
8340 int FI =
8341 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
8342 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
8343 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
8344
8345 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
8346 /*IsVolatile=*/false,
8347 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
8349 ByValArgs.push_back(FIPtr);
8350 }
8351
8352 if (!IsTailCall)
8353 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
8354
8355 // Copy argument values to their designated locations.
8357 SmallVector<SDValue> MemOpChains;
8358 SDValue StackPtr;
8359 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
8360 ++i, ++OutIdx) {
8361 CCValAssign &VA = ArgLocs[i];
8362 SDValue ArgValue = OutVals[OutIdx];
8363 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
8364
8365 // Handle passing f64 on LA32D with a soft float ABI as a special case.
8366 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8367 assert(VA.isRegLoc() && "Expected register VA assignment");
8368 assert(VA.needsCustom());
8369 SDValue SplitF64 =
8371 DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
8372 SDValue Lo = SplitF64.getValue(0);
8373 SDValue Hi = SplitF64.getValue(1);
8374
8375 Register RegLo = VA.getLocReg();
8376 RegsToPass.push_back(std::make_pair(RegLo, Lo));
8377
8378 // Get the CCValAssign for the Hi part.
8379 CCValAssign &HiVA = ArgLocs[++i];
8380
8381 if (HiVA.isMemLoc()) {
8382 // Second half of f64 is passed on the stack.
8383 if (!StackPtr.getNode())
8384 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8386 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
8387 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
8388 // Emit the store.
8389 MemOpChains.push_back(DAG.getStore(
8390 Chain, DL, Hi, Address,
8392 } else {
8393 // Second half of f64 is passed in another GPR.
8394 Register RegHigh = HiVA.getLocReg();
8395 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
8396 }
8397 continue;
8398 }
8399
8400 // Promote the value if needed.
8401 // For now, only handle fully promoted and indirect arguments.
8402 if (VA.getLocInfo() == CCValAssign::Indirect) {
8403 // Store the argument in a stack slot and pass its address.
8404 Align StackAlign =
8405 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
8406 getPrefTypeAlign(ArgValue.getValueType(), DAG));
8407 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
8408 // If the original argument was split and passed by reference, we need to
8409 // store the required parts of it here (and pass just one address).
8410 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
8411 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
8412 assert(ArgPartOffset == 0);
8413 // Calculate the total size to store. We don't have access to what we're
8414 // actually storing other than performing the loop and collecting the
8415 // info.
8417 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
8418 SDValue PartValue = OutVals[OutIdx + 1];
8419 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
8420 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8421 EVT PartVT = PartValue.getValueType();
8422
8423 StoredSize += PartVT.getStoreSize();
8424 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
8425 Parts.push_back(std::make_pair(PartValue, Offset));
8426 ++i;
8427 ++OutIdx;
8428 }
8429 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
8430 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
8431 MemOpChains.push_back(
8432 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
8434 for (const auto &Part : Parts) {
8435 SDValue PartValue = Part.first;
8436 SDValue PartOffset = Part.second;
8438 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
8439 MemOpChains.push_back(
8440 DAG.getStore(Chain, DL, PartValue, Address,
8442 }
8443 ArgValue = SpillSlot;
8444 } else {
8445 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
8446 }
8447
8448 // Use local copy if it is a byval arg.
8449 if (Flags.isByVal())
8450 ArgValue = ByValArgs[j++];
8451
8452 if (VA.isRegLoc()) {
8453 // Queue up the argument copies and emit them at the end.
8454 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
8455 } else {
8456 assert(VA.isMemLoc() && "Argument not register or memory");
8457 assert(!IsTailCall && "Tail call not allowed if stack is used "
8458 "for passing parameters");
8459
8460 // Work out the address of the stack slot.
8461 if (!StackPtr.getNode())
8462 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8464 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
8466
8467 // Emit the store.
8468 MemOpChains.push_back(
8469 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
8470 }
8471 }
8472
8473 // Join the stores, which are independent of one another.
8474 if (!MemOpChains.empty())
8475 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
8476
8477 SDValue Glue;
8478
8479 // Build a sequence of copy-to-reg nodes, chained and glued together.
8480 for (auto &Reg : RegsToPass) {
8481 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
8482 Glue = Chain.getValue(1);
8483 }
8484
8485 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
8486 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
8487 // split it and then direct call can be matched by PseudoCALL.
8489 const GlobalValue *GV = S->getGlobal();
8490 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
8493 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
8494 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
8495 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
8498 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
8499 }
8500
8501 // The first call operand is the chain and the second is the target address.
8503 Ops.push_back(Chain);
8504 Ops.push_back(Callee);
8505
8506 // Add argument registers to the end of the list so that they are
8507 // known live into the call.
8508 for (auto &Reg : RegsToPass)
8509 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
8510
8511 if (!IsTailCall) {
8512 // Add a register mask operand representing the call-preserved registers.
8513 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
8514 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
8515 assert(Mask && "Missing call preserved mask for calling convention");
8516 Ops.push_back(DAG.getRegisterMask(Mask));
8517 }
8518
8519 // Glue the call to the argument copies, if any.
8520 if (Glue.getNode())
8521 Ops.push_back(Glue);
8522
8523 // Emit the call.
8524 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
8525 unsigned Op;
8526 switch (DAG.getTarget().getCodeModel()) {
8527 default:
8528 report_fatal_error("Unsupported code model");
8529 case CodeModel::Small:
8530 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
8531 break;
8532 case CodeModel::Medium:
8533 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
8535 break;
8536 case CodeModel::Large:
8537 assert(Subtarget.is64Bit() && "Large code model requires LA64");
8539 break;
8540 }
8541
8542 if (IsTailCall) {
8544 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
8545 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
8546 return Ret;
8547 }
8548
8549 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
8550 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
8551 Glue = Chain.getValue(1);
8552
8553 // Mark the end of the call, which is glued to the call itself.
8554 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
8555 Glue = Chain.getValue(1);
8556
8557 // Assign locations to each value returned by this call.
8559 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
8560 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
8561
8562 // Copy all of the result registers out of their specified physreg.
8563 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
8564 auto &VA = RVLocs[i];
8565 // Copy the value out.
8566 SDValue RetValue =
8567 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
8568 // Glue the RetValue to the end of the call sequence.
8569 Chain = RetValue.getValue(1);
8570 Glue = RetValue.getValue(2);
8571
8572 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8573 assert(VA.needsCustom());
8574 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
8575 MVT::i32, Glue);
8576 Chain = RetValue2.getValue(1);
8577 Glue = RetValue2.getValue(2);
8578 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
8579 RetValue, RetValue2);
8580 } else
8581 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
8582
8583 InVals.push_back(RetValue);
8584 }
8585
8586 return Chain;
8587}
8588
8590 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
8591 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
8592 const Type *RetTy) const {
8594 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
8595
8596 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8597 LoongArchABI::ABI ABI =
8598 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8599 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
8600 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
8601 return false;
8602 }
8603 return true;
8604}
8605
8607 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8609 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
8610 SelectionDAG &DAG) const {
8611 // Stores the assignment of the return value to a location.
8613
8614 // Info about the registers and stack slot.
8615 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
8616 *DAG.getContext());
8617
8618 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
8619 nullptr, CC_LoongArch);
8620 if (CallConv == CallingConv::GHC && !RVLocs.empty())
8621 report_fatal_error("GHC functions return void only");
8622 SDValue Glue;
8623 SmallVector<SDValue, 4> RetOps(1, Chain);
8624
8625 // Copy the result values into the output registers.
8626 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
8627 SDValue Val = OutVals[OutIdx];
8628 CCValAssign &VA = RVLocs[i];
8629 assert(VA.isRegLoc() && "Can only return in registers!");
8630
8631 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8632 // Handle returning f64 on LA32D with a soft float ABI.
8633 assert(VA.isRegLoc() && "Expected return via registers");
8634 assert(VA.needsCustom());
8636 DAG.getVTList(MVT::i32, MVT::i32), Val);
8637 SDValue Lo = SplitF64.getValue(0);
8638 SDValue Hi = SplitF64.getValue(1);
8639 Register RegLo = VA.getLocReg();
8640 Register RegHi = RVLocs[++i].getLocReg();
8641
8642 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
8643 Glue = Chain.getValue(1);
8644 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
8645 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
8646 Glue = Chain.getValue(1);
8647 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
8648 } else {
8649 // Handle a 'normal' return.
8650 Val = convertValVTToLocVT(DAG, Val, VA, DL);
8651 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
8652
8653 // Guarantee that all emitted copies are stuck together.
8654 Glue = Chain.getValue(1);
8655 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
8656 }
8657 }
8658
8659 RetOps[0] = Chain; // Update chain.
8660
8661 // Add the glue node if we have it.
8662 if (Glue.getNode())
8663 RetOps.push_back(Glue);
8664
8665 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
8666}
8667
8668// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
8669// Note: The following prefixes are excluded:
8670// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
8671// as they can be represented using [x]vrepli.[whb]
8673 const APInt &SplatValue, const unsigned SplatBitSize) const {
8674 uint64_t RequiredImm = 0;
8675 uint64_t V = SplatValue.getZExtValue();
8676 if (SplatBitSize == 16 && !(V & 0x00FF)) {
8677 // 4'b0101
8678 RequiredImm = (0b10101 << 8) | (V >> 8);
8679 return {true, RequiredImm};
8680 } else if (SplatBitSize == 32) {
8681 // 4'b0001
8682 if (!(V & 0xFFFF00FF)) {
8683 RequiredImm = (0b10001 << 8) | (V >> 8);
8684 return {true, RequiredImm};
8685 }
8686 // 4'b0010
8687 if (!(V & 0xFF00FFFF)) {
8688 RequiredImm = (0b10010 << 8) | (V >> 16);
8689 return {true, RequiredImm};
8690 }
8691 // 4'b0011
8692 if (!(V & 0x00FFFFFF)) {
8693 RequiredImm = (0b10011 << 8) | (V >> 24);
8694 return {true, RequiredImm};
8695 }
8696 // 4'b0110
8697 if ((V & 0xFFFF00FF) == 0xFF) {
8698 RequiredImm = (0b10110 << 8) | (V >> 8);
8699 return {true, RequiredImm};
8700 }
8701 // 4'b0111
8702 if ((V & 0xFF00FFFF) == 0xFFFF) {
8703 RequiredImm = (0b10111 << 8) | (V >> 16);
8704 return {true, RequiredImm};
8705 }
8706 // 4'b1010
8707 if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
8708 RequiredImm =
8709 (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8710 return {true, RequiredImm};
8711 }
8712 } else if (SplatBitSize == 64) {
8713 // 4'b1011
8714 if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
8715 (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
8716 RequiredImm =
8717 (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8718 return {true, RequiredImm};
8719 }
8720 // 4'b1100
8721 if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
8722 (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
8723 RequiredImm =
8724 (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
8725 return {true, RequiredImm};
8726 }
8727 // 4'b1001
8728 auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
8729 uint8_t res = 0;
8730 for (int i = 0; i < 8; ++i) {
8731 uint8_t byte = x & 0xFF;
8732 if (byte == 0 || byte == 0xFF)
8733 res |= ((byte & 1) << i);
8734 else
8735 return {false, 0};
8736 x >>= 8;
8737 }
8738 return {true, res};
8739 };
8740 auto [IsSame, Suffix] = sameBitsPreByte(V);
8741 if (IsSame) {
8742 RequiredImm = (0b11001 << 8) | Suffix;
8743 return {true, RequiredImm};
8744 }
8745 }
8746 return {false, RequiredImm};
8747}
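As a worked example of the encoding above (an illustrative sketch; the helper below is hypothetical and not part of this file): a 32-bit splat value whose only non-zero byte is byte 1 matches the 4'b0001 case, so a splat of 0x00003400 would yield the immediate (0b10001 << 8) | 0x34 = 0x1134, which has imm[12] set as required.

// Illustrative sketch only: the 4'b0001 case above for a 32-bit splat value V
// with (V & 0xFFFF00FF) == 0, e.g. V = 0x00003400.
static uint64_t exampleEncodeVldi32Byte1(uint64_t V) {
  return (0b10001 << 8) | (V >> 8); // 0x00003400 -> 0x1134
}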
8748
8750 EVT VT) const {
8751 if (!Subtarget.hasExtLSX())
8752 return false;
8753
8754 if (VT == MVT::f32) {
8755 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
8756 return (masked == 0x3e000000 || masked == 0x40000000);
8757 }
8758
8759 if (VT == MVT::f64) {
8760 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
8761 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
8762 }
8763
8764 return false;
8765}
8766
8767bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
8768 bool ForCodeSize) const {
8769 // TODO: Maybe need more checks here after vector extension is supported.
8770 if (VT == MVT::f32 && !Subtarget.hasBasicF())
8771 return false;
8772 if (VT == MVT::f64 && !Subtarget.hasBasicD())
8773 return false;
8774 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
8775}
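For instance (an illustrative sketch; the helper is hypothetical), the f32 branch of isFPImmVLDILegal accepts exactly those bit patterns whose masked form is one of the two values above: 1.0f (0x3f800000) masks to 0x3e000000 and is accepted, while 0.1f (0x3dcccccd) masks to 0x3c04cccd and falls back to a constant pool.

// Illustrative sketch only: the same f32 mask test as isFPImmVLDILegal, applied
// to a raw IEEE single-precision bit pattern.
static bool exampleIsF32VldiEncodable(uint32_t Bits) {
  uint32_t Masked = Bits & 0x7e07ffff;
  return Masked == 0x3e000000 || Masked == 0x40000000;
}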
8776
8778 return true;
8779}
8780
8782 return true;
8783}
8784
8785bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
8786 const Instruction *I) const {
8787 if (!Subtarget.is64Bit())
8788 return isa<LoadInst>(I) || isa<StoreInst>(I);
8789
8790 if (isa<LoadInst>(I))
8791 return true;
8792
8793 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
8794 // require fences because we can use amswap_db.[w/d].
8795 Type *Ty = I->getOperand(0)->getType();
8796 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
8797 unsigned Size = Ty->getIntegerBitWidth();
8798 return (Size == 8 || Size == 16);
8799 }
8800
8801 return false;
8802}
8803
8805 LLVMContext &Context,
8806 EVT VT) const {
8807 if (!VT.isVector())
8808 return getPointerTy(DL);
8810}
8811
8813 EVT VT = Y.getValueType();
8814
8815 if (VT.isVector())
8816 return Subtarget.hasExtLSX() && VT.isInteger();
8817
8818 return VT.isScalarInteger() && !isa<ConstantSDNode>(Y);
8819}
8820
8822 const CallInst &I,
8823 MachineFunction &MF,
8824 unsigned Intrinsic) const {
8825 switch (Intrinsic) {
8826 default:
8827 return false;
8828 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
8829 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
8830 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
8831 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
8832 Info.opc = ISD::INTRINSIC_W_CHAIN;
8833 Info.memVT = MVT::i32;
8834 Info.ptrVal = I.getArgOperand(0);
8835 Info.offset = 0;
8836 Info.align = Align(4);
8839 return true;
8840 // TODO: Add more Intrinsics later.
8841 }
8842}
8843
8844// When -mlamcas is enabled, MinCmpXchgSizeInBits is set to 8, so atomicrmw
8845// and/or/xor operations with operands narrower than 32 bits cannot be
8846// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent a
8847// regression, we expand them manually here.
8850
8852 Op == AtomicRMWInst::And) &&
8853 "Unable to expand");
8854 unsigned MinWordSize = 4;
8855
8856 IRBuilder<> Builder(AI);
8857 LLVMContext &Ctx = Builder.getContext();
8858 const DataLayout &DL = AI->getDataLayout();
8859 Type *ValueType = AI->getType();
8860 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
8861
8862 Value *Addr = AI->getPointerOperand();
8863 PointerType *PtrTy = cast<PointerType>(Addr->getType());
8864 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
8865
8866 Value *AlignedAddr = Builder.CreateIntrinsic(
8867 Intrinsic::ptrmask, {PtrTy, IntTy},
8868 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
8869 "AlignedAddr");
8870
8871 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
8872 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
8873 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
8874 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
8875 Value *Mask = Builder.CreateShl(
8876 ConstantInt::get(WordType,
8877 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
8878 ShiftAmt, "Mask");
8879 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
8880 Value *ValOperand_Shifted =
8881 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
8882 ShiftAmt, "ValOperand_Shifted");
8883 Value *NewOperand;
8884 if (Op == AtomicRMWInst::And)
8885 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
8886 else
8887 NewOperand = ValOperand_Shifted;
8888
8889 AtomicRMWInst *NewAI =
8890 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
8891 AI->getOrdering(), AI->getSyncScopeID());
8892
8893 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
8894 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
8895 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
8896 AI->replaceAllUsesWith(FinalOldResult);
8897 AI->eraseFromParent();
8898}
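To ground the mask/shift arithmetic above (a standalone sketch assuming the little-endian layout LoongArch uses; the helper is hypothetical): an i8 operand two bytes into its naturally aligned i32 word gets ShiftAmt = 16 and Mask = 0x00FF0000, so the byte is rotated into bits [23:16] of the widened access.

// Illustrative sketch only: the word-offset arithmetic performed above for an
// i8 atomicrmw at address Addr (MinWordSize == 4).
static void exampleSubwordMask(uint64_t Addr, uint64_t &ShiftAmt, uint64_t &Mask) {
  uint64_t PtrLSB = Addr & 3; // byte offset within the aligned i32 word
  ShiftAmt = PtrLSB * 8;      // Addr % 4 == 2  ->  ShiftAmt == 16
  Mask = 0xFFull << ShiftAmt; // -> 0x00FF0000 selects that byte
}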
8899
8902 // TODO: Add more AtomicRMWInsts that need to be extended.
8903
8904 // Since floating-point operations require a non-trivial set of data
8905 // operations, use CmpXChg to expand.
8906 if (AI->isFloatingPointOperation() ||
8912
8913 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
8916 AI->getOperation() == AtomicRMWInst::Sub)) {
8918 }
8919
8920 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
8921 if (Subtarget.hasLAMCAS()) {
8922 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
8926 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
8928 }
8929
8930 if (Size == 8 || Size == 16)
8933}
8934
8935static Intrinsic::ID
8937 AtomicRMWInst::BinOp BinOp) {
8938 if (GRLen == 64) {
8939 switch (BinOp) {
8940 default:
8941 llvm_unreachable("Unexpected AtomicRMW BinOp");
8943 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
8944 case AtomicRMWInst::Add:
8945 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
8946 case AtomicRMWInst::Sub:
8947 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
8949 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
8951 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
8953 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
8954 case AtomicRMWInst::Max:
8955 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
8956 case AtomicRMWInst::Min:
8957 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
8958 // TODO: support other AtomicRMWInst.
8959 }
8960 }
8961
8962 if (GRLen == 32) {
8963 switch (BinOp) {
8964 default:
8965 llvm_unreachable("Unexpected AtomicRMW BinOp");
8967 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
8968 case AtomicRMWInst::Add:
8969 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
8970 case AtomicRMWInst::Sub:
8971 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
8973 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
8975 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
8977 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
8978 case AtomicRMWInst::Max:
8979 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
8980 case AtomicRMWInst::Min:
8981 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
8982 // TODO: support other AtomicRMWInst.
8983 }
8984 }
8985
8986 llvm_unreachable("Unexpected GRLen\n");
8987}
8988
8991 AtomicCmpXchgInst *CI) const {
8992
8993 if (Subtarget.hasLAMCAS())
8995
8997 if (Size == 8 || Size == 16)
9000}
9001
9003 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
9004 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
9005 unsigned GRLen = Subtarget.getGRLen();
9006 AtomicOrdering FailOrd = CI->getFailureOrdering();
9007 Value *FailureOrdering =
9008 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
9009 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
9010 if (GRLen == 64) {
9011 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
9012 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
9013 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
9014 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
9015 }
9016 Type *Tys[] = {AlignedAddr->getType()};
9017 Value *Result = Builder.CreateIntrinsic(
9018 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
9019 if (GRLen == 64)
9020 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
9021 return Result;
9022}
9023
9025 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
9026 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
9027 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
9028 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
9029 // mask, as this produces better code than the LL/SC loop emitted by
9030 // int_loongarch_masked_atomicrmw_xchg.
9031 if (AI->getOperation() == AtomicRMWInst::Xchg &&
9034 if (CVal->isZero())
9035 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
9036 Builder.CreateNot(Mask, "Inv_Mask"),
9037 AI->getAlign(), Ord);
9038 if (CVal->isMinusOne())
9039 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
9040 AI->getAlign(), Ord);
9041 }
9042
9043 unsigned GRLen = Subtarget.getGRLen();
9044 Value *Ordering =
9045 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
9046 Type *Tys[] = {AlignedAddr->getType()};
9048 AI->getModule(),
9050
9051 if (GRLen == 64) {
9052 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
9053 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
9054 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
9055 }
9056
9057 Value *Result;
9058
9059 // Must pass the shift amount needed to sign extend the loaded value prior
9060 // to performing a signed comparison for min/max. ShiftAmt is the number of
9061 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
9062 // is the number of bits to left+right shift the value in order to
9063 // sign-extend.
9064 if (AI->getOperation() == AtomicRMWInst::Min ||
9066 const DataLayout &DL = AI->getDataLayout();
9067 unsigned ValWidth =
9068 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
9069 Value *SextShamt =
9070 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
9071 Result = Builder.CreateCall(LlwOpScwLoop,
9072 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
9073 } else {
9074 Result =
9075 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
9076 }
9077
9078 if (GRLen == 64)
9079 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
9080 return Result;
9081}
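As a quick numeric check of the comment above (illustrative only; the helper is made up): for a min/max on an i16 held at byte offset 2 of its word on LA64, GRLen = 64, ValWidth = 16 and ShiftAmt = 16, so the intrinsic is passed a sign-extension shift of 32.

// Illustrative sketch only: the GRLen - ValWidth - ShiftAmt value passed to the
// masked min/max intrinsic above.
static uint64_t exampleSextShamt(uint64_t GRLen, uint64_t ValWidth, uint64_t ShiftAmt) {
  return (GRLen - ValWidth) - ShiftAmt; // 64 - 16 - 16 == 32
}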
9082
9084 const MachineFunction &MF, EVT VT) const {
9085 VT = VT.getScalarType();
9086
9087 if (!VT.isSimple())
9088 return false;
9089
9090 switch (VT.getSimpleVT().SimpleTy) {
9091 case MVT::f32:
9092 case MVT::f64:
9093 return true;
9094 default:
9095 break;
9096 }
9097
9098 return false;
9099}
9100
9102 const Constant *PersonalityFn) const {
9103 return LoongArch::R4;
9104}
9105
9107 const Constant *PersonalityFn) const {
9108 return LoongArch::R5;
9109}
9110
9111//===----------------------------------------------------------------------===//
9112// Target Optimization Hooks
9113//===----------------------------------------------------------------------===//
9114
9116 const LoongArchSubtarget &Subtarget) {
9117 // The FRECIPE instructions have a relative accuracy of 2^-14.
9118 // IEEE single precision has 23 significand bits and double precision has 52.
9119 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
9120 return RefinementSteps;
9121}
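The step counts follow from the usual Newton-Raphson error analysis (a back-of-the-envelope sketch, not code from this file): each iteration roughly squares the relative error of the 2^-14 estimate, so one step reaches about 2^-28, enough for f32's 24-bit significand, and two steps reach about 2^-56, enough for f64's 53 bits.

// Illustrative sketch only: approximate accuracy (in bits) after N refinement
// steps, starting from the 14-bit frecipe/frsqrte estimate.
static unsigned exampleAccuracyBits(unsigned Steps) {
  unsigned Bits = 14;
  for (unsigned I = 0; I < Steps; ++I)
    Bits *= 2; // error roughly squares per Newton-Raphson step
  return Bits; // 1 step -> 28 bits (f32), 2 steps -> 56 bits (f64)
}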
9122
9124 SelectionDAG &DAG, int Enabled,
9125 int &RefinementSteps,
9126 bool &UseOneConstNR,
9127 bool Reciprocal) const {
9128 if (Subtarget.hasFrecipe()) {
9129 SDLoc DL(Operand);
9130 EVT VT = Operand.getValueType();
9131
9132 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9133 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9134 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9135 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9136 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9137
9138 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9139 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9140
9141 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
9142 if (Reciprocal)
9143 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
9144
9145 return Estimate;
9146 }
9147 }
9148
9149 return SDValue();
9150}
9151
9153 SelectionDAG &DAG,
9154 int Enabled,
9155 int &RefinementSteps) const {
9156 if (Subtarget.hasFrecipe()) {
9157 SDLoc DL(Operand);
9158 EVT VT = Operand.getValueType();
9159
9160 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9161 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9162 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9163 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9164 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9165
9166 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9167 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9168
9169 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
9170 }
9171 }
9172
9173 return SDValue();
9174}
9175
9176//===----------------------------------------------------------------------===//
9177// LoongArch Inline Assembly Support
9178//===----------------------------------------------------------------------===//
9179
9181LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
9182 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
9183 //
9184 // 'f': A floating-point register (if available).
9185 // 'k': A memory operand whose address is formed by a base register and
9186 // (optionally scaled) index register.
9187 // 'l': A signed 16-bit constant.
9188 // 'm': A memory operand whose address is formed by a base register and
9189 // offset that is suitable for use in instructions with the same
9190 // addressing mode as st.w and ld.w.
9191 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
9192 // instruction)
9193 // 'I': A signed 12-bit constant (for arithmetic instructions).
9194 // 'J': Integer zero.
9195 // 'K': An unsigned 12-bit constant (for logic instructions).
9196 // "ZB": An address that is held in a general-purpose register. The offset is
9197 // zero.
9198 // "ZC": A memory operand whose address is formed by a base register and
9199 // offset that is suitable for use in instructions with the same
9200 // addressing mode as ll.w and sc.w.
9201 if (Constraint.size() == 1) {
9202 switch (Constraint[0]) {
9203 default:
9204 break;
9205 case 'f':
9206 case 'q':
9207 return C_RegisterClass;
9208 case 'l':
9209 case 'I':
9210 case 'J':
9211 case 'K':
9212 return C_Immediate;
9213 case 'k':
9214 return C_Memory;
9215 }
9216 }
9217
9218 if (Constraint == "ZC" || Constraint == "ZB")
9219 return C_Memory;
9220
9221 // 'm' is handled here.
9222 return TargetLowering::getConstraintType(Constraint);
9223}
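As a user-side illustration of these constraint letters (a sketch assuming a LoongArch64 target and GNU extended asm; it is not part of the lowering): 'r' and '=r' select GPRs, and 'I' requires a signed 12-bit immediate, which LowerAsmOperandForConstraint below validates.

// Illustrative sketch only: typical use of the 'r' and 'I' constraints documented
// above, assuming a LoongArch64 target.
static long exampleAddImmediate(long X) {
  long Res;
  asm("addi.d %0, %1, %2" : "=r"(Res) : "r"(X), "I"(100));
  return Res;
}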
9224
9225InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
9226 StringRef ConstraintCode) const {
9227 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
9231 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
9232}
9233
9234std::pair<unsigned, const TargetRegisterClass *>
9235LoongArchTargetLowering::getRegForInlineAsmConstraint(
9236 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
9237 // First, see if this is a constraint that directly corresponds to a LoongArch
9238 // register class.
9239 if (Constraint.size() == 1) {
9240 switch (Constraint[0]) {
9241 case 'r':
9242 // TODO: Support fixed vectors up to GRLen?
9243 if (VT.isVector())
9244 break;
9245 return std::make_pair(0U, &LoongArch::GPRRegClass);
9246 case 'q':
9247 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
9248 case 'f':
9249 if (Subtarget.hasBasicF() && VT == MVT::f32)
9250 return std::make_pair(0U, &LoongArch::FPR32RegClass);
9251 if (Subtarget.hasBasicD() && VT == MVT::f64)
9252 return std::make_pair(0U, &LoongArch::FPR64RegClass);
9253 if (Subtarget.hasExtLSX() &&
9254 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
9255 return std::make_pair(0U, &LoongArch::LSX128RegClass);
9256 if (Subtarget.hasExtLASX() &&
9257 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
9258 return std::make_pair(0U, &LoongArch::LASX256RegClass);
9259 break;
9260 default:
9261 break;
9262 }
9263 }
9264
9265 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
9266 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
9267 // constraints while the official register name is prefixed with a '$'. So we
9268 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
9269 // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
9270 // case insensitive, so no need to convert the constraint to upper case here.
9271 //
9272 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
9273 // decode the usage of register name aliases into their official names. And
9274 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
9275 // official register names.
9276 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
9277 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
9278 bool IsFP = Constraint[2] == 'f';
9279 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
9280 std::pair<unsigned, const TargetRegisterClass *> R;
9282 TRI, join_items("", Temp.first, Temp.second), VT);
9283 // Match those names to the widest floating point register type available.
9284 if (IsFP) {
9285 unsigned RegNo = R.first;
9286 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
9287 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
9288 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
9289 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
9290 }
9291 }
9292 }
9293 return R;
9294 }
9295
9296 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
9297}
9298
9299void LoongArchTargetLowering::LowerAsmOperandForConstraint(
9300 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
9301 SelectionDAG &DAG) const {
9302 // Currently only support length 1 constraints.
9303 if (Constraint.size() == 1) {
9304 switch (Constraint[0]) {
9305 case 'l':
9306 // Validate & create a 16-bit signed immediate operand.
9307 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9308 uint64_t CVal = C->getSExtValue();
9309 if (isInt<16>(CVal))
9310 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9311 Subtarget.getGRLenVT()));
9312 }
9313 return;
9314 case 'I':
9315 // Validate & create a 12-bit signed immediate operand.
9316 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9317 uint64_t CVal = C->getSExtValue();
9318 if (isInt<12>(CVal))
9319 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9320 Subtarget.getGRLenVT()));
9321 }
9322 return;
9323 case 'J':
9324 // Validate & create an integer zero operand.
9325 if (auto *C = dyn_cast<ConstantSDNode>(Op))
9326 if (C->getZExtValue() == 0)
9327 Ops.push_back(
9328 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
9329 return;
9330 case 'K':
9331 // Validate & create a 12-bit unsigned immediate operand.
9332 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9333 uint64_t CVal = C->getZExtValue();
9334 if (isUInt<12>(CVal))
9335 Ops.push_back(
9336 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
9337 }
9338 return;
9339 default:
9340 break;
9341 }
9342 }
9344}
9345
9346#define GET_REGISTER_MATCHER
9347#include "LoongArchGenAsmMatcher.inc"
9348
9351 const MachineFunction &MF) const {
9352 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
9353 std::string NewRegName = Name.second.str();
9354 Register Reg = MatchRegisterAltName(NewRegName);
9355 if (!Reg)
9356 Reg = MatchRegisterName(NewRegName);
9357 if (!Reg)
9358 return Reg;
9359 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
9360 if (!ReservedRegs.test(Reg))
9361 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
9362 StringRef(RegName) + "\"."));
9363 return Reg;
9364}
9365
9367 EVT VT, SDValue C) const {
9368 // TODO: Support vectors.
9369 if (!VT.isScalarInteger())
9370 return false;
9371
9372 // Omit the optimization if the data size exceeds GRLen.
9373 if (VT.getSizeInBits() > Subtarget.getGRLen())
9374 return false;
9375
9376 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
9377 const APInt &Imm = ConstNode->getAPIntValue();
9378 // Break MUL into (SLLI + ADD/SUB) or ALSL.
9379 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
9380 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
9381 return true;
9382 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
9383 if (ConstNode->hasOneUse() &&
9384 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
9385 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
9386 return true;
9387 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
9388 // in which the immediate has two set bits. Or break (MUL x, imm)
9389 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
9390 // equals (1 << s0) - (1 << s1).
9391 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
9392 unsigned Shifts = Imm.countr_zero();
9393 // Reject immediates which can be composed via a single LUI.
9394 if (Shifts >= 12)
9395 return false;
9396 // Reject multiplications that can be optimized to
9397 // (SLLI (ALSL x, x, 1/2/3/4), s).
9398 APInt ImmPop = Imm.ashr(Shifts);
9399 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
9400 return false;
9401 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
9402 // since it needs one more instruction than the other 3 cases.
9403 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
9404 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
9405 (ImmSmall - Imm).isPowerOf2())
9406 return true;
9407 }
9408 }
9409
9410 return false;
9411}
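A few concrete cases the hook above accepts (illustrative only; the helper below is hypothetical and only covers the simplest positive patterns): x*3 becomes (x<<1)+x since 3-1 is a power of two, x*7 becomes (x<<3)-x since 7+1 is a power of two, and x*6 becomes (x<<2)+(x<<1) since 6-2 is a power of two.

// Illustrative sketch only: a subset of the shift/add-friendly immediates accepted
// above (positive values, ignoring the one-use and LUI-range checks).
static bool exampleIsSimpleShiftAddImm(long long Imm) {
  auto IsPow2 = [](long long V) { return V > 0 && (V & (V - 1)) == 0; };
  return IsPow2(Imm - 1) || IsPow2(Imm + 1) || // x*3, x*7
         IsPow2(Imm - 2) || IsPow2(Imm - 4) || // x*6, x*68
         IsPow2(Imm - 8) || IsPow2(Imm - 16);
}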
9412
9414 const AddrMode &AM,
9415 Type *Ty, unsigned AS,
9416 Instruction *I) const {
9417 // LoongArch has four basic addressing modes:
9418 // 1. reg
9419 // 2. reg + 12-bit signed offset
9420 // 3. reg + 14-bit signed offset left-shifted by 2
9421 // 4. reg1 + reg2
9422 // TODO: Add more checks after supporting the vector extension.
9423
9424 // No global is ever allowed as a base.
9425 if (AM.BaseGV)
9426 return false;
9427
9428 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
9429 // with `UAL` feature.
9430 if (!isInt<12>(AM.BaseOffs) &&
9431 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
9432 return false;
9433
9434 switch (AM.Scale) {
9435 case 0:
9436 // "r+i" or just "i", depending on HasBaseReg.
9437 break;
9438 case 1:
9439 // "r+r+i" is not allowed.
9440 if (AM.HasBaseReg && AM.BaseOffs)
9441 return false;
9442 // Otherwise we have "r+r" or "r+i".
9443 break;
9444 case 2:
9445 // "2*r+r" or "2*r+i" is not allowed.
9446 if (AM.HasBaseReg || AM.BaseOffs)
9447 return false;
9448 // Allow "2*r" as "r+r".
9449 break;
9450 default:
9451 return false;
9452 }
9453
9454 return true;
9455}
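For the immediate-offset part of the check above, a standalone sketch (hypothetical helper, assuming the UAL feature so the shifted 14-bit form is usable): an offset of 2047 fits the plain 12-bit form, 2048 only fits the 14-bit form shifted left by 2, and 2050 fits neither.

// Illustrative sketch only: the base-offset legality test above, assuming UAL.
static bool exampleIsLegalBaseOffset(long long Offs) {
  auto FitsSigned = [](long long V, unsigned N) {
    return V >= -(1LL << (N - 1)) && V < (1LL << (N - 1));
  };
  if (FitsSigned(Offs, 12))
    return true;                                       // e.g. 2047
  return (Offs & 3) == 0 && FitsSigned(Offs >> 2, 14); // e.g. 2048, 32764
}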
9456
9458 return isInt<12>(Imm);
9459}
9460
9462 return isInt<12>(Imm);
9463}
9464
9466 // Zexts are free if they can be combined with a load.
9467 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
9468 // poorly with type legalization of compares preferring sext.
9469 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
9470 EVT MemVT = LD->getMemoryVT();
9471 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
9472 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
9473 LD->getExtensionType() == ISD::ZEXTLOAD))
9474 return true;
9475 }
9476
9477 return TargetLowering::isZExtFree(Val, VT2);
9478}
9479
9481 EVT DstVT) const {
9482 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
9483}
9484
9486 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
9487}
9488
9490 // TODO: Support vectors.
9491 if (Y.getValueType().isVector())
9492 return false;
9493
9494 return !isa<ConstantSDNode>(Y);
9495}
9496
9498 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
9499 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
9500}
9501
9503 Type *Ty, bool IsSigned) const {
9504 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
9505 return true;
9506
9507 return IsSigned;
9508}
9509
9511 // Return false to suppress the unnecessary extensions if the LibCall
9512 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
9513 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
9514 Type.getSizeInBits() < Subtarget.getGRLen()))
9515 return false;
9516 return true;
9517}
9518
9519// memcpy and other memory intrinsics typically try to use wider loads/stores
9520// if the source/dest is aligned and the copy size is large enough. We therefore
9521// want to align such objects passed to memory intrinsics.
9523 unsigned &MinSize,
9524 Align &PrefAlign) const {
9525 if (!isa<MemIntrinsic>(CI))
9526 return false;
9527
9528 if (Subtarget.is64Bit()) {
9529 MinSize = 8;
9530 PrefAlign = Align(8);
9531 } else {
9532 MinSize = 4;
9533 PrefAlign = Align(4);
9534 }
9535
9536 return true;
9537}
9538
9547
9548bool LoongArchTargetLowering::splitValueIntoRegisterParts(
9549 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
9550 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
9551 bool IsABIRegCopy = CC.has_value();
9552 EVT ValueVT = Val.getValueType();
9553
9554 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9555 PartVT == MVT::f32) {
9556 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
9557 // NaN, and cast to f32.
9558 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
9559 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
9560 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
9561 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
9562 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
9563 Parts[0] = Val;
9564 return true;
9565 }
9566
9567 return false;
9568}
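Concretely (an illustrative sketch; the helper is made up): the half-precision value 1.0 has bit pattern 0x3C00, which after the extend-and-OR above becomes 0xFFFF3C00, a quiet f32 NaN whose low 16 bits carry the original f16 payload.

// Illustrative sketch only: the NaN-boxing pattern used above to carry an f16 or
// bf16 value in an f32 register.
static uint32_t exampleNaNBoxHalf(uint16_t HalfBits) {
  return 0xFFFF0000u | HalfBits; // half 1.0 (0x3C00) -> 0xFFFF3C00
}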
9569
9570SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
9571 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
9572 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
9573 bool IsABIRegCopy = CC.has_value();
9574
9575 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9576 PartVT == MVT::f32) {
9577 SDValue Val = Parts[0];
9578
9579 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
9580 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
9581 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
9582 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
9583 return Val;
9584 }
9585
9586 return SDValue();
9587}
9588
9589MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
9590 CallingConv::ID CC,
9591 EVT VT) const {
9592 // Use f32 to pass f16.
9593 if (VT == MVT::f16 && Subtarget.hasBasicF())
9594 return MVT::f32;
9595
9597}
9598
9599unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
9600 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
9601 // Use f32 to pass f16.
9602 if (VT == MVT::f16 && Subtarget.hasBasicF())
9603 return 1;
9604
9606}
9607
9609 SDValue Op, const APInt &OriginalDemandedBits,
9610 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
9611 unsigned Depth) const {
9612 EVT VT = Op.getValueType();
9613 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
9614 unsigned Opc = Op.getOpcode();
9615 switch (Opc) {
9616 default:
9617 break;
9620 SDValue Src = Op.getOperand(0);
9621 MVT SrcVT = Src.getSimpleValueType();
9622 unsigned SrcBits = SrcVT.getScalarSizeInBits();
9623 unsigned NumElts = SrcVT.getVectorNumElements();
9624
9625 // If we don't need the sign bits at all just return zero.
9626 if (OriginalDemandedBits.countr_zero() >= NumElts)
9627 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
9628
9629 // Only demand the vector elements of the sign bits we need.
9630 APInt KnownUndef, KnownZero;
9631 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
9632 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
9633 TLO, Depth + 1))
9634 return true;
9635
9636 Known.Zero = KnownZero.zext(BitWidth);
9637 Known.Zero.setHighBits(BitWidth - NumElts);
9638
9639 // [X]VMSKLTZ only uses the MSB from each vector element.
9640 KnownBits KnownSrc;
9641 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
9642 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
9643 Depth + 1))
9644 return true;
9645
9646 if (KnownSrc.One[SrcBits - 1])
9647 Known.One.setLowBits(NumElts);
9648 else if (KnownSrc.Zero[SrcBits - 1])
9649 Known.Zero.setLowBits(NumElts);
9650
9651 // Attempt to avoid multi-use ops if we don't need anything from it.
9653 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
9654 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
9655 return false;
9656 }
9657 }
9658
9660 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
9661}
9662
9664 unsigned Opc = VecOp.getOpcode();
9665
9666 // Assume target opcodes can't be scalarized.
9667 // TODO - do we have any exceptions?
9668 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
9669 return false;
9670
9671 // If the vector op is not supported, try to convert to scalar.
9672 EVT VecVT = VecOp.getValueType();
9674 return true;
9675
9676 // If the vector op is supported, but the scalar op is not, the transform may
9677 // not be worthwhile.
9678 EVT ScalarVT = VecVT.getScalarType();
9679 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
9680}
9681
9683 unsigned Index) const {
9685 return false;
9686
9687 // Extracting a 128-bit subvector at index 0 of a 256-bit vector is free.
9688 return Index == 0;
9689}
9690
9692 unsigned Index) const {
9693 EVT EltVT = VT.getScalarType();
9694
9695 // Extracting a scalar FP value from index 0 of a vector is free.
9696 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
9697}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
#define NODE_NAME_CASE(node)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
const MCPhysReg ArgFPR32s[]
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE whose result is the reversed source vector.
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE as byte rotate (if possible).
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
@ NoMaterializeFPImm
@ MaterializeFPImm2Ins
@ MaterializeFPImm3Ins
@ MaterializeFPImm4Ins
@ MaterializeFPImm5Ins
@ MaterializeFPImm6Ins
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, const SDLoc &DL, SelectionDAG &DAG)
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, unsigned Depth)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as shift (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static cl::opt< MaterializeFPImm > MaterializeFPImmInsNum("loongarch-materialize-float-imm", cl::Hidden, cl::desc("Maximum number of instructions used (including code sequence " "to generate the value and moving the value to FPR) when " "materializing floating-point immediates (default = 3)"), cl::init(MaterializeFPImm3Ins), cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"), clEnumValN(MaterializeFPImm2Ins, "2", "Materialize FP immediate within 2 instructions"), clEnumValN(MaterializeFPImm3Ins, "3", "Materialize FP immediate within 3 instructions"), clEnumValN(MaterializeFPImm4Ins, "4", "Materialize FP immediate within 4 instructions"), clEnumValN(MaterializeFPImm5Ins, "5", "Materialize FP immediate within 5 instructions"), clEnumValN(MaterializeFPImm6Ins, "6", "Materialize FP immediate within 6 instructions " "(behaves same as 5 on loongarch64)")))
static SDValue lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERMI (if possible).
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static void fillVector(ArrayRef< SDValue > Ops, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, SDValue &Vector, EVT ResTy)
static SDValue performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, EVT ResTy, unsigned first)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
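As a rough illustration of the idea (the in-tree routine inspects the actual SDValue operands and BUILD_VECTOR constants), an element of the shuffle result is "zeroable" when it is undef or selects a lane known to be zero in its source. The per-lane flag modelling and the helper name below are ours.

#include <vector>

// Sketch: Mask entries < N select from V1, entries >= N select from V2,
// and -1 means undef. Sources are modelled as per-lane "known zero" flags.
static void computeZeroableSketch(const std::vector<int> &Mask,
                                  const std::vector<bool> &V1Zero,
                                  const std::vector<bool> &V2Zero,
                                  std::vector<bool> &Zeroable) {
  const int N = (int)Mask.size();
  Zeroable.assign(N, false);
  for (int I = 0; I < N; ++I) {
    int M = Mask[I];
    if (M < 0)
      Zeroable[I] = true;               // undef lanes may take any value
    else if (M < N)
      Zeroable[I] = V1Zero[M];          // lane comes from V1
    else
      Zeroable[I] = V2Zero[M - N];      // lane comes from V2
  }
}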
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static bool canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG, unsigned ResOp)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
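A minimal sketch of the single-input case follows (the in-tree matcher is more general and also handles rotates whose elements span both V1 and V2); the helper name and the example mask are ours.

#include <vector>

// Sketch: a mask rotates its single input by R elements when every non-undef
// entry satisfies Mask[I] == (I + R) % N. Returns the rotation, or -1.
static int matchRotateSketch(const std::vector<int> &Mask) {
  const int N = (int)Mask.size();
  for (int R = 0; R < N; ++R) {
    bool Matches = true;
    for (int I = 0; I < N && Matches; ++I)
      Matches = Mask[I] == -1 || Mask[I] == (I + R) % N;
    if (Matches)
      return R;
  }
  return -1;
}
// e.g. {1, 2, 3, 0} on a 4-element vector is a rotate by one element.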
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instructions.
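A hedged sketch of the left-shift half of this check (names ours): lanes below the shift amount must be zeroable, and the remaining lanes must be a sequential copy from the source.

#include <vector>

// Sketch: the mask describes "shift left by S elements" when lane I < S is
// zeroable (fed by zeros or undef) and lane I >= S selects source lane I - S.
static bool matchShiftLeftSketch(const std::vector<int> &Mask,
                                 const std::vector<bool> &Zeroable, int S) {
  const int N = (int)Mask.size();
  for (int I = 0; I < N; ++I) {
    if (I < S) {
      if (!Zeroable[I])
        return false;
    } else if (Mask[I] != -1 && Mask[I] != I - S) {
      return false;
    }
  }
  return true;
}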
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
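To make "equivalent within each sub-lane" concrete, here is a simplified single-source sketch (the in-tree helper also tracks which source vector each repeated element comes from); the names and the example are ours.

#include <vector>

// Sketch: with LaneElts elements per lane, every lane must apply the same
// within-lane permutation. For v8i32 split into two 128-bit lanes,
// {1,0,3,2, 5,4,7,6} repeats the per-lane pattern {1,0,3,2}.
static bool isRepeatedMaskSketch(const std::vector<int> &Mask, int LaneElts,
                                 std::vector<int> &Repeated) {
  Repeated.assign(LaneElts, -1);
  const int N = (int)Mask.size();
  for (int I = 0; I < N; ++I) {
    if (Mask[I] < 0)
      continue;
    int Lane = I / LaneElts;
    int Local = Mask[I] - Lane * LaneElts;   // index relative to this lane
    if (Local < 0 || Local >= LaneElts)
      return false;                          // element crosses a lane boundary
    int &Slot = Repeated[I % LaneElts];
    if (Slot >= 0 && Slot != Local)
      return false;                          // lanes disagree on this slot
    Slot = Local;
  }
  return true;
}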
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1414
bool isZero() const
Definition APFloat.h:1427
APInt bitcastToAPInt() const
Definition APFloat.h:1335
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1392
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1331
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1640
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1389
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1563
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
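The APInt members listed above are easiest to read with a small usage sketch; the operations are the upstream APInt API as documented here, and only the scenario is invented.

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

int main() {
  APInt Splat = APInt::getAllOnes(16);   // i16 0xFFFF
  assert(Splat.isAllOnes() && Splat.getBitWidth() == 16);

  APInt Wide = Splat.zext(32);           // zero-extend: i32 0x0000FFFF
  assert(Wide.getZExtValue() == 0xFFFFu && Wide.countr_zero() == 0);

  APInt Bit = APInt::getZero(32);
  Bit.setBit(12);                        // i32 0x1000
  assert(Bit.lshr(12).getZExtValue() == 1);

  APInt Narrow = Wide.trunc(8);          // keep the low 8 bits: i8 0xFF
  assert(Narrow.isAllOnes());
  return 0;
}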
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:143
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Definition BitVector.h:480
size_type count() const
count - Returns the number of bits which are set.
Definition BitVector.h:181
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
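A hedged sketch of the usual CCState/CCValAssign flow that the calling-convention helpers above support; the wrapper name and its parameters are invented for illustration, and the real call sites are the LowerFormalArguments/LowerCall hooks documented on this page.

#include "llvm/CodeGen/CallingConvLower.h"
using namespace llvm;

static void walkArgAssignments(MachineFunction &MF, CallingConv::ID CC,
                               bool IsVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               CCAssignFn Fn, LLVMContext &Ctx) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, IsVarArg, MF, ArgLocs, Ctx);
  CCInfo.AnalyzeFormalArguments(Ins, Fn);  // assign each arg a reg or a slot
  for (const CCValAssign &VA : ArgLocs) {
    if (VA.isRegLoc())
      (void)VA.getLocReg();                // value arrives in this register
    else
      (void)VA.getLocMemOffset();          // value arrives at this stack offset
  }
}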
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:226
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:214
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:479
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
Argument * getArg(unsigned i) const
Definition Function.h:884
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
std::pair< bool, uint64_t > isImmVLDILegalForMode1(const APInt &SplatValue, const unsigned SplatBitSize) const
Check if a constant splat can be generated using [x]vldi, where imm[12] is 1.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform a atomicrmw expansion using a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:299
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
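A hedged sketch of how the SelectionDAG node builders listed above compose inside a lowering hook; the helper name and the node shape are invented for illustration only.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDValue buildMaskedAdd(SelectionDAG &DAG, const SDLoc &DL, SDValue A,
                              SDValue B, uint64_t MaskVal) {
  EVT VT = A.getValueType();
  SDValue Sum = DAG.getNode(ISD::ADD, DL, VT, A, B);
  SDValue Mask = DAG.getConstant(MaskVal, DL, VT); // ConstantSDNode wrapper
  return DAG.getNode(ISD::AND, DL, VT, Sum, Mask); // (A + B) & MaskVal
}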
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
ArrayRef< int > getMask() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
typename SuperClass::const_iterator const_iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:31
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:702
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:593
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:771
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:868
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:577
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:744
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:662
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:534
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:541
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:784
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:701
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:642
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:134
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:799
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:724
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:793
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:558
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:821
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:719
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:549
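The ISD opcodes listed above are what SelectionDAG::getNode consumes when building or rewriting the DAG; a minimal, hypothetical use (DAG, DL, VT, LHS and RHS are assumptions):
  SDValue Sum = DAG.getNode(ISD::ADD, DL, VT, LHS, RHS);
  SDValue Wide = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Sum);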
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
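A small illustration of the two condition-code helpers above; the starting condition is arbitrary:
  ISD::CondCode CC = ISD::SETLT;
  ISD::CondCode Inv = ISD::getSetCCInverse(CC, MVT::i64);     // !(x < y)  ->  SETGE
  ISD::CondCode Swap = ISD::getSetCCSwappedOperands(CC);      // (y op x)  ->  SETGT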
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
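A hedged IR-level sketch; the module M, the IRBuilder and the argument value are assumptions, and llvm.fabs is simply a convenient overloaded intrinsic to demonstrate the Tys parameter:
  Function *Fabs =
      Intrinsic::getOrInsertDeclaration(M, Intrinsic::fabs, {Builder.getDoubleTy()});
  Value *Abs = Builder.CreateCall(Fabs, {Arg});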
ABI getTargetABI(StringRef ABIName)
InstSeq generateInstSeq(int64_t Val)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
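These lookups return RTLIB::UNKNOWN_LIBCALL when no runtime routine exists for the requested type pair, so callers typically guard on that; a small sketch:
  RTLIB::Libcall LC = RTLIB::getSINTTOFP(MVT::i128, MVT::f64);
  if (LC != RTLIB::UNKNOWN_LIBCALL) {
    // lower through makeLibCall, as sketched earlier
  }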
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:477
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
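A typical range-based use, assuming Mask is a container of shuffle-mask indices:
  bool InLowHalf = llvm::all_of(Mask, [](int M) { return M < 8; }); // undef entries (-1) also pass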
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
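A hedged sketch of the builder pattern as used in custom-inserter style code; BB, MI, DL, TII, the registers and the immediate are assumptions, and the opcode is only illustrative:
  BuildMI(*BB, MI, DL, TII->get(LoongArch::ORI), DstReg)
      .addReg(SrcReg)
      .addImm(Imm);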
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
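For example, checking whether a value fits a signed 12-bit immediate field (Imm is an assumed int64_t):
  bool FitsSimm12 = isInt<12>(Imm); // true for -2048..2047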
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
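A typical use while pattern-matching a DAG operand (Op is an assumption):
  if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    uint64_t Imm = C->getZExtValue(); // operand 1 is a constant; use its value
  }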
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or an FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:303
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
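Small illustrations of the bit-pattern helpers above, with arbitrarily chosen values:
  bool Pow2 = isPowerOf2_64(64);         // true
  unsigned L = Log2_64(64);              // 6
  bool SMask = isShiftedMask_64(0x0ff0); // true: a contiguous run of ones surrounded by zeros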
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
Definition ModRef.h:68
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements....
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
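These SDValue predicates are convenient while matching select/setcc-style patterns; a small sketch (N is an assumed SDNode*):
  if (isOneConstant(N->getOperand(1)) && isAllOnesConstant(N->getOperand(2))) {
    // the true/false operands are the constants 1 and -1
  }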
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:212
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...